from sklearn.metrics import f1_score
import torch_geometric.transforms as T
from torch_geometric.datasets import PPI
from torch_geometric.loader import DataLoader
import torch
from models import *
import scipy.sparse as sp
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import argparse
import warnings
warnings.filterwarnings("ignore")
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.005,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=256,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.8,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--normalization', default='FirstOrderGCN',
                    help='The normalization on the adj matrix.')
parser.add_argument('--dataset', default='ppi', help='The data set.')  # citeseer, cora
parser.add_argument('--datapath', default='../../ppi', help='The data path.')
parser.add_argument('--task_type', default='full',
                    help='The node classification task type (full or semi). '
                         'Only valid for the cora, citeseer and pubmed datasets.')
parser.add_argument('--no_tensorboard', action='store_true', default=False,
                    help='Disable writing logs to tensorboard.')
parser.add_argument('--lradjust', action='store_true', default=False,
                    help='Enable learning rate adjustment (ReduceLROnPlateau or linear reduce).')
OUTPUT_PATH = 'F:\\testGCNfig1\\'  # no r-prefix, so each '\\' resolves to a single backslash
test_flag = True  # when True, the ablation grid below is skipped
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
# random seed setting
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
device = torch.device('cuda' if args.cuda else 'cpu')
def get_lr(optimizer):
    # Return the learning rate of the optimizer's first parameter group.
    for param_group in optimizer.param_groups:
        return param_group['lr']
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse_coo_tensor(indices, values, shape)
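# Illustrative doctest-style sketch of the conversion (not executed):
# >>> sparse_mx_to_torch_sparse_tensor(sp.eye(2)).to_dense()
# tensor([[1., 0.],
#         [0., 1.]])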
def preprocess_graph(adj):
    """Symmetrically normalize the adjacency matrix: D^{-1/2} (A + I) D^{-1/2}."""
    adj = sp.coo_matrix(adj)
    adj_ = adj + sp.eye(adj.shape[0])  # add self-loops: adj = adj + I
    rowsum = np.array(adj_.sum(1))
    degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten())  # D^{-1/2} from node degrees
    adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo()  # D^{-1/2} (A + I) D^{-1/2}
    return adj_normalized
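# Worked example (sketch): for a single undirected edge between nodes 0 and 1,
# A + I = [[1, 1], [1, 1]] and both degrees are 2, so every entry of
# D^{-1/2} (A + I) D^{-1/2} is (1/sqrt(2)) * 1 * (1/sqrt(2)) = 0.5:
# >>> preprocess_graph(np.array([[0., 1.], [1., 0.]])).toarray()
# array([[0.5, 0.5],
#        [0.5, 0.5]])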
def train(model, data_loader):
    model.train()
    total_loss = total_examples = 0
    for data in data_loader:
        # Build the unweighted adjacency matrix of the batch from its edge index.
        adj = sp.csr_matrix(
            (np.ones(data.num_edges), (data.edge_index[0, :], data.edge_index[1, :])),
            shape=[data.num_nodes, data.num_nodes])
        adj_norm = preprocess_graph(adj)
        adj_norm = sparse_mx_to_torch_sparse_tensor(adj_norm).to(device)
        data_x = data.x.to(device)
        data_y = data.y.to(device)
        optimizer.zero_grad()
        loss = criterion(model(data_x, adj_norm), data_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_nodes
        total_examples += data.num_nodes
    return total_loss / total_examples, get_lr(optimizer)
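# Usage sketch: train() reads the module-level `criterion` and `optimizer`,
# which are assumed to be defined further down this script, e.g. (hypothetical):
# criterion = torch.nn.BCEWithLogitsLoss()  # PPI is multi-label; test() thresholds logits at 0
# optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
# loss, cur_lr = train(model, train_loader)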
@torch.no_grad()
def test(model, data_loader):
    model.eval()
    ys, preds = [], []
    for data in data_loader:
        ys.append(data.y)
        adj = sp.csr_matrix(
            (np.ones(data.num_edges), (data.edge_index[0, :], data.edge_index[1, :])),
            shape=[data.num_nodes, data.num_nodes])
        adj_norm = preprocess_graph(adj)
        adj_norm = sparse_mx_to_torch_sparse_tensor(adj_norm).to(device)
        data_x = data.x.to(device)
        out = model(data_x, adj_norm)
        preds.append((out > 0).float().cpu())  # logits > 0 <=> sigmoid(logits) > 0.5
    y, pred = torch.cat(ys, dim=0).numpy(), torch.cat(preds, dim=0).numpy()
    return f1_score(y, pred, average='micro') if pred.sum() > 0 else 0
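# Evaluation sketch: micro-averaged F1 over all nodes of a split, e.g.:
# val_f1 = test(model, val_loader)
# test_f1 = test(model, test_loader)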
path = args.datapath
# Note: a T.ToSparseTensor() pre_transform would replace `data.edge_index` with
# `data.adj_t`, breaking the adjacency construction in train()/test(); since
# normalization is done per batch via preprocess_graph(), the graphs are loaded
# untransformed.
train_dataset = PPI(path, split='train')
val_dataset = PPI(path, split='val')
test_dataset = PPI(path, split='test')
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)
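# Sanity check (sketch): the PPI benchmark ships 20 train / 2 val / 2 test graphs,
# 50 node features, and 121 binary labels per node; uncomment to verify:
# print(len(train_dataset), len(val_dataset), len(test_dataset))
# print(train_dataset.num_features, train_dataset.num_classes)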
if not test_flag:
    test_cases = [
        # base
        {'num_layers':2, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        # test layer 1
        {'num_layers':3, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':4, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':8, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':16, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':32, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        # test self loops 6
        {'num_layers':2, 'add_self_loops':True, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':4, 'add_self_loops':True, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':8, 'add_self_loops':True, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':16, 'add_self_loops':True, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':32, 'add_self_loops':True, 'add_bn':False, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        # test batch norm 11
        {'num_layers':2, 'add_self_loops':False, 'add_bn':True, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':4, 'add_self_loops':False, 'add_bn':True, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':8, 'add_self_loops':False, 'add_bn':True, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':16, 'add_self_loops':False, 'add_bn':True, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':32, 'add_self_loops':False, 'add_bn':True, 'use_pairnorm':'None', 'drop_edge':1.0, 'activation':'linear'},
        # test use_pairnorm 16
        {'num_layers':2, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'PN', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':4, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'PN', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':8, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'PN', 'drop_edge':1.0, 'activation':'linear'},
        {'num_layers':16, 'add_self_loops':False, 'add_bn':False, 'use_pairnorm':'PN', 'drop_edge':