Cause of randomness in AUC score for GNN

Question

I have implemented a GraphSAGE model using dgl for link prediction. On average the auc score of the model is ~0.7 but the score varies a lot for different runs. Even though I am training and testing on the same data. I am not splitting the data randomly, the data is fixed, and still I got AUC scores ranging from 0.2-0.85. I could not figure out where is the randomness is generating from. Is the model itself work in a random way?

Here is my code:

import random
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import itertools
import numpy as np
import scipy.sparse as sp

import pandas as pd

import dgl.function as fn

add_param_len= True
add_arg_len = True
add_name = True


class DotPredictor(nn.Module):
    def forward(self, g, h):
        with g.local_scope():
            # print("====>", h)
            g.ndata['h'] = h
            # Compute a new edge feature named 'score' by a dot-product between the
            # source node feature 'h' and destination node feature 'h'.
            g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            # u_dot_v returns a 1-element vector for each edge so you need to squeeze it.
            # print(g.edata['score'][:, 0])
            return g.edata['score'][:, 0]


from dgl.nn import SAGEConv

# ----------- 2. create model -------------- #
# build a two-layer GraphSAGE model
class GraphSAGE(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, 'mean')
        self.conv2 = SAGEConv(h_feats, h_feats, 'mean')
    
    def forward(self, g, in_feat):
        # print(g)
        # print(in_feat)
        h = self.conv1(g, in_feat)
        # print(h)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h

class MLPPredictor(nn.Module):
    def __init__(self, h_feats):
        super().__init__()
        self.W1 = nn.Linear(h_feats * 2, h_feats)
        self.W2 = nn.Linear(h_feats, 1)

    def apply_edges(self, edges):
        """
        Computes a scalar score for each edge of the given graph.

        Parameters
        ----------
        edges :
            Has three members ``src``, ``dst`` and ``data``, each of
            which is a dictionary representing the features of the
            source nodes, the destination nodes, and the edges
            themselves.

        Returns
        -------
        dict
            A dictionary of new edge features.
        """
        # print("here!!!!!")
        h = torch.cat([edges.src['h'], edges.dst['h']], 1)
        return {'score': self.W2(F.relu(self.W1(h))).squeeze(1)}

    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(self.apply_edges)
            return g.edata['score']

df = pd.read_csv('nodes.csv')
# print(df)
nodes_data = df.drop(["start_line","start_column","end_line",  "end_column" ,"file_name"], axis=1)
# nodes_data

df = pd.read_csv('edges.csv', header=None)
# df
train_len = 3432
test_df = df[train_len:]
edges_data = df[:train_len]
# edges_data = df

import dgl

src = edges_data[0].to_numpy()
dst = edges_data[1].to_numpy()

# Create a DGL graph from a pair of numpy arrays
g = dgl.graph((src, dst))
# g

y= nodes_data['type'].tolist()
from sklearn import preprocessing
import torch

le = preprocessing.LabelEncoder()
targets = le.fit_transform(y)
targets = torch.as_tensor(targets)
targets = targets.type(torch.LongTensor)
type_one_hot = F.one_hot(targets)

g.ndata['x_one_hot'] = type_one_hot
y=torch.tensor(targets, dtype=torch.float32)
x=y.view(len(nodes_data),1)
g.ndata['x'] = x

if add_param_len:
    le = preprocessing.LabelEncoder()
    y=nodes_data['params_len'].tolist()
    targets = le.fit_transform(y)
    # print([x for x in targets if x!=0])
    targets = torch.as_tensor(targets)
    targets = targets.type(torch.LongTensor)
    param_len_one_hot = F.one_hot(targets)
    # param_len_one_hot

    g.ndata['param_len_one_hot'] = param_len_one_hot
    g.ndata['param_len'] = targets


if add_arg_len:
    le = preprocessing.LabelEncoder()
    y=nodes_data['argument_len'].tolist()
    targets = le.fit_transform(y)
    # print([x for x in targets if x!=0])
    targets = torch.as_tensor(targets)
    targets = targets.type(torch.LongTensor)
    args_len_one_hot = F.one_hot(targets)
    # param_len_one_hot

    g.ndata['args_len_one_hot'] = args_len_one_hot
    g.ndata['args_len'] = targets


if add_name:
    le = preprocessing.LabelEncoder()
    y=nodes_data['name'].tolist()
    targets = le.fit_transform(y)
    # targets
    # for x in targets:
        # print(x)
    # # print([x for x in targets if x!=0])
    targets = torch.as_tensor(targets)
    targets = targets.type(torch.LongTensor)
    # args_len_one_hot = F.one_hot(targets)
    # param_len_one_hot

    g.ndata['name'] = targets
    # g.ndata['args_len'] = targets

u, v = g.edges()

eids = np.arange(g.number_of_edges())
eids = np.random.permutation(eids)
test_size = len(test_df)
train_size = g.number_of_edges()

test_pos_u, test_pos_v = torch.tensor(test_df[0].to_numpy()), torch.tensor(test_df[1].to_numpy())
print(test_pos_u)
train_pos_u, train_pos_v = u,v

adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())), shape=(g.number_of_nodes(), g.number_of_nodes()))
print(adj.shape)
adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes())
neg_u, neg_v = np.where(adj_neg != 0)

neg_eids = np.random.choice(len(neg_u), g.number_of_edges())

set(nodes_data['type'])
stmt_type=['FunctionDeclaration', 'ArrowFunctionExpression', 'FunctionExpression']
df1 = nodes_data[nodes_data.type.isin(stmt_type)]
test_neg_id = df1['id'].tolist()
# print(test_neg_id)

df1 = nodes_data[nodes_data['type']=='CallExpression']
test_neg_call_site = df1['id'].tolist()
# print(test_neg_call_site)


all_combi = [(x, y) for x in test_neg_call_site for y in test_neg_id]
print(len(all_combi))
test_data = df[3402:]
pair_list = list(zip(test_data[0].tolist(),test_data[1].tolist()))
print(pair_list)

test_neg_u, test_neg_v = [], []

for pair_ in all_combi:
    # print(pair_)
    if pair_ not in pair_list:
        test_neg_u = np.append(test_neg_u,[pair_[0]])
        test_neg_v = np.append(test_neg_v,[pair_[1]])

print(len(test_neg_u))

train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]]

# train_g = dgl.remove_edges(g, eids[:test_size])
train_g = g

train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes())
train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes())

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes())
print(test_pos_g)
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes())
print(test_neg_g)

from sklearn.metrics import roc_auc_score
model = GraphSAGE(g.ndata['x'].shape[1], 16)
# You can replace DotPredictor with MLPPredictor.
pred = MLPPredictor(16)
# pred = DotPredictor()

def compute_loss(pos_score, neg_score):
    # print("here!")
    scores = torch.cat([pos_score, neg_score])
    labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])])
    return F.binary_cross_entropy_with_logits(scores, labels)

def compute_auc(pos_score, neg_score):
    # print(len(neg_score))
    scores = torch.cat([pos_score, neg_score]).numpy()
    # print("scores =====> ", scores, len(scores))
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy()
    # print(labels)
    return roc_auc_score(labels, scores)

# ----------- 3. set up loss and optimizer -------------- #
# in this case, loss will in training loop
# print(model.parameters)
# print(pred.parameters)
# print(train_g.ndata['x'])
optimizer = torch.optim.Adam(itertools.chain(model.parameters(), pred.parameters()), lr=0.01)

# ----------- 4. training -------------------------------- #
all_logits = []
for e in range(150):
    # forward
    h = model(train_g, train_g.ndata['x'])
    # print("this ok======>")
    pos_score = pred(train_pos_g, h)
    neg_score = pred(train_neg_g, h)
    loss = compute_loss(pos_score, neg_score)
    
    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    if e % 5 == 0:
        print('In epoch {}, loss: {}'.format(e, loss))


from sklearn.metrics import roc_auc_score
with torch.no_grad():
    pos_score = pred(test_pos_g, h)
    # print(pos_score)
    neg_score = pred(test_neg_g, h)
    # print(neg_score)
    print('AUC', compute_auc(pos_score, neg_score))


    from sklearn.metrics import roc_curve
    scores = torch.cat([pos_score, neg_score]).numpy()
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy()
    # roc curve for models
    fpr1, tpr1, thresh1 = roc_curve(labels, scores, pos_label=1)
    # print(thresh1)
    # print(fpr1)
    # print(tpr1)
    # print("threshold ====> ",thresh1)


    import matplotlib.pyplot as plt
    plt.style.use('seaborn')

    # plot roc curves
    plt.plot(fpr1, tpr1, linestyle='--',color='orange', label='Logistic Regression')
    # plt.plot(fpr2, tpr2, linestyle='--',color='green', label='KNN')
    # plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
    # title
    plt.title('ROC curve')
    # x label
    plt.xlabel('False Positive Rate')
    # y label
    plt.ylabel('True Positive rate')

    plt.legend(loc='best')
    # plt.savefig('ROC_with_all_features',dpi=300)
    plt.show();

Cause of randomness in AUC score for GNN

0 Answers0