Source code for pydgc.pipelines.dfcn_pipeline

# -*- coding: utf-8 -*-
from sklearn.decomposition import PCA
import scipy.sparse as sp
import numpy as np
import torch
from ..models import DFCN
from . import BasePipeline
from argparse import Namespace
from ..utils import perturb_data
from torch_geometric.utils import contains_self_loops, remove_self_loops, add_remaining_self_loops


def normalize(mx):
    """Row-normalize sparse matrix.

    Every row is scaled by the reciprocal of its sum. Rows whose sum is zero
    get a scaling factor of 0, so they stay all-zero instead of turning into
    infinities.

    Args:
        mx (scipy.sparse): Input sparse matrix.

    Returns:
        scipy.sparse: Row-normalized sparse matrix.
    """
    row_sum = np.asarray(mx.sum(1)).flatten()
    # NumPy refuses to raise integer arrays to a negative power, so integer or
    # boolean row sums are promoted to float first. Floating-point sums keep
    # their original dtype.
    if not np.issubdtype(row_sum.dtype, np.inexact):
        row_sum = row_sum.astype(np.float64)
    # The division by zero for empty rows is expected and patched right below,
    # so the corresponding RuntimeWarning is silenced.
    with np.errstate(divide="ignore"):
        r_inv = np.power(row_sum, -1)
    r_inv[np.isinf(r_inv)] = 0.
    # Left-multiplying by diag(1 / row_sum) scales each row of mx.
    return sp.diags(r_inv).dot(mx)
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor.

    The matrix is converted to COO layout with float32 values; its row and
    column coordinates become the int64 index tensor of the result.

    Args:
        sparse_mx (scipy.sparse): Input sparse matrix.

    Returns:
        torch.sparse_coo_tensor: The torch sparse tensor.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # Stack coordinates as a (2, nnz) array: first row holds row ids,
    # second row holds column ids.
    coords = np.vstack((coo.row, coo.col)).astype(np.int64)
    return torch.sparse_coo_tensor(
        torch.from_numpy(coords),
        torch.from_numpy(coo.data),
        torch.Size(coo.shape),
    )
class DFCNPipeline(BasePipeline):
    """DFCN pipeline.

    Args:
        args (Namespace): Arguments.
    """

    def __init__(self, args: Namespace):
        super().__init__(args)

    def augment_data(self):
        """Prepare ``self.data`` for DFCN.

        Steps, in order:

        1. Perturb the data with ``perturb_data`` using the dataset's
           augmentation config.
        2. Replace ``self.data.x`` with its PCA projection onto
           ``augmentation.pca_dim`` components (as a float tensor).
        3. If the augmentation config has a truthy ``add_self_loops`` entry,
           add the missing self loops to ``self.data.edge_index``.
        4. Build the row-normalized adjacency matrix and store it as a torch
           sparse tensor in ``self.data.adj``. For the "DBLP" dataset the
           edge index is used as is; for every other dataset existing self
           loops are removed first and an identity matrix is added to the
           symmetrized adjacency before normalization.
        """
        augmentation = self.cfg.dataset.augmentation
        self.data = perturb_data(self.data, augmentation)

        pca = PCA(n_components=augmentation.pca_dim)
        self.data.x = torch.from_numpy(pca.fit_transform(self.data.x)).float()

        # A missing option is treated the same as a disabled one.
        if getattr(augmentation, 'add_self_loops', False):
            edge_index, _ = add_remaining_self_loops(
                self.data.edge_index, num_nodes=self.data.num_nodes)
            self.data.edge_index = edge_index

        if self.dataset_name == "DBLP":
            self.data.adj = self._build_normalized_adj(add_identity=False)
            return

        if contains_self_loops(self.data.edge_index):
            self.data.edge_index = remove_self_loops(self.data.edge_index)[0]
        self.data.adj = self._build_normalized_adj(add_identity=True)

    def _build_normalized_adj(self, add_identity):
        """Build the row-normalized adjacency from ``self.data.edge_index``.

        Args:
            add_identity (bool): Whether to add the identity matrix to the
                symmetrized adjacency before normalization.

        Returns:
            torch.sparse_coo_tensor: Row-normalized adjacency matrix with
            shape ``(num_nodes, num_nodes)``, where ``num_nodes`` is read from
            ``self.cfg.dataset.num_nodes``.
        """
        edges = self.data.edge_index.numpy().T
        n = self.cfg.dataset.num_nodes
        adj = sp.coo_matrix(
            (np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
            shape=(n, n), dtype=np.float32)
        # Symmetrize by keeping, for every pair (i, j), the larger of
        # adj[i, j] and adj[j, i].
        transpose_larger = adj.T > adj
        adj = adj + adj.T.multiply(transpose_larger) - adj.multiply(transpose_larger)
        if add_identity:
            adj = adj + sp.eye(adj.shape[0])
        return sparse_mx_to_torch_sparse_tensor(normalize(adj))

    def build_model(self):
        """Create the DFCN model and log its summary.

        Returns:
            DFCN: Model built from ``self.logger`` and ``self.cfg``.
        """
        model = DFCN(self.logger, self.cfg)
        self.logger.model_info(model)
        return model