Source code for pydgc.pipelines.dfcn_pipeline

# -*- coding: utf-8 -*-
from sklearn.decomposition import PCA
import scipy.sparse as sp
import numpy as np
import torch
from ..models import DFCN
from . import BasePipeline
from argparse import Namespace
from ..utils import perturb_data
from torch_geometric.utils import contains_self_loops, remove_self_loops, add_remaining_self_loops


def normalize(mx):
    """Row-normalize sparse matrix.

    Every row is scaled by the reciprocal of its sum. Rows whose sum is
    zero are left as all-zero rows instead of producing ``inf``/``nan``.

    Args:
        mx (scipy.sparse): Input sparse matrix. Integer dtypes are accepted;
            their row sums are promoted to ``float64`` before inversion.

    Returns:
        scipy.sparse: Row-normalized sparse matrix.
    """
    row_sum = np.asarray(mx.sum(1))
    # np.power(<integer array>, -1) raises ValueError, so promote anything
    # that is not already floating/complex. Float inputs keep their dtype.
    if not np.issubdtype(row_sum.dtype, np.inexact):
        row_sum = row_sum.astype(np.float64)
    # Division by zero is expected for empty rows and is neutralised right
    # below, so do not let it surface as a RuntimeWarning/FloatingPointError.
    with np.errstate(divide="ignore"):
        r_inv = np.power(row_sum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Build a torch sparse COO tensor from a scipy sparse matrix.

    The matrix is converted to COO layout and its values are cast to
    ``float32``; row/column coordinates are stored as ``int64``.

    Args:
        sparse_mx (scipy.sparse): Input sparse matrix.

    Returns:
        torch.sparse_coo_tensor: Sparse tensor with the same shape and
        non-zero entries as ``sparse_mx``.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    # Row 0 holds the row coordinates, row 1 the column coordinates.
    coords = np.stack([coo.row, coo.col], axis=0).astype(np.int64)
    return torch.sparse_coo_tensor(
        torch.from_numpy(coords),
        torch.from_numpy(coo.data),
        torch.Size(coo.shape),
    )


class DFCNPipeline(BasePipeline):
    """Pipeline that prepares graph data for DFCN and builds the model.

    Args:
        args (Namespace): Arguments, forwarded unchanged to ``BasePipeline``.
    """

    def __init__(self, args: Namespace):
        super().__init__(args)

    def augment_data(self):
        """Perturb the data, reduce features with PCA and attach ``data.adj``.

        Steps, in order:

        1. ``self.data`` is replaced by ``perturb_data(self.data, <augmentation cfg>)``.
        2. ``self.data.x`` is projected to ``augmentation.pca_dim`` components
           with PCA and stored as a float tensor.
        3. If the augmentation config has a truthy ``add_self_loops`` entry,
           missing self-loops are added to ``self.data.edge_index``.
        4. A symmetric, row-normalized adjacency matrix is built from
           ``self.data.edge_index`` and stored on ``self.data.adj`` as a torch
           sparse tensor. For every dataset except ``"DBLP"`` existing
           self-loops are stripped from the edge index first and an identity
           matrix is added before normalization; for ``"DBLP"`` the edge index
           is used as-is and no identity is added.
        """
        aug_cfg = self.cfg.dataset.augmentation
        self.data = perturb_data(self.data, aug_cfg)

        reducer = PCA(n_components=aug_cfg.pca_dim)
        reduced = reducer.fit_transform(self.data.x)
        self.data.x = torch.from_numpy(reduced).float()

        # A missing ``add_self_loops`` option behaves like a falsy one.
        if getattr(aug_cfg, 'add_self_loops', False):
            self.data.edge_index = add_remaining_self_loops(
                self.data.edge_index, num_nodes=self.data.num_nodes)[0]

        is_dblp = self.dataset_name == "DBLP"

        # Non-DBLP graphs get their self-loops from the identity added below,
        # so drop any that are already present in the edge index.
        if not is_dblp and contains_self_loops(self.data.edge_index):
            self.data.edge_index = remove_self_loops(self.data.edge_index)[0]

        edge_pairs = self.data.edge_index.numpy().T
        num_nodes = self.cfg.dataset.num_nodes
        weights = np.ones(edge_pairs.shape[0])
        adj = sp.coo_matrix(
            (weights, (edge_pairs[:, 0], edge_pairs[:, 1])),
            shape=(num_nodes, num_nodes),
            dtype=np.float32,
        )

        # Symmetrize: wherever the transposed entry is larger, take it
        # instead, i.e. the element-wise maximum of adj and adj.T.
        adj_t = adj.T
        t_is_larger = adj_t > adj
        adj = adj + adj_t.multiply(t_is_larger) - adj.multiply(t_is_larger)

        if not is_dblp:
            adj = adj + sp.eye(adj.shape[0])

        self.data.adj = sparse_mx_to_torch_sparse_tensor(normalize(adj))

    def build_model(self):
        """Create the DFCN model, log its summary and return it.

        Returns:
            DFCN: Model constructed from ``self.logger`` and ``self.cfg``.
        """
        dfcn = DFCN(self.logger, self.cfg)
        self.logger.model_info(dfcn)
        return dfcn