import dgl
import torch as th
import torch.nn as nn
from . import BaseModel, register_model
import dgl.function as fn
@register_model('NARS')
class NARS(BaseModel):
    r"""
    `SCALABLE GRAPH NEURAL NETWORKS FOR HETEROGENEOUS GRAPHS <https://arxiv.org/pdf/2011.09679.pdf>`_.

    Given a heterogeneous graph :math:`G` and its edge relation type set :math:`\mathcal{R}`,
    the method first samples :math:`K` unique subsets from :math:`\mathcal{R}`.
    Then for each sampled subset :math:`R_i \subseteq \mathcal{R}`, a relation subgraph
    :math:`G_i` is generated from :math:`G` in which only edges whose type belongs to
    :math:`R_i` are kept. :math:`G_i` is treated as a homogeneous graph or a bipartite graph,
    and neighbor aggregation is performed to generate :math:`L`-hop neighbor features for each node.
    Let :math:`H_{v,0}` be the input features (of dimension :math:`D`) for node :math:`v`.
    For each subgraph :math:`G_i`, the :math:`l`-th hop features :math:`H_{v,l}^{i}` are computed as

    .. math::
        H_{v, l}^{i}=\sum_{u \in N_{i}(v)} \frac{1}{\left|N_{i}(v)\right|} H_{u, l-1}^{i}

    where :math:`N_i(v)` is the set of neighbors of node :math:`v` in :math:`G_i`.

    For each layer :math:`l`, the model adaptively learns which relation-subgraph features to use
    by aggregating features from different subgraphs :math:`G_i` with a learnable 1-D convolution.
    The aggregated :math:`l`-hop features across all subgraphs are calculated as

    .. math::
        H_{v, l}^{a g g}=\sum_{i=1}^{K} a_{i, l} \cdot H_{v, l}^{i}

    where :math:`H^i` is the neighbor-averaged feature on subgraph :math:`G_i` and :math:`a_{i,l}`
    is a learned vector of length equal to the feature dimension :math:`D`.

    In this implementation the relation subsets are not sampled: every canonical edge type that
    starts at the predicted node type and ends at a different node type is paired with its reverse
    edge type(s), and each such pair forms one subset.

    Parameters
    ----------
    num_hops : int
        Number of per-hop feature tensors fed to the network
        (``build_model_from_args`` passes ``args.num_hops + 1``, i.e. hop 0 included).
    args :
        Configuration object. The attributes read here are ``category``, ``dropout``,
        ``input_dropout``, ``device``, ``hidden_dim``, ``out_dim`` and ``ff_layer``;
        feature preprocessing additionally reads ``num_hops``.
    hg :
        Heterogeneous graph. The predicted node type must carry an ``"h"`` feature
        (used for the input width); preprocessing reads the ``"feat"`` feature.

    Note
    ----
    Datasets without node features (e.g. HGBn-Freebase) are not supported,
    because the model performs neighbor aggregation to generate :math:`L`-hop
    neighbor features at once.
    """

    @classmethod
    def build_model_from_args(cls, args, hg):
        """Build the model from ``args``; hop 0 counts as one extra feature set."""
        num_hops = args.num_hops + 1
        return cls(num_hops=num_hops, args=args, hg=hg)

    def __init__(self, num_hops, args, hg):
        super(NARS, self).__init__()
        self.category = args.category
        self.dropout = args.dropout
        self.input_dropout = args.input_dropout
        self.device = args.device
        self.num_hops = num_hops
        self.args = args

        # NOTE(review): the input width comes from data["h"] while preprocessing
        # reads data["feat"]; both are presumably the same width for the
        # predicted node type -- confirm against the dataset loader.
        in_size = hg.nodes[args.category].data["h"].shape[1]

        # One relation subset per (forward, reverse) edge-type pair that leaves
        # the predicted node type and reaches a different node type.
        etypes = hg.canonical_etypes
        mps = []
        for etype in etypes:
            if etype[0] != args.category:
                continue
            for reverse_etype in etypes:
                if (etype[0] == reverse_etype[2]
                        and etype[2] == reverse_etype[0]
                        and etype[0] != etype[2]):
                    mps.append([etype, reverse_etype])
        self.mps = mps
        self.num_feats = len(mps)

        # Neighbor-averaged features are fixed inputs, so no autograd graph is needed.
        with th.no_grad():
            self.feats = preprocess_features(hg, mps, args, args.device, self.args.category)
            print("Done preprocessing")

        self.seq = nn.Sequential(
            WeightedAggregator(self.num_feats, in_size, num_hops),
            SIGN(in_size, args.hidden_dim, args.out_dim, num_hops,
                 args.ff_layer, args.dropout, args.input_dropout)
        )

    def forward(self, hg, h_dict):
        """
        Run the network on the precomputed multi-hop features.

        ``hg`` and ``h_dict`` are accepted but not used: the inputs are the
        features computed once in ``__init__``.

        Returns
        -------
        dict
            ``{category: output}`` where ``output`` is produced by the
            aggregator followed by the SIGN network.
        """
        ffeats = [x.to(self.device) for x in self.feats]
        # Call the module (not ``.forward``) so that registered hooks run.
        return {self.category: self.seq(ffeats)}

    def reset_parameters(self):
        """Re-initialise every learnable parameter of the network."""
        # Walk descendants before their ancestors so that a custom initialiser
        # on a parent (e.g. Xavier in the feed-forward nets) is applied last and
        # is not overwritten by the default initialiser of its child layers.
        for module in reversed(list(self.seq.modules())):
            reset = getattr(module, "reset_parameters", None)
            if callable(reset):
                reset()
            elif isinstance(module, WeightedAggregator):
                # Same initialisation the aggregator uses at construction time.
                for weight in module.agg_feats:
                    nn.init.xavier_uniform_(weight)
def preprocess_features(g, mps, args, device, predict):
    """
    Pre-process heterogeneous graph ``g`` to generate neighbor-averaged
    features for each relation subset.

    Node types whose ``"feat"`` width differs from that of ``predict`` are
    projected to the common width with a random matrix drawn uniformly from
    ``[-0.5, 0.5)``. The projected features are written back to ``g``, so the
    graph is modified in place.

    Parameters
    -----------
    g :
        heterogeneous graph; every node type must carry a ``"feat"`` feature
    mps :
        relation subsets, one list of edge types per subset
    args :
        arguments; ``args.num_hops`` is read
    device :
        device forwarded to ``gen_rel_subset_feature``
    predict :
        node type whose features are returned

    Return
    ------
    list of ``args.num_hops + 1`` tensors (one per hop, hop 0 first), each of
    shape ``(number of predict nodes, len(mps), feature width)``
    """
    category_dim = g.nodes[predict].data["feat"].shape[1]
    for ntype in g.ntypes:
        feat = g.nodes[ntype].data["feat"]
        ntype_dim = feat.shape[1]
        if category_dim != ntype_dim:
            # Keep the projection on the same device as the feature it
            # multiplies; moving it to ``device`` breaks when the graph lives
            # elsewhere.
            rand_weight = th.empty(ntype_dim, category_dim).uniform_(-0.5, 0.5).to(feat.device)
            g.nodes[ntype].data["feat"] = th.matmul(feat, rand_weight)

    num_nodes, feat_size = g.nodes[predict].data["feat"].shape
    num_hop_feats = args.num_hops + 1
    new_feats = [th.zeros(num_nodes, len(mps), feat_size) for _ in range(num_hop_feats)]
    for subset_id, subset in enumerate(mps):
        feats = gen_rel_subset_feature(g, subset, args, device, predict)
        for hop, hop_feats in enumerate(new_feats):
            feat = feats[hop]
            # Rows beyond feat.shape[0] (if any) stay zero.
            hop_feats[:feat.shape[0], subset_id, :] = feat
    return new_feats
def gen_rel_subset_feature(g, rel_subset, args, device, predict):
    """
    Build the relation subgraph for a relation subset and generate multi-hop
    neighbor-averaged features on this subgraph.

    Parameters
    ----------
    g :
        Heterogeneous graph; node types of the subgraph must carry ``"feat"``
    rel_subset :
        edge types that make up the relation subset
    args :
        arguments; ``args.num_hops`` is read
    device :
        unused, kept for call compatibility; intermediate tensors stay on the
        device of the graph
    predict :
        node type whose features are collected

    Returns
    ------
    list of ``args.num_hops + 1`` CPU tensors: the hop-0 to hop-``num_hops``
    features of the ``predict`` nodes
    """
    new_g = g.edge_type_subgraph(rel_subset).to(g.device)
    # Canonical triples stay unambiguous when two relations share a name.
    canonical_etypes = new_g.canonical_etypes

    # set node feature and calc deg
    for ntype in new_g.ntypes:
        num_nodes = new_g.number_of_nodes(ntype)
        feat = g.nodes[ntype].data["feat"]
        if num_nodes < feat.shape[0]:
            new_g.nodes[ntype].data["hop_0"] = feat[:num_nodes, :]
        else:
            new_g.nodes[ntype].data["hop_0"] = feat

        # Total in-degree over every relation that ends at this node type.
        deg = 0
        for canonical_etype in canonical_etypes:
            if ntype == canonical_etype[2]:
                deg = deg + new_g.in_degrees(etype=canonical_etype)
        if th.is_tensor(deg):
            norm = 1.0 / deg.float()
            norm[th.isinf(norm)] = 0  # isolated nodes: averaging weight 0
            # Node data must live on the graph's device, not on ``device``.
            new_g.nodes[ntype].data["norm"] = norm.view(-1, 1).to(new_g.device)

    res = []
    # compute k-hop feature
    for hop in range(1, args.num_hops + 1):
        ntype2feat = {}
        for canonical_etype in canonical_etypes:
            stype, _, dtype = canonical_etype
            new_g[canonical_etype].update_all(
                fn.copy_u(f'hop_{hop-1}', 'm'), fn.sum('m', 'new_feat'))
            new_feat = new_g.nodes[dtype].data.pop("new_feat")
            assert("new_feat" not in new_g.nodes[stype].data)
            # Sum contributions of all relations ending at the same node type.
            if dtype in ntype2feat:
                ntype2feat[dtype] += new_feat
            else:
                ntype2feat[dtype] = new_feat
        for ntype in new_g.ntypes:
            assert ntype in ntype2feat  # because subgraph is not directional
            feat_dict = new_g.nodes[ntype].data
            # The previous hop is no longer needed on the graph once consumed.
            old_feat = feat_dict.pop(f"hop_{hop-1}")
            if ntype == predict:
                res.append(old_feat.cpu())
            feat_dict[f"hop_{hop}"] = ntype2feat.pop(ntype).mul_(feat_dict["norm"])

    res.append(new_g.nodes[predict].data.pop(f"hop_{args.num_hops}").cpu())
    return res
class FeedForwardNet(nn.Module):
    """
    A stack of linear layers with PReLU and dropout between them.

    Parameters
    ----------
    in_feats :
        input feature dimension
    hidden :
        hidden layer dimension
    out_feats :
        output feature dimension
    num_layers :
        number of layers; with exactly one layer the input is mapped straight
        to the output and no activation or dropout is created
    dropout :
        dropout rate
    """

    def __init__(self, in_feats, hidden, out_feats, num_layers, dropout):
        super().__init__()
        self.layers = nn.ModuleList()
        self.num_layers = num_layers

        # Widths at the boundaries of consecutive linear layers.
        if num_layers == 1:
            widths = [in_feats, out_feats]
        else:
            widths = [in_feats, hidden] + [hidden] * (num_layers - 2) + [out_feats]
        for fan_in, fan_out in zip(widths, widths[1:]):
            self.layers.append(nn.Linear(fan_in, fan_out))

        if self.num_layers > 1:
            self.prelu = nn.PReLU()
            self.dropout = nn.Dropout(dropout)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform weights (ReLU gain) and zero biases for every layer."""
        relu_gain = nn.init.calculate_gain("relu")
        for linear in self.layers:
            nn.init.xavier_uniform_(linear.weight, gain=relu_gain)
            nn.init.zeros_(linear.bias)

    def forward(self, x):
        """Apply the layers in order; all but the last are followed by PReLU and dropout."""
        last_index = self.num_layers - 1
        for index, linear in enumerate(self.layers):
            x = linear(x)
            if index < last_index:
                x = self.dropout(self.prelu(x))
        return x
class SIGN(nn.Module):
    """
    The SIGN model: one feed-forward branch per hop, followed by a projection
    of the concatenated branch outputs.

    Parameters
    ------------
    in_feats :
        input feature dimension
    hidden :
        hidden layer dimension
    out_feats :
        output feature dimension
    num_hops :
        number of hops (one branch is created per hop)
    num_layers :
        number of layers in each feed-forward net
    dropout :
        dropout rate
    input_drop :
        whether or not to apply dropout to the input features
    """

    def __init__(
        self, in_feats, hidden, out_feats, num_hops, num_layers, dropout, input_drop
    ):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.prelu = nn.PReLU()
        self.inception_ffs = nn.ModuleList(
            [
                FeedForwardNet(in_feats, hidden, hidden, num_layers, dropout)
                for _ in range(num_hops)
            ]
        )
        self.input_drop = input_drop
        # The projection consumes the concatenation of all branch outputs.
        self.project = FeedForwardNet(
            num_hops * hidden, hidden, out_feats, num_layers, dropout
        )

    def forward(self, feats):
        """Map a sequence of per-hop feature tensors to the final output."""
        branch_outputs = []
        for hop_feat, branch in zip(feats, self.inception_ffs):
            branch_input = self.dropout(hop_feat) if self.input_drop else hop_feat
            branch_outputs.append(branch(branch_input))
        merged = th.cat(branch_outputs, dim=-1)
        return self.project(self.dropout(self.prelu(merged)))
class WeightedAggregator(nn.Module):
    """
    Combine the features of all relation subsets into one feature per node.

    For every hop a learnable weight of shape ``(num_feats, in_feats)`` scales
    the subset features element-wise, and the result is summed over the subset
    axis.

    Parameters
    -------------
    num_feats :
        number of subsets
    in_feats :
        input feature dimension
    num_hops :
        number of hops (one weight is created per hop)
    """

    def __init__(self, num_feats, in_feats, num_hops):
        super(WeightedAggregator, self).__init__()
        self.agg_feats = nn.ParameterList(
            [nn.Parameter(th.empty(num_feats, in_feats)) for _ in range(num_hops)]
        )
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform initialisation of every per-hop weight."""
        for weight in self.agg_feats:
            nn.init.xavier_uniform_(weight)

    def forward(self, feats):
        """
        Aggregate per-hop features.

        Parameters
        ----------
        feats :
            sequence of tensors, one per hop, each expected to be of shape
            ``(num_nodes, num_feats, in_feats)``

        Returns
        -------
        list of tensors of shape ``(num_nodes, in_feats)``, one per hop
        """
        # Reduce only the subset axis. An argument-less squeeze() would also
        # drop the node axis for a single node (or the feature axis when
        # in_feats == 1) and change the output rank.
        return [
            (feat * weight.unsqueeze(0)).sum(dim=1)
            for feat, weight in zip(feats, self.agg_feats)
        ]