Source code for openhgnn.models.MHNF

import dgl
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GraphConv, EdgeWeightNorm
from ..utils import transform_relation_graph_list
from . import BaseModel, register_model


[docs] @register_model('MHNF') class MHNF(BaseModel): r""" MHNF from paper `Multi-hop Heterogeneous Neighborhood information Fusion graph representation learning <https://arxiv.org/pdf/2106.09289.pdf>`__. Given a heterogeneous graph :math:`G` and its edge relation type set :math:`\mathcal{R}`.Then we can extract l-hops hybrid adjacency matrix list in HMAE model. The hybrid adjacency matrix list can be used in HLHIA model to generate l-hops representations. Then HSAF model use attention mechanism to aggregate l-hops representations and because of multi-channel conv, the HSAF model also aggregates different channels l-hops representations to generate a final representation. You can see detail operation in correspond model. Parameters ---------- num_edge_type : int Number of relations. num_channels : int Number of conv channels. in_dim : int The dimension of input feature. hidden_dim : int The dimension of hidden layer. num_class : int Number of classification type. num_layers : int Length of hybrid metapath. category : string Type of predicted nodes. norm : bool If True, the adjacency matrix will be normalized. identity : bool If True, the identity matrix will be added to relation matrix set. """ @classmethod def build_model_from_args(cls, args, hg): if args.identity == True: num_edge_type = len(hg.canonical_etypes) + 1 else: num_edge_type = len(hg.canonical_etypes) # add self-loop edge return cls(num_edge_type=num_edge_type, num_channels=args.num_channels, in_dim=args.hidden_dim, hidden_dim=args.hidden_dim, num_class=args.out_dim, num_layers=args.num_layers, category=args.category, norm=args.norm_emd_flag, identity=args.identity) def __init__(self, num_edge_type, num_channels, in_dim, hidden_dim, num_class, num_layers, category, norm, identity): super(MHNF, self).__init__() self.num_edge_type = num_edge_type self.num_channels = num_channels self.in_dim = in_dim self.hidden_dim = hidden_dim self.num_class = num_class self.num_layers = num_layers self.is_norm = norm self.category = category self.identity = identity self.HSAF = HSAF(num_edge_type, self.num_channels, self.num_layers, self.in_dim, self.hidden_dim) self.linear = nn.Linear(self.hidden_dim, self.num_class) self.category_idx = None self.A = None self.h = None def forward(self, hg, h=None): with hg.local_scope(): #Ws = [] hg.ndata['h'] = h # * =============== Extract edges in original graph ================ if self.category_idx is None: self.A, h, self.category_idx = transform_relation_graph_list(hg, category=self.category, identity=self.identity) else: g = dgl.to_homogeneous(hg, ndata='h') h = g.ndata['h'] # g = dgl.to_homogeneous(hg, ndata='h') #X_ = self.gcn(g, self.h) A = self.A final_representation = self.HSAF(A, h) y = self.linear(final_representation) return {self.category: y[self.category_idx]}
class HSAF(nn.Module): r''' HSAF: Hierarchical Semantic Attention Fusion The HSAF model use two level attention mechanism to generate final representation * Hop-level attention .. math:: \alpha_{i, l}^{\Phi_{p}}=\sigma\left[\delta^{\Phi_{p}} \tanh \left(W^{\Phi_{p}} Z_{i, l}^{\Phi_{p}}\right)\right] In which, :math:`\alpha_{i, l}^{\Phi_{p}}` is the importance of the information :math:`\left(Z_{i, l}^{\Phi_{p}}\right)` of the l-th-hop neighbors of node i under the path :math:`\Phi_{p}`, and :math:`\delta^{\Phi_{p}}` represents the learnable matrix. Then normalize :math:`\alpha_{i, l}^{\Phi_{p}}` .. math:: \beta_{i, l}^{\Phi_{p}}=\frac{\exp \left(\alpha_{i, l}^{\Phi_{p}}\right)}{\sum_{j=1}^{L} \exp \left(\alpha_{i, j}^{\Phi_{p}}\right)} Finally, we get hop-level attention representation in one hybrid metapath. .. math:: Z_{i}^{\Phi_{p}}=\sum_{l=1}^{L} \beta_{l}^{\Phi_{p}} Z_{l}^{\Phi_{p}} * Channel-level attention It also can be seen as multi-head attention mechanism. .. math:: \alpha_{i, \Phi_{p}}=\sigma\left[\delta \tanh \left(W Z_{i}^{\Phi_{p}}\right)\right. Then normalize :math:`\alpha_{i, \Phi_{p}}` .. math:: \beta_{i, \Phi_{p}}=\frac{\exp \left(\alpha_{i, \Phi_{p}}\right)}{\sum_{p^{\prime} \in P} \exp \left(\alpha_{\Phi_{p^{\prime}}}\right)} Finally, we get final representation of every nodes. .. math:: Z_{i}=\sum_{p \in P} \beta_{i, \Phi_{p}} Z_{i, \Phi_{p}} ''' def __init__(self, num_edge_type, num_channels, num_layers, in_dim, hidden_dim): super(HSAF, self).__init__() self.num_channels = num_channels self.num_layers = num_layers self.in_dim = in_dim self.hidden_dim = hidden_dim self.HLHIA_layer = HLHIA(num_edge_type, self.num_channels, self.num_layers, self.in_dim, self.hidden_dim) # * =============== channel attention operation ================ self.channel_attention = nn.Sequential( nn.Linear(self.hidden_dim, 1), nn.Tanh(), nn.Linear(1, 1, bias=False), nn.ReLU() ) # * =============== layers attention operation ================ self.layers_attention = nn.ModuleList() for i in range(num_channels): self.layers_attention.append(nn.Sequential( nn.Linear(self.hidden_dim, 1), nn.Tanh(), nn.Linear(1, 1, bias=False), nn.ReLU() )) def forward(self, A, h): attention_list = self.HLHIA_layer(A, h) channel_attention_list = [] for i in range(self.num_channels): layer_level_feature_list = attention_list[i] layer_attention = self.layers_attention[i] for j in range(self.num_layers + 1): layer_level_feature = layer_level_feature_list[j] if j == 0: layer_level_alpha = layer_attention(layer_level_feature) else: layer_level_alpha = th.cat((layer_level_alpha, layer_attention(layer_level_feature)), dim=-1) layer_level_beta = th.softmax(layer_level_alpha, dim=-1) channel_attention_list.append( th.bmm(th.stack(layer_level_feature_list, dim=-1), layer_level_beta.unsqueeze(-1)).squeeze(-1)) for i in range(self.num_channels): channel_level_feature = channel_attention_list[i] if i == 0: channel_level_alpha = self.channel_attention(channel_level_feature) else: channel_level_alpha = th.cat((channel_level_alpha, self.channel_attention(channel_level_feature)), dim=-1) channel_level_beta = th.softmax(channel_level_alpha, dim=-1) channel_attention = th.bmm(th.stack(channel_attention_list, dim=-1), channel_level_beta.unsqueeze(-1)).squeeze( -1) return channel_attention class HLHIA(nn.Module): r""" HLHIA: The Hop-Level Heterogeneous Information Aggregation The l-hop representation :math:`Z_{l}` is generated by the original node feature through a graph conv .. math:: Z_{l}^{\Phi_{p}} = \sigma\left[\left(D_{(l)}^{\Phi_{p}}\right)^{-1} A_{(l)}^{\Phi_{p}} h W^{\Phi_{p}}\right] where :math:`\Phi_{p}` is the hybrid l-hop metapath and `\mathcal{h}` is the original node feature. """ def __init__(self, num_edge_type, num_channels, num_layers, in_dim, hidden_dim): super(HLHIA, self).__init__() self.num_channels = num_channels self.in_dim = in_dim self.hidden_dim = hidden_dim layers = [] for i in range(num_layers): if i == 0: layers.append(HMAELayer(num_edge_type, num_channels, first=True)) else: layers.append(HMAELayer(num_edge_type, num_channels, first=False)) self.layers = nn.ModuleList(layers) self.gcn_list = nn.ModuleList() for i in range(num_channels): self.gcn_list.append(GraphConv(in_feats=self.in_dim, out_feats=hidden_dim, norm='none', activation=F.relu)) self.norm = EdgeWeightNorm(norm='right') def forward(self, A, h): layer_list = [] for i in range(len(self.layers)): if i == 0: H, W, first_adj = self.layers[i](A) layer_list.append(first_adj) layer_list.append(H) else: H, W, first_adj = self.layers[i](A, H) layer_list.append(H) # * =============== GCN Encoder ================ channel_attention_list = [] for i in range(self.num_channels): gcn = self.gcn_list[i] layer_attention_list = [] for j in range(len(layer_list)): layer = layer_list[j][i] layer = dgl.remove_self_loop(layer) edge_weight = layer.edata['w_sum'] layer = dgl.add_self_loop(layer) edge_weight = th.cat((edge_weight, th.full((layer.number_of_nodes(),), 1, device=layer.device))) edge_weight = self.norm(layer, edge_weight) layer_attention_list.append(gcn(layer, h, edge_weight=edge_weight)) channel_attention_list.append(layer_attention_list) return channel_attention_list class HMAELayer(nn.Module): r""" HMAE: Hybrid Metapath Autonomous Extraction The method to generate l-hop hybrid adjacency matrix .. math:: A_{(l)}=\Pi_{i=1}^{l} A_{i} """ def __init__(self, in_channels, out_channels, first=True): super(HMAELayer, self).__init__() self.in_channels = in_channels self.out_channels = out_channels self.first = first self.norm = EdgeWeightNorm(norm='right') if self.first == True: self.conv1 = GTConv(in_channels, out_channels, softmax_flag=False) self.conv2 = GTConv(in_channels, out_channels, softmax_flag=False) else: self.conv1 = GTConv(in_channels, out_channels, softmax_flag=False) def softmax_norm(self, H): norm_H = [] for i in range(len(H)): g = H[i] g.edata['w_sum'] = self.norm(g, th.exp(g.edata['w_sum'])) # normalize the hybrid relationship matrix norm_H.append(g) return norm_H def forward(self, A, H_=None): if self.first == True: result_A = self.softmax_norm(self.conv1(A)) result_B = self.softmax_norm(self.conv2(A)) W = [self.conv1.weight.detach(), self.conv2.weight.detach()] else: result_A = H_ result_B = self.conv1(A) W = [self.conv1.weight.detach().detach()] H = [] for i in range(len(result_A)): g = dgl.adj_product_graph(result_A[i], result_B[i], 'w_sum') H.append(g) return H, W, result_A class GTConv(nn.Module): r""" We conv each sub adjacency matrix :math:`A_{R_{i}}` to a combination adjacency matrix :math:`A_{1}`: .. math:: A_{1} = conv\left(A ; W_{c}\right)=\sum_{R_{i} \in R} w_{R_{i}} A_{R_{i}} where :math:`R_i \subseteq \mathcal{R}` and :math:`W_{c}` is the weight of each relation matrix """ def __init__(self, in_channels, out_channels, softmax_flag=True): super(GTConv, self).__init__() self.in_channels = in_channels self.out_channels = out_channels self.weight = nn.Parameter(th.Tensor(out_channels, in_channels)) self.softmax_flag = softmax_flag self.reset_parameters() def reset_parameters(self): nn.init.normal_(self.weight, std=0.01) def forward(self, A): if self.softmax_flag: Filter = F.softmax(self.weight, dim=1) else: Filter = self.weight num_channels = Filter.shape[0] results = [] for i in range(num_channels): for j, g in enumerate(A): A[j].edata['w_sum'] = g.edata['w'] * Filter[i][j] sum_g = dgl.adj_sum_graph(A, 'w_sum') results.append(sum_g) return results