import dgl

g = ...  # create or load a DGLGraph object

# Split `g` into 2 partitions and write them under 'data_root_dir'.
# The graph name 'mygraph' is also used as the name of the metadata JSON file.
dgl.distributed.partition_graph(g, 'mygraph', 2, 'data_root_dir')
将输出以下数据文件。
data_root_dir/
|-- mygraph.json # metadata JSON. File name is the given graph name.
|-- part0/ # data for partition 0
| |-- node_feats.dgl # node features stored in binary format
| |-- edge_feats.dgl # edge features stored in binary format
| |-- graph.dgl # graph structure of this partition stored in binary format
|
|-- part1/ # data for partition 1
    |-- node_feats.dgl
    |-- edge_feats.dgl
    |-- graph.dgl
# Partition the graph into 4 parts and also ask for the ID mappings
# (return_mapping=True) so results can be related back to the input graph.
node_map, edge_map = dgl.distributed.partition_graph(
    g,
    'graph_name',
    4,
    '/tmp/test',
    balance_ntypes=g.ndata['train_mask'],
    return_mapping=True,
)

# Let's assume that node_emb is saved from the distributed training.
# NOTE(review): `th` is presumably `import torch as th` -- confirm it is
# imported where this snippet is used.
orig_node_emb = th.zeros(node_emb.shape, dtype=node_emb.dtype)
# Row i of node_emb is written to row node_map[i] of the output buffer.
orig_node_emb[node_map] = node_emb
{
    "graph_name": "MAG240M-LSC",  # given graph name
    "node_type": ["author", "paper", "institution"],
    "num_nodes_per_chunk": [
        [61191556, 61191556],  # number of author nodes per chunk
        [61191553, 61191552],  # number of paper nodes per chunk
        [12861, 12860]         # number of institution nodes per chunk
    ],
    # The edge type name is a colon-joined string of source, edge, and destination type.
    "edge_type": [
        "author:writes:paper",
        "author:affiliated_with:institution",
        "paper:cites:paper"
    ],
    "num_edges_per_chunk": [
        [193011360, 193011360],  # number of author:writes:paper edges per chunk
        [22296293, 22296293],    # number of author:affiliated_with:institution edges per chunk
        [648874463, 648874463]   # number of paper:cites:paper edges per chunk
    ],
    "edges": {
        "author:writes:paper": {  # edge type
            "format": {"name": "csv", "delimiter": " "},
            # The list of paths. Can be relative or absolute.
            "data": ["edges/writes-part1.csv", "edges/writes-part2.csv"]
        },
        "author:affiliated_with:institution": {
            "format": {"name": "csv", "delimiter": " "},
            "data": ["edges/affiliated_with-part1.csv", "edges/affiliated_with-part2.csv"]
        },
        "paper:cites:paper": {
            "format": {"name": "csv", "delimiter": " "},
            "data": ["edges/cites-part1.csv", "edges/cites-part2.csv"]
        }
    },
    "node_data": {
        "paper": {  # node type
            "feat": {  # feature key
                "format": {"name": "numpy"},
                "data": ["node_data/paper-feat-part1.npy", "node_data/paper-feat-part2.npy"]
            },
            "label": {  # feature key
                "format": {"name": "numpy"},
                "data": ["node_data/paper-label-part1.npy", "node_data/paper-label-part2.npy"]
            },
            "year": {  # feature key
                "format": {"name": "numpy"},
                "data": ["node_data/paper-year-part1.npy", "node_data/paper-year-part2.npy"]
            }
        }
    },
    "edge_data": {}  # MAG240M-LSC does not have edge features
}
data_root_dir/
|-- graph_name.json # partition configuration file in JSON
|-- part0/ # data for partition 0
| |-- orig_nids.dgl # original node IDs
| |-- orig_eids.dgl # original edge IDs
| |-- ... # other data such as graph and node/edge feats
|
|-- part1/ # data for partition 1
| |-- orig_nids.dgl
| |-- orig_eids.dgl
| |-- ...
|
|-- ... # data for other partitions
# Load the original IDs for the nodes in partition 0.
orig_nids_0 = dgl.data.load_tensors('/path/to/data/part0/orig_nids.dgl')
# Get the original node IDs for node type 'user'
user_orig_nids_0 = orig_nids_0['user']

# Load the original IDs for the edges in partition 0.
orig_eids_0 = dgl.data.load_tensors('/path/to/data/part0/orig_eids.dgl')
# Get the original edge IDs for edge type 'like'
# (looked up in the edge-ID dict, not the node-ID dict loaded above).
like_orig_eids_0 = orig_eids_0['like']