要求及数据集
hw0.pdf、Wiki-Vote.txt、stackoverflow-Java.txt
内容很简单
代码
Wiki
from collections import Counter

import matplotlib.pyplot as plt
import snap


def init_graph():
    """Build a directed SNAP graph from the edge list in ``Wiki-Vote.txt``.

    Blank lines and lines starting with ``#`` are skipped.  Every other
    line must hold exactly two whitespace-separated integer node IDs; an
    edge is added from the first ID to the second, creating either node
    on first sight.

    Returns:
        The populated ``snap.TNGraph``.

    Raises:
        ValueError: If a data line does not contain exactly two integers.
    """
    G = snap.TNGraph.New()
    with open('Wiki-Vote.txt', 'r', encoding='utf-8') as f:
        # Stream the file instead of loading every line with readlines().
        for line in f:
            fields = line.split()
            if not fields or fields[0].startswith('#'):
                continue
            src, dst = map(int, fields)
            for node_id in (src, dst):
                if not G.IsNode(node_id):
                    G.AddNode(node_id)
            G.AddEdge(src, dst)
    return G


def analyze_graph(G):
    """Print basic node, edge and degree statistics of directed graph ``G``.

    Printed, in order: node count, nodes with a self-loop, directed edges
    excluding self-loops, undirected edges (unique unordered node pairs
    joined in at least one direction), reciprocated edges (unique
    unordered pairs joined in both directions), nodes with zero
    in-degree, nodes with zero out-degree, and the counts of nodes whose
    in-/out-degree exceeds 10.

    Args:
        G: A ``snap.TNGraph`` (simple directed graph).
    """
    print('nodes in network:')
    print(G.GetNodes())

    print('number of nodes with self edge:')
    self_loops = sum(1 for node in G.Nodes() if node.IsOutNId(node.GetId()))
    print(self_loops)

    # Directed edges between distinct nodes: the edge count (not the node
    # count) minus one edge per self-loop.
    proper_edges = G.GetEdges() - self_loops
    print('number of directed edges:', proper_edges)

    # Edges whose reverse is absent.  A self-loop is its own reverse, so
    # it is never counted here.
    one_way = sum(
        1
        for edge in G.Edges()
        if not G.IsEdge(edge.GetDstNId(), edge.GetSrcNId())
    )
    # Every mutual pair contributes two directed edges (a->b and b->a).
    mutual_pairs = (proper_edges - one_way) // 2

    print('number of undirected edges:')
    print(one_way + mutual_pairs)
    print('number of reciprocated edges:', mutual_pairs)

    zero_in = zero_out = 0
    high_in = high_out = 0
    # NOTE(review): both thresholds are "strictly greater than 10" and the
    # final line is printed without labels -- confirm against the
    # assignment text which criteria are actually wanted.
    for node in G.Nodes():
        in_deg = node.GetInDeg()
        out_deg = node.GetOutDeg()
        if in_deg == 0:
            zero_in += 1
        elif in_deg > 10:
            high_in += 1
        if out_deg == 0:
            zero_out += 1
        elif out_deg > 10:
            high_out += 1
    print('zero in deg:', zero_in)
    print('zero out deg', zero_out)
    print(high_in, high_out)


def analyze_further(G):
    """Plot the out-degree distribution of ``G`` on log-log axes.

    Each plotted point is ``(d, count)`` where ``count`` is the number of
    nodes whose out-degree equals ``d``.  Degree 0 is left out because
    zero has no position on a logarithmic axis.

    Args:
        G: A ``snap.TNGraph``.
    """
    degree_counts = Counter(node.GetOutDeg() for node in G.Nodes())
    X = sorted(deg for deg in degree_counts if deg > 0)
    Y = [degree_counts[deg] for deg in X]

    plt.loglog(X, Y, linestyle='dotted', color='b', label='Wiki Network')
    plt.xlabel('Node Degree (log)')
    # Y holds raw node counts, not proportions, so the label says so.
    plt.ylabel('Number of Nodes with a Given Degree (log)')
    plt.title('Degree Distribution of Wiki-vote')
    plt.legend()
    plt.show()


if __name__ == '__main__':
    G = init_graph()
    analyze_graph(G)
    analyze_further(G)
Java
import snap


def init_graph():
    """Build a directed SNAP graph from ``stackoverflow-Java.txt``.

    Blank lines and lines starting with ``#`` are skipped.  Every other
    line must hold exactly two whitespace-separated integer node IDs; an
    edge is added from the first ID to the second, creating either node
    on first sight.

    Returns:
        The populated ``snap.TNGraph``.

    Raises:
        ValueError: If a data line does not contain exactly two integers.
    """
    G = snap.TNGraph.New()
    with open('stackoverflow-Java.txt', 'r', encoding='utf-8') as f:
        # Stream the file instead of loading every line with readlines().
        for line in f:
            fields = line.split()
            if not fields or fields[0].startswith('#'):
                continue
            src, dst = map(int, fields)
            for node_id in (src, dst):
                if not G.IsNode(node_id):
                    G.AddNode(node_id)
            G.AddEdge(src, dst)
    return G


def _top_ids(scores, k=3):
    """Return the ``k`` keys of ``scores`` with the highest values, best first.

    ``scores`` only needs to support key iteration and ``scores[key]``
    lookup.  Keys with equal scores keep their iteration order.
    """
    return sorted(scores, key=lambda node_id: scores[node_id], reverse=True)[:k]


def analyze_graph(G):
    """Print component, PageRank and HITS statistics of directed graph ``G``.

    Printed, in order: the number of weakly connected components; the
    node and edge counts of the largest weakly connected component; the
    three node IDs with the highest PageRank score; then the three IDs
    with the highest HITS hub score followed by the three with the
    highest HITS authority score.  Every top-3 list is printed best
    first, one ID per line, and each ID appears once even when scores tie.

    Args:
        G: A ``snap.TNGraph``.
    """
    print('weakly connected components:', len(G.GetWccs()))

    print('The number of edges and the number of nodes in the largest weakly connected component:')
    component = G.GetMxWcc()
    print(component.GetNodes(), component.GetEdges())

    print('IDs of the top 3 most central nodes in the network by PageRank scores:')
    for node_id in _top_ids(G.GetPageRank()):
        print(node_id)

    print('IDs of the top 3 hubs and top 3 authorities in the network by HITS scores')
    hub_scores, authority_scores = G.GetHits()
    # Hubs first, then authorities, matching the order named in the header.
    for scores in (hub_scores, authority_scores):
        for node_id in _top_ids(scores):
            print(node_id)


if __name__ == '__main__':
    G = init_graph()
    analyze_graph(G)
