要求及数据集

hw0.pdf、Wiki-Vote.txt、stackoverflow-Java.txt
内容很简单

代码

Wiki

  1. import snap
  2. import matplotlib.pyplot as plt
  3. def init_graph():
  4. G = snap.TNGraph.New()
  5. with open('Wiki-Vote.txt', 'r', encoding='utf-8') as f:
  6. for line in f.readlines():
  7. if line[0] == '#':
  8. continue
  9. node_in, node_out = line.split('\t')
  10. if not G.IsNode(int(node_in)):
  11. G.AddNode(int(node_in))
  12. if not G.IsNode(int(node_out)):
  13. G.AddNode(int(node_out))
  14. G.AddEdge(int(node_in), int(node_out))
  15. return G
  16. def analyze_graph(G):
  17. print('nodes in network:')
  18. total = G.GetNodes()
  19. print(total)
  20. print('number of nodes with self edge:')
  21. num = 0
  22. for node in G.Nodes():
  23. if node.IsOutNId(node.GetId()):
  24. num += 1
  25. print(num)
  26. print('number of directed edges:', total - num)
  27. print('number of undirected edges:')
  28. num = 0
  29. for edge in G.Edges():
  30. node_1 = edge.GetSrcNId()
  31. node_2 = edge.GetDstNId()
  32. if not G.IsEdge(node_2, node_1):
  33. num += 1
  34. print(num)
  35. print('number of reciprocated edges:', G.GetEdges() - num)
  36. num_in1, num_out1 = 0, 0
  37. num_in2, num_out2 = 0, 0
  38. for node in G.Nodes():
  39. in_deg = node.GetInDeg()
  40. out_deg = node.GetOutDeg()
  41. if not in_deg:
  42. num_in1 += 1
  43. elif in_deg > 10:
  44. num_in2 += 1
  45. if not out_deg:
  46. num_out1 += 1
  47. elif out_deg > 10:
  48. num_out2 += 1
  49. print('zero in deg:', num_in1)
  50. print('zero out deg', num_out1)
  51. print(num_in2, num_out2)
  52. def analyze_further(G):
  53. X, Y = [], []
  54. map = {}
  55. for node in G.Nodes():
  56. out_deg = node.GetOutDeg()
  57. if out_deg not in map:
  58. map[out_deg] = 1
  59. else:
  60. map[out_deg] += 1
  61. X = [item for item in map.keys()]
  62. X.sort()
  63. Y = [map[x] for x in X]
  64. plt.loglog(X, Y, linestyle='dotted', color='b', label='Wiki Network')
  65. plt.xlabel('Node Degree (log)')
  66. plt.ylabel('Proportion of Nodes with a Given Degree (log)')
  67. plt.title('Degree Distribution of Wiki-vote')
  68. plt.legend()
  69. plt.show()
  70. if __name__ == '__main__':
  71. G = init_graph()
  72. analyze_graph(G)
  73. analyze_further(G)

Java

  1. import snap
  2. def init_graph():
  3. G = snap.TNGraph().New()
  4. with open('stackoverflow-Java.txt', 'r', encoding='utf-8') as f:
  5. for line in f.readlines():
  6. node_in, node_out = line.split('\t')
  7. if not G.IsNode(int(node_in)):
  8. G.AddNode(int(node_in))
  9. if not G.IsNode(int(node_out)):
  10. G.AddNode(int(node_out))
  11. G.AddEdge(int(node_in), int(node_out))
  12. return G
  13. def analyze_graph(G):
  14. print('weakly connected components:', len(G.GetWccs()))
  15. print('The number of edges and the number of nodes in the largest weakly connected component:')
  16. component = G.GetMxWcc()
  17. print(component.GetNodes(), component.GetEdges())
  18. print('IDs of the top 3 most central nodes in the network by PagePank scores:')
  19. PRankH = G.GetPageRank()
  20. values = PRankH.values()
  21. values.sort()
  22. for item in PRankH:
  23. if PRankH[item] == values[-1]:
  24. print(item)
  25. if PRankH[item] == values[-2]:
  26. print(item)
  27. if PRankH[item] == values[-3]:
  28. print(item)
  29. print('IDs of the top 3 hubs and top 3 authorities in the network by HITS scores',)
  30. NIdHubH, NIdAuthH = G.GetHits()
  31. values = NIdHubH.values()
  32. values.sort()
  33. for item in NIdHubH:
  34. if NIdHubH[item] == values[-1]:
  35. print(item)
  36. if NIdHubH[item] == values[-2]:
  37. print(item)
  38. if NIdHubH[item] == values[-3]:
  39. print(item)
  40. values = NIdAuthH.values()
  41. values.sort()
  42. for item in NIdAuthH:
  43. if NIdAuthH[item] == values[-1]:
  44. print(item)
  45. if NIdAuthH[item] == values[-2]:
  46. print(item)
  47. if NIdAuthH[item] == values[-3]:
  48. print(item)
  49. if __name__ == '__main__':
  50. G = init_graph()
  51. analyze_graph(G)