script

  1. import os
  2. import json
  3. import re
  4. class KOParser(object):
  5. def __init__(self, map):
  6. self.map = map
  7. def json_parser(self):
  8. # check the existance of json file
  9. if os.path.exists(self.map):
  10. with open(self.map, 'r') as f:
  11. ko_list = []
  12. # convert json to dict
  13. map_dict = json.load(f)
  14. # print(type(map_dict)) dict
  15. maps = map_dict['children']
  16. # print(type(maps)) list
  17. for map in maps:
  18. map_name = map['name'][0:5] + '\t' + map['name'][6:]
  19. # print(map_name)
  20. # print(type(map['children']))
  21. for map_l_1 in map['children']:
  22. map_l_1_name = map_l_1['name'][0:5] + '\t' + map_l_1['name'][6:]
  23. # print(map_l_1_name)
  24. for pathway in map_l_1['children']:
  25. try:
  26. for genes in pathway['children']:
  27. pathway_name = pathway['name'][0:5] + '\t' + pathway['name'][6:]
  28. # print(genes['name'])
  29. k_num = genes['name'].split(sep=' ')[0]
  30. gene_name = genes['name'].split(sep=' ')[1].split(sep=';')[0]
  31. anno = genes['name'].split(sep=' ')[1].split(sep=';')[-1]
  32. try:
  33. pattern = re.compile('(.*)(\[EC:.*\])')
  34. product = re.search(pattern, anno).group(1)
  35. ec = re.search(pattern, anno).group(2)
  36. ko = k_num + '\t' + gene_name + '\t' + product + '\t' + ec
  37. except:
  38. ko = k_num + '\t' + gene_name + '\t' + anno
  39. # print(ko)
  40. info = map_name + '\t' + map_l_1_name + '\t' + pathway_name + '\t' + ko
  41. ko_list.append(info)
  42. except:
  43. continue
  44. return ko_list
  45. else:
  46. print('Error: Json file does not exist.')
  47. def save_data(self, result, file_name):
  48. # convert list to string for writing as file
  49. for info in result:
  50. kos = ''.join(info) + '\n'
  51. with open(file_name, 'a', encoding='utf-8') as f:
  52. f.write(kos)
  53. if __name__ == '__main__':
  54. map = 'ko00001.json'
  55. file_name = 'ko.txt'
  56. ko_json = KOParser(map)
  57. ko = ko_json.json_parser()
  58. ko_json.save_data(ko, file_name)

download json

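Choose "Download json" and save the file as `ko00001.json` in the directory you run the script from (the script reads that file name).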

run

  1. python pathway_annotation.py

Reference

  1. 解析KEGG json文件