下载 NR 库：pyNCBI_Nr.py

    # Manual ascp command for a single archive, kept for reference:
    # ascp -T -l640M -i /home/linxz/.aspera/connect/etc/asperaweb_id_dsa.openssh anonftp@ftp.ncbi.nlm.nih.gov:/blast/db/v5/nr.00.tar.gz /media/linxz/Data/BioDB/NCBI/NR/
    import os
    import subprocess
    from ftplib import FTP

    from tqdm import tqdm
    def get_files(file_dir, ext1='gz', ext2='md5'):
        """Return the base names of files under *file_dir* with a given extension.

        The tree rooted at *file_dir* is walked recursively with ``os.walk``.
        Only base names are collected, so the sub-directory a file was found in
        is not part of the result.

        Args:
            file_dir: Directory to scan. A path that does not exist yields an
                empty list, because ``os.walk`` produces nothing for it.
            ext1: First accepted extension, without the leading dot.
            ext2: Second accepted extension, without the leading dot.

        Returns:
            A list of file names whose last extension, as reported by
            ``os.path.splitext``, equals ``'.' + ext1`` or ``'.' + ext2``.
            Names appear in the order ``os.walk`` reports them.
        """
        # Only the final suffix is compared: 'nr.00.tar.gz' matches 'gz' and
        # 'nr.00.tar.gz.md5' matches 'md5'.
        wanted = ('.' + ext1, '.' + ext2)
        matches = []
        for _root, _dirs, names in os.walk(file_dir):
            matches.extend(
                name for name in names if os.path.splitext(name)[1] in wanted
            )
        return matches
    db = 'nr'  # name prefix selecting which remote files to fetch
    out_dir = '/media/linxz/Data/BioDB/NCBI/NR/'  # output directory
    # ascp invocation up to and including the remote directory; the remote file
    # name is appended directly to the last whitespace-separated token.
    ascp_cmd = 'ascp -T -l640M -i /home/linxz/.aspera/connect/etc/asperaweb_id_dsa.openssh anonftp@ftp.ncbi.nlm.nih.gov:/blast/db/v5/'


    def build_ascp_commands(remote_files, finished_files, prefix=db,
                            base_cmd=ascp_cmd, dest=out_dir):
        """Build one ascp argument list per remote file that is still missing.

        Args:
            remote_files: File names listed in the remote directory.
            finished_files: File names already present locally.
            prefix: Only remote names starting with this string are considered.
            base_cmd: Command string whose last whitespace-separated token is
                the remote directory; every earlier token is passed through.
            dest: Local destination passed as the final ascp argument.

        Returns:
            A list of argument lists, in the order of *remote_files*. Each is
            suitable for ``subprocess.run`` without a shell, so a file name is
            always passed as a single argument whatever characters it contains.

        Side effects:
            Prints one line for every matching file that is already present.
        """
        *options, remote_dir = base_cmd.split()
        already_done = set(finished_files)  # O(1) membership tests
        commands = []
        for name in remote_files:
            # NOTE(review): plain prefix match, as in the original script; it
            # also selects any other remote name that begins with `prefix`.
            if not name.startswith(prefix):
                continue
            if name in already_done:
                print(name + '已完成下载')
                continue
            commands.append(options + [remote_dir + name, dest])
        return commands


    def run_downloads(commands):
        """Run every command once, in order, and return the ones that failed.

        The list is iterated without being modified, so a single progress bar
        covers all commands.

        Args:
            commands: Argument lists as produced by ``build_ascp_commands``.

        Returns:
            The commands whose process exited with a non-zero status.

        Raises:
            OSError: If the executable cannot be started at all.
        """
        failed = []
        for cmd in tqdm(commands):
            if subprocess.run(cmd).returncode != 0:
                failed.append(cmd)
        return failed


    def main():
        """List the remote directory and download every file not yet present."""
        # The context manager closes the connection even if listing fails.
        with FTP('ftp.ncbi.nlm.nih.gov') as ftp:  # default port
            ftp.login()  # anonymous login
            ftp.cwd('blast/db/v5')  # switch to the blast/db/v5 directory
            blast_db_list = ftp.nlst()  # file names in the current directory

        finsh_list = get_files(out_dir)  # names already in the output directory
        ascp_list = build_ascp_commands(blast_db_list, finsh_list)

        print('开始下载')
        for cmd in run_downloads(ascp_list):
            # cmd[-2] is the remote source; the message means "download failed".
            print(cmd[-2] + '下载失败')


    if __name__ == '__main__':
        main()