# 下载 NR 库:pyNCBI_Nr.py
# Download the NCBI BLAST "nr" database archives from blast/db/v5 using the
# Aspera `ascp` command-line client, skipping files already present locally.
#
# Example of the single-file command this script issues for each archive:
# ascp -T -l640M -i /home/linxz/.aspera/connect/etc/asperaweb_id_dsa.openssh anonftp@ftp.ncbi.nlm.nih.gov:/blast/db/v5/nr.00.tar.gz /media/linxz/Data/BioDB/NCBI/NR/
import os
import subprocess
import sys
from ftplib import FTP

# FTP server and directory that are listed to discover the available files.
FTP_HOST = 'ftp.ncbi.nlm.nih.gov'
FTP_DIR = 'blast/db/v5'

# Only remote files whose name starts with this prefix are downloaded.
DB = 'nr'

# Local output directory; it is also scanned for already downloaded files.
OUT_DIR = '/media/linxz/Data/BioDB/NCBI/NR/'

# Fixed leading part of every ascp invocation (flags kept exactly as in the
# original command line) and the remote location the file name is appended to.
ASCP_PREFIX = [
    'ascp', '-T', '-l640M',
    '-i', '/home/linxz/.aspera/connect/etc/asperaweb_id_dsa.openssh',
]
ASCP_REMOTE = 'anonftp@ftp.ncbi.nlm.nih.gov:/blast/db/v5/'


def get_files(file_dir, ext1='gz', ext2='md5'):
    """Return the names of files under *file_dir* ending in either extension.

    The directory tree is walked recursively. Only the bare file names are
    returned (no directory part), in the order ``os.walk`` yields them.

    Args:
        file_dir: Directory to scan. A missing directory yields an empty list.
        ext1: First accepted extension, without the leading dot.
        ext2: Second accepted extension, without the leading dot.

    Returns:
        A list of matching file names.
    """
    wanted = ('.' + ext1, '.' + ext2)
    return [
        name
        for _root, _dirs, names in os.walk(file_dir)
        for name in names
        if os.path.splitext(name)[1] in wanted
    ]


def list_remote_files(host=FTP_HOST, directory=FTP_DIR):
    """Return the list of file names in *directory* on the FTP server *host*.

    Logs in anonymously. The connection is closed when the listing has been
    read, including when an error occurs.
    """
    with FTP(host) as ftp:  # connect to host, default port
        ftp.login()  # anonymous login
        ftp.cwd(directory)
        return ftp.nlst()  # a list of names, not a dictionary


def build_ascp_commands(remote_files, finished, db=DB, out_dir=OUT_DIR):
    """Build one ascp argument list per file that still has to be fetched.

    Files whose name does not start with *db* are ignored. Files already
    listed in *finished* are reported on stdout and skipped.

    Args:
        remote_files: Iterable of file names available on the server.
        finished: Container of file names that are already downloaded.
        db: Required file name prefix.
        out_dir: Local destination directory passed to ascp.

    Returns:
        A list of argument lists, each ready for ``subprocess.call``.
    """
    commands = []
    for name in remote_files:
        if not name.startswith(db):
            continue
        if name in finished:
            print(name + '已完成下载')
            continue
        commands.append(ASCP_PREFIX + [ASCP_REMOTE + name, out_dir])
    return commands


def main():
    """List the server, work out what is missing, and download it with ascp."""
    # Imported here so the helpers above stay importable on machines where
    # the third-party tqdm package is not installed.
    from tqdm import tqdm

    remote_files = list_remote_files()
    # A set makes the "already downloaded?" test O(1) per remote file.
    finished = set(get_files(OUT_DIR))
    commands = build_ascp_commands(remote_files, finished)

    print('开始下载')
    failed = []
    # Iterate once over an unmodified list: removing entries while looping
    # skips every other command and makes the progress bar meaningless.
    for argv in tqdm(commands):
        try:
            # An argument list (no shell) keeps server-supplied file names
            # from being interpreted as shell syntax.
            status = subprocess.call(argv)
        except OSError as exc:
            # Typically: the ascp executable could not be started.
            sys.stderr.write('{}: {}\n'.format(argv[0], exc))
            status = 1
        if status != 0:
            failed.append(argv[-2])

    # Report transfers that did not succeed instead of dropping them silently.
    for remote in failed:
        sys.stderr.write(remote + ' 下载失败\n')


if __name__ == '__main__':
    main()
