# Download the NCBI NR BLAST database (pyNCBI_Nr.py)
## ascp -T -l640M -i /home/linxz/.aspera/connect/etc/asperaweb_id_dsa.openssh anonftp@ftp.ncbi.nlm.nih.gov:/blast/db/v5/nr.00.tar.gz /media/linxz/Data/BioDB/NCBI/NR/
import os
import subprocess
from ftplib import FTP

from tqdm import tqdm
# Collect the names of the files under a directory that have a given extension and return them as a list.
def get_files(file_dir, ext1='gz', ext2='md5'):
    """Return the names of files under *file_dir* that end in either extension.

    The directory tree is walked recursively with ``os.walk``.  Only the bare
    file names are returned (no directory component), in walk order.

    Args:
        file_dir: Root directory to scan.  A path that cannot be listed
            yields an empty list, because ``os.walk`` ignores listing errors
            by default.
        ext1: First accepted extension, without the leading dot.
        ext2: Second accepted extension, without the leading dot.

    Returns:
        A list of the matching file names.
    """
    # Only the final suffix is compared, so 'nr.00.tar.gz' matches 'gz' and
    # 'nr.00.tar.gz.md5' matches 'md5'.
    wanted = {'.' + ext1, '.' + ext2}
    matches = []
    for _root, _dirs, names in os.walk(file_dir):
        matches.extend(
            name for name in names if os.path.splitext(name)[1] in wanted
        )
    return matches
# --- Fetch the remote file listing -----------------------------------------
# Log in anonymously to the NCBI FTP server and list the BLAST v5 database
# directory.  The ``with`` block closes the connection as soon as the listing
# has been read; nothing below needs FTP again (downloads go through ascp).
with FTP('ftp.ncbi.nlm.nih.gov') as ftp:  # connect to host, default port
    ftp.login()  # user anonymous, passwd anonymous@
    ftp.cwd('blast/db/v5')
    blast_db_list = ftp.nlst()  # list of entry names in the current directory

# --- Configuration ---------------------------------------------------------
db = 'nr'  # only remote files whose name starts with this prefix are fetched
out_dir = '/media/linxz/Data/BioDB/NCBI/NR/'  # local output directory
# Fixed leading arguments of every ascp invocation, kept as an argument list
# so that remote file names are never interpreted by a shell.
ascp_cmd = [
    'ascp', '-T', '-l640M',
    '-i', '/home/linxz/.aspera/connect/etc/asperaweb_id_dsa.openssh',
]
ascp_remote = 'anonftp@ftp.ncbi.nlm.nih.gov:/blast/db/v5/'

# Names already present locally (.gz / .md5); a set gives O(1) membership.
finsh_list = set(get_files(out_dir))

# --- Build one ascp command per missing file -------------------------------
ascp_list = []
for file in blast_db_list:
    if not file.startswith(db):
        continue
    if file in finsh_list:
        print(file + '已完成下载')
        continue
    ascp_list.append(ascp_cmd + [ascp_remote + file, out_dir])

# --- Run the downloads -----------------------------------------------------
print('开始下载')
# Iterate the list directly.  The previous version removed items from the list
# while looping over it, which skipped every other command on each pass and
# needed an outer ``while`` to pick up the leftovers.
failed = []
for cmd in tqdm(ascp_list):
    # subprocess.run raises FileNotFoundError if the ascp binary is missing.
    if subprocess.run(cmd).returncode != 0:
        failed.append(cmd)

# Surface transfers that ascp reported as unsuccessful; they are simply
# retried on the next run as long as no .gz/.md5 file of that name was left
# in out_dir.
for cmd in failed:
    print('ascp failed: ' + ' '.join(cmd))