先上代码，后面有空再整理细节：

  1. # -*- coding:utf-8 -*-
  2. # vim:et:ts=4:sw=4:
  3. #!/usr/bin/env python
  4. ######################################################################
  5. __author__ = 'ishenweiyan@qq.com'
  6. __create__ = '2019-09-19'
  7. __file__ = 'download_demo.py'
  8. __license__ = '2019 All rights reserved.'
  9. __doc__ = 'The test script.'
  10. #####################################################################
import os
from contextlib import closing
from urllib.request import urlopen

import requests
from tqdm import tqdm
  16. class ProgressBar(object):
  17. def __init__(self, title,
  18. count=0.0,
  19. run_status=None,
  20. fin_status=None,
  21. total=100.0,
  22. unit='', sep='/',
  23. chunk_size=1.0):
  24. super(ProgressBar, self).__init__()
  25. self.info = "[%s] %s %.2f %s %s %.2f %s"
  26. self.title = title
  27. self.total = total
  28. self.count = count
  29. self.chunk_size = chunk_size
  30. self.status = run_status or ""
  31. self.fin_status = fin_status or " " * len(self.status)
  32. self.unit = unit
  33. self.seq = sep
  34. def __get_info(self):
  35. # [名称] 状态 进度 单位 分割线 总数 单位
  36. _info = self.info % (self.title, self.status,
  37. self.count/self.chunk_size, self.unit, self.seq, self.total/self.chunk_size, self.unit)
  38. return _info
  39. def refresh(self, count=1, status=None):
  40. self.count += count
  41. # if status is not None:
  42. self.status = status or self.status
  43. end_str = "\r"
  44. if self.count >= self.total:
  45. end_str = '\n'
  46. self.status = status or self.fin_status
  47. print(self.__get_info(), end=end_str)
  48. def download_from_url(url, dst):
  49. """
  50. @param: url to download file
  51. @param: dst place to put the file
  52. """
  53. file_size = int(urlopen(url).info().get('Content-Length', -1))
  54. if os.path.exists(dst):
  55. first_byte = os.path.getsize(dst)
  56. else:
  57. first_byte = 0
  58. if first_byte >= file_size:
  59. return file_size
  60. header = {"Range": "bytes=%s-%s" % (first_byte, file_size)}
  61. pbar = tqdm(
  62. total=file_size, initial=first_byte,
  63. unit='B', unit_scale=True, desc=url.split('/')[-1])
  64. req = requests.get(url, headers=header, stream=True)
  65. with(open(dst, 'ab')) as f:
  66. for chunk in req.iter_content(chunk_size=1024):
  67. if chunk:
  68. f.write(chunk)
  69. pbar.update(1024)
  70. pbar.close()
  71. return file_size
  72. def __main__():
  73. url = 'http://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2ensembl.gz'
  74. file_name = 'gene2ensembl.gz'
  75. '''
  76. with closing(requests.get(url, stream=True)) as response:
  77. chunk_size = 1024 # 单次请求最大值
  78. content_size = int(response.headers['content-length']) # 内容体总大小
  79. progress = ProgressBar(file_name, total=content_size,
  80. unit="KB", chunk_size=chunk_size, run_status="正在下载", fin_status="下载完成")
  81. with open(file_name, "wb") as file:
  82. for data in response.iter_content(chunk_size=chunk_size):
  83. file.write(data)
  84. progress.refresh(count=len(data))
  85. '''
  86. download_from_url(url, file_name)
  87. if __name__ == "__main__":
  88. __main__()

参考资料