一、目录扫描原理

1.读取字典URL
2.HTTP GET请求url
3.判断状态码,输出并保存存在的目录

二、字典读取函数

with open(xxxx, 'r') as f:
    for line in f.readlines():
        print(line.strip())  # 逐行处理字典条目

三、代码实现

  1. import sys
  2. import requests
  3. import threading
  4. import queue
  5. import time
  6. import argparse
  7. class Dirscan(object):
  8. """
  9. 目录扫描器 类
  10. """
  11. def __init__(self, scanSite, scanDict, scanOutput, threadNum):
  12. """
  13. :param scanSite: 初始扫描站点
  14. :param scanDict: 初始化扫描字典
  15. :param scanOutput: 扫描输出结果
  16. :param threadNum: 线程数
  17. """
  18. self.scanSite = scanSite if scanSite.find('://')!=1 else 'http://%s' % scanSite
  19. print('扫描目标:',self.scanSite)
  20. self.scanDict = scanDict
  21. self.scanOutput = scanSite.rstrip('/').replace('https://', '').replace('http://', '')+'.txt' if scanOutput == 0 else scanOutput
  22. truncate = open(self.scanOutput,'w')
  23. truncate.close()
  24. self.threadNum = threadNum
  25. self.lock = threading.Lock()#线程锁
  26. # 类的功能 加载请求头,加载字典 初始化的
  27. self._loadHeaders()
  28. self._loadDict(self.scanDict)
  29. self._analysis404()
  30. self.STOP_ME = False
  31. def _loadDict(self,dict_list):
  32. #加入队列
  33. self.qlists = queue.Queue()
  34. with open(dict_list,encoding='utf-8') as f:
  35. for line in f:
  36. if line[0:1] != '#':
  37. self.qlists.put(line.strip())
  38. #默认请求头
  39. def _loadHeaders(self):
  40. self.headers = {
  41. 'Accept': '*/*',
  42. 'Referer': 'http://www.baidu.com',
  43. 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; ',
  44. 'Cache-Control': 'no-cache',
  45. }
  46. #代理服务器 ip port
  47. def _proxies(self):
  48. ip = "192.168.43.125"
  49. port = 225
  50. proxies = {'http':ip+':'+port,'https':ip+':'+port}
  51. #404 NOT FIND
  52. def _analysis404(self):
  53. notFoundPage = requests.get(self.scanSite + '/songgeshigedashuaibi/hello.html', allow_redirects=False)
  54. self.notFoundPageText = notFoundPage.text.replace('/songgeshigedashuaibi/hello.html', '')
  55. #输出结果
  56. def _writeOutput(self, result):
  57. self.lock.acquire()#线程取锁
  58. with open(self.scanOutput, 'a+') as f:
  59. f.write(result + '\n')
  60. self.lock.release()
  61. #扫描目标站点
  62. def _scan(self,url):
  63. html_result = 0
  64. try:
  65. html_result = requests.get(url, headers=self.headers, allow_redirects=False, timeout=60)
  66. except requests.exceptions.ConnectionError:
  67. # print 'Request Timeout:%s' % url
  68. pass
  69. finally:
  70. if html_result != 0:
  71. #访问网站状态码 是否是200 是就存在该页面
  72. if html_result.status_code == 200 and html_result.text != self.notFoundPageText:
  73. print('[%i]%s' % (html_result.status_code, html_result.url))
  74. self._writeOutput('[%i]%s' % (html_result.status_code, html_result.url))
  75. def run(self):
  76. while not self.qlists.empty() and self.STOP_ME == False:
  77. url = self.scanSite + self.qlists.get() # url + dir字典队列
  78. self._scan(url)
  79. if __name__ == '__main__':
  80. #命令解析器使用
  81. """
  82. 1.添加参数
  83. parser.add_argument('integers', metavar='N', type=int, nargs='+', help='an integer for the accumu
  84. 2.解析参数
  85. parser.parse_args(['--sum', '7', '-1', '42'])
  86. """
  87. #实例化命令解析器
  88. parser = argparse.ArgumentParser()
  89. parser.add_argument('scanSite', help="将要扫描的站点", type=str)
  90. parser.add_argument('-d', '--dict', dest="scanDict", help="扫描字典", type=str,
  91. default="dict.txt")
  92. parser.add_argument('-o', '--output', dest="scanOutput", help="扫描结果存为文件", type=str, default=0)
  93. parser.add_argument('-t', '--thread', dest="threadNum", help="输入线程数量", type=int,
  94. default=60)#默认是60
  95. args = parser.parse_args()
  96. # 实例化SCAN对象 参数
  97. """
  98. 1.
  99. """
  100. scan = Dirscan(args.scanSite, args.scanDict, args.scanOutput, args.threadNum)
  101. for i in range(args.threadNum):
  102. t = threading.Thread(target=scan.run)
  103. # 通过setDaemon(true)
  104. # 来设置线程为“守护线程”;将一个用户线程设置为守护线程的方式是在
  105. # 线程对象创建
  106. # 之前
  107. # 用线程对象的setDaemon方法。
  108. t.setDaemon(True)
  109. t.start()
  110. while True:
  111. #此方法返回活动线程的当前线程的线程组中的数量。
  112. if threading.activeCount() <= 1:
  113. break
  114. else:
  115. try:
  116. time.sleep(0.1)
  117. except KeyboardInterrupt as e:
  118. print('\n[警告]用户中止,等待所有从线程退出,当前 (%i)'% threading.activeCount())
  119. scan.STOP_ME = True
  120. print('扫描结束')