I. How directory scanning works
1. Read the candidate paths (the dictionary) one URL at a time.
2. Send an HTTP GET request for each candidate URL.
3. Check the response status code; if the page exists, print it and save it to the output file (see the sketch after this list).
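A minimal sketch of this loop, assuming the requests library, a target URL supplied by the caller, and a wordlist file (dict.txt here is only an example name) whose entries are paths such as /admin/:

import requests

def simple_scan(site, dict_path='dict.txt'):
    # Read the dictionary, GET every candidate URL and report pages that answer 200
    with open(dict_path, encoding='utf-8') as f:
        for line in f:
            path = line.strip()
            if not path or path.startswith('#'):
                continue
            try:
                resp = requests.get(site + path, allow_redirects=False, timeout=10)
            except requests.exceptions.RequestException:
                continue  # unreachable or timed out, skip this path
            if resp.status_code == 200:
                print('[%i]%s' % (resp.status_code, resp.url))

simple_scan('http://example.com')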
II. The dictionary-reading function
The dictionary is a plain text file with one path per line. Open it for reading (not 'a+', which positions the file pointer at the end, so nothing would be read) and iterate over the lines:

with open(dict_list, encoding='utf-8') as f:
    for line in f:
        ...
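In the scanner below this becomes a small loader that skips comment lines and feeds a thread-safe queue. A standalone version (a sketch; the function name and the queue-based return are choices made here, not part of the original class) looks like:

import queue

def load_dict(dict_list):
    # Queue every non-empty, non-comment line of the wordlist
    q = queue.Queue()
    with open(dict_list, encoding='utf-8') as f:
        for line in f:
            if line.strip() and not line.startswith('#'):
                q.put(line.strip())
    return q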
III. Full implementation
import argparse
import queue
import threading
import time

import requests


class Dirscan(object):
    """Directory scanner."""

    def __init__(self, scanSite, scanDict, scanOutput, threadNum):
        """
        :param scanSite: target site to scan
        :param scanDict: path to the scan dictionary (wordlist)
        :param scanOutput: output file for results
        :param threadNum: number of worker threads
        """
        # Prepend http:// if the target has no scheme
        self.scanSite = scanSite if scanSite.find('://') != -1 else 'http://%s' % scanSite
        print('Scan target:', self.scanSite)
        self.scanDict = scanDict
        # Default output file: <host>.txt
        self.scanOutput = scanSite.rstrip('/').replace('https://', '').replace('http://', '') + '.txt' \
            if scanOutput == 0 else scanOutput
        open(self.scanOutput, 'w').close()  # truncate any previous results
        self.threadNum = threadNum
        self.lock = threading.Lock()  # protects the output file
        # Initialisation: load request headers, load the dictionary, fingerprint the 404 page
        self._loadHeaders()
        self._loadDict(self.scanDict)
        self._analysis404()
        self.STOP_ME = False

    def _loadDict(self, dict_list):
        # Put every non-comment line of the wordlist into a queue
        self.qlists = queue.Queue()
        with open(dict_list, encoding='utf-8') as f:
            for line in f:
                if line[0:1] != '#':
                    self.qlists.put(line.strip())

    def _loadHeaders(self):
        # Default request headers
        self.headers = {
            'Accept': '*/*',
            'Referer': 'http://www.baidu.com',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1)',
            'Cache-Control': 'no-cache',
        }

    def _proxies(self):
        # Optional proxy settings (ip:port), not wired in by default
        ip = '192.168.43.125'
        port = 225
        return {'http': '%s:%d' % (ip, port), 'https': '%s:%d' % (ip, port)}

    def _analysis404(self):
        # Request a path that should not exist and remember its body, so that
        # soft-404 pages (status 200 but showing an error page) can be filtered out
        notFoundPage = requests.get(self.scanSite + '/songgeshigedashuaibi/hello.html', allow_redirects=False)
        self.notFoundPageText = notFoundPage.text.replace('/songgeshigedashuaibi/hello.html', '')

    def _writeOutput(self, result):
        # Append one result line under the lock so threads do not interleave writes
        self.lock.acquire()
        with open(self.scanOutput, 'a+') as f:
            f.write(result + '\n')
        self.lock.release()

    def _scan(self, url):
        html_result = None
        try:
            html_result = requests.get(url, headers=self.headers, allow_redirects=False, timeout=60)
        except requests.exceptions.RequestException:
            # Connection errors and timeouts are simply skipped
            pass
        finally:
            if html_result is not None:
                # Status 200 with a body different from the 404 fingerprint means the page exists
                if html_result.status_code == 200 and html_result.text != self.notFoundPageText:
                    print('[%i]%s' % (html_result.status_code, html_result.url))
                    self._writeOutput('[%i]%s' % (html_result.status_code, html_result.url))

    def run(self):
        # Worker loop: take paths from the queue until it is empty or a stop is requested
        while not self.qlists.empty() and not self.STOP_ME:
            url = self.scanSite + self.qlists.get()  # site + path from the dictionary queue
            self._scan(url)


if __name__ == '__main__':
    # argparse usage: add_argument() declares a parameter, parse_args() parses the command line
    parser = argparse.ArgumentParser()
    parser.add_argument('scanSite', help="site to scan", type=str)
    parser.add_argument('-d', '--dict', dest="scanDict", help="scan dictionary (wordlist)", type=str, default="dict.txt")
    parser.add_argument('-o', '--output', dest="scanOutput", help="file to save the results", type=str, default=0)
    parser.add_argument('-t', '--thread', dest="threadNum", help="number of threads", type=int, default=60)
    args = parser.parse_args()

    # Create the scanner and start the worker threads
    scan = Dirscan(args.scanSite, args.scanDict, args.scanOutput, args.threadNum)
    for i in range(args.threadNum):
        t = threading.Thread(target=scan.run)
        # Daemon threads must be marked before start(); they will not block interpreter exit
        t.daemon = True
        t.start()

    while True:
        # active_count() includes the main thread, so <= 1 means all workers have finished
        if threading.active_count() <= 1:
            break
        try:
            time.sleep(0.1)
        except KeyboardInterrupt:
            print('\n[WARN] user abort, waiting for worker threads to exit, currently (%i)' % threading.active_count())
            scan.STOP_ME = True
    print('Scan finished')
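With the script saved as, say, dirscan.py (the filename is only an example) and a dict.txt wordlist next to it, a typical run uses the arguments defined above:

python dirscan.py http://example.com -d dict.txt -t 20 -o result.txt

Hits are printed in the form [200]http://example.com/admin/ and appended to the output file.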
