一、目录扫描原理
1.读取字典URL
2.HTTP GET请求url
3.判断状态码 输出保存在目录
二、字典读取函数
with open(xxxx, 'r') as f:
    for line in f.readlines():
三、代码实现
import sys
import requests
import threading
import queue
import time
import argparse
class Dirscan(object):
    """Multi-threaded directory scanner.

    Reads candidate paths from a wordlist, issues an HTTP GET for each one
    against the target site, and appends URLs that return HTTP 200 (and do
    not match the site's soft-404 page) to an output file.
    """

    def __init__(self, scanSite, scanDict, scanOutput, threadNum):
        """
        :param scanSite: target site; 'http://' is prepended when no scheme is given
        :param scanDict: path of the wordlist file
        :param scanOutput: output file path, or 0 to derive one from the site name
        :param threadNum: number of worker threads
        """
        # BUGFIX: the original tested ``scanSite.find('://') != 1``, which is
        # true for almost every input, so the scheme was essentially never
        # prepended. Prepend it only when it is actually absent.
        self.scanSite = scanSite if '://' in scanSite else 'http://%s' % scanSite
        print('扫描目标:', self.scanSite)
        self.scanDict = scanDict
        self.scanOutput = (
            scanSite.rstrip('/').replace('https://', '').replace('http://', '') + '.txt'
            if scanOutput == 0 else scanOutput
        )
        # Truncate any previous result file so this run starts clean.
        open(self.scanOutput, 'w').close()
        self.threadNum = threadNum
        self.lock = threading.Lock()  # serializes writes to the output file
        self._loadHeaders()
        self._loadDict(self.scanDict)
        self._analysis404()
        self.STOP_ME = False  # set True by the main thread to stop workers

    def _loadDict(self, dict_list):
        """Fill ``self.qlists`` with one path per non-comment, non-empty line."""
        self.qlists = queue.Queue()
        with open(dict_list, encoding='utf-8') as f:
            for line in f:
                entry = line.strip()
                # Skip blank lines as well as '#' comments — an empty entry
                # would make a worker request the bare site URL.
                if entry and not entry.startswith('#'):
                    self.qlists.put(entry)

    def _loadHeaders(self):
        """Default request headers sent with every probe."""
        self.headers = {
            'Accept': '*/*',
            'Referer': 'http://www.baidu.com',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; ',
            'Cache-Control': 'no-cache',
        }

    def _proxies(self):
        """Build a proxy mapping (ip:port for both schemes). Currently unused.

        BUGFIX: the original concatenated the int ``port`` to a str, which
        raised TypeError, and discarded the resulting dict.
        """
        ip = "192.168.43.125"
        port = 225
        return {'http': '%s:%d' % (ip, port), 'https': '%s:%d' % (ip, port)}

    def _analysis404(self):
        """Fetch a path that cannot exist to capture the site's soft-404 body."""
        notFoundPage = requests.get(
            self.scanSite + '/songgeshigedashuaibi/hello.html',
            allow_redirects=False)
        # Strip the probed path from the body so the stored text compares
        # equal to soft-404 pages that echo the requested path back.
        self.notFoundPageText = notFoundPage.text.replace(
            '/songgeshigedashuaibi/hello.html', '')

    def _writeOutput(self, result):
        """Append one result line to the output file (thread-safe)."""
        with self.lock:
            with open(self.scanOutput, 'a+') as f:
                f.write(result + '\n')

    def _scan(self, url):
        """Request ``url`` and record it when it looks like a real page."""
        try:
            html_result = requests.get(url, headers=self.headers,
                                       allow_redirects=False, timeout=60)
        # BUGFIX: the original caught only ConnectionError, so a Timeout (or
        # any other request failure) propagated and killed the worker thread.
        except requests.exceptions.RequestException:
            return
        if html_result.status_code == 200 and html_result.text != self.notFoundPageText:
            print('[%i]%s' % (html_result.status_code, html_result.url))
            self._writeOutput('[%i]%s' % (html_result.status_code, html_result.url))

    def run(self):
        """Worker loop: consume paths from the queue until empty or stopped."""
        while not self.STOP_ME:
            try:
                # BUGFIX: empty()-then-blocking-get() could hang a worker that
                # loses the race near queue exhaustion; get_nowait() cannot.
                path = self.qlists.get_nowait()
            except queue.Empty:
                break
            self._scan(self.scanSite + path)
if __name__ == '__main__':
    # Build the command-line interface: positional target plus optional
    # wordlist, output file and thread count.
    parser = argparse.ArgumentParser()
    parser.add_argument('scanSite', help="将要扫描的站点", type=str)
    parser.add_argument('-d', '--dict', dest="scanDict", help="扫描字典", type=str,
                        default="dict.txt")
    parser.add_argument('-o', '--output', dest="scanOutput", help="扫描结果存为文件",
                        type=str, default=0)
    parser.add_argument('-t', '--thread', dest="threadNum", help="输入线程数量",
                        type=int, default=60)
    args = parser.parse_args()

    scan = Dirscan(args.scanSite, args.scanDict, args.scanOutput, args.threadNum)

    # Start daemon workers: daemons must be flagged before start() and will
    # not keep the process alive once the main thread exits.
    for _ in range(args.threadNum):
        t = threading.Thread(target=scan.run)
        # BUGFIX: Thread.setDaemon()/threading.activeCount() are deprecated
        # aliases (removed-in-future); use the attribute / active_count().
        t.daemon = True
        t.start()

    # Poll until only the main thread remains; Ctrl-C asks workers to stop
    # gracefully via the shared STOP_ME flag, then keeps waiting for them.
    while threading.active_count() > 1:
        try:
            time.sleep(0.1)
        except KeyboardInterrupt:
            print('\n[警告]用户中止,等待所有从线程退出,当前 (%i)' % threading.active_count())
            scan.STOP_ME = True
    print('扫描结束')