一、目录扫描原理
1.读取字典URL
2.HTTP GET请求url
3.判断状态码 输出保存在目录
二、字典读取函数
with open(xxxx, 'r') as f:
    for line in f.readlines():
三、代码实现
import sys
import requests
import threading
import queue
import time
import argparse
class Dirscan(object):
    """Multi-threaded directory scanner.

    Reads candidate paths from a wordlist, issues an HTTP GET for each one
    against the target site, and appends URLs that return HTTP 200 (and do
    not match the site's soft-404 page) to an output file.
    """

    def __init__(self, scanSite, scanDict, scanOutput, threadNum):
        """
        :param scanSite: target site; 'http://' is prepended when no scheme is given
        :param scanDict: path of the wordlist file
        :param scanOutput: output file path, or 0 to derive one from the site name
        :param threadNum: number of worker threads
        """
        # BUGFIX: the original tested ``scanSite.find('://') != 1``, which is
        # true for almost every input, so the scheme was essentially never
        # prepended. Prepend it only when it is actually absent.
        self.scanSite = scanSite if '://' in scanSite else 'http://%s' % scanSite
        print('扫描目标:', self.scanSite)
        self.scanDict = scanDict
        self.scanOutput = (
            scanSite.rstrip('/').replace('https://', '').replace('http://', '') + '.txt'
            if scanOutput == 0 else scanOutput
        )
        # Truncate any previous result file so this run starts clean.
        open(self.scanOutput, 'w').close()
        self.threadNum = threadNum
        self.lock = threading.Lock()  # serializes writes to the output file
        self._loadHeaders()
        self._loadDict(self.scanDict)
        self._analysis404()
        self.STOP_ME = False  # set True by the main thread to stop workers

    def _loadDict(self, dict_list):
        """Fill ``self.qlists`` with one path per non-comment, non-empty line."""
        self.qlists = queue.Queue()
        with open(dict_list, encoding='utf-8') as f:
            for line in f:
                entry = line.strip()
                # Skip blank lines as well as '#' comments — an empty entry
                # would make a worker request the bare site URL.
                if entry and not entry.startswith('#'):
                    self.qlists.put(entry)

    def _loadHeaders(self):
        """Default request headers sent with every probe."""
        self.headers = {
            'Accept': '*/*',
            'Referer': 'http://www.baidu.com',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; ',
            'Cache-Control': 'no-cache',
        }

    def _proxies(self):
        """Build a proxy mapping (ip:port for both schemes). Currently unused.

        BUGFIX: the original concatenated the int ``port`` to a str, which
        raised TypeError, and discarded the resulting dict.
        """
        ip = "192.168.43.125"
        port = 225
        return {'http': '%s:%d' % (ip, port), 'https': '%s:%d' % (ip, port)}

    def _analysis404(self):
        """Fetch a path that cannot exist to capture the site's soft-404 body."""
        notFoundPage = requests.get(
            self.scanSite + '/songgeshigedashuaibi/hello.html',
            allow_redirects=False)
        # Strip the probed path from the body so the stored text compares
        # equal to soft-404 pages that echo the requested path back.
        self.notFoundPageText = notFoundPage.text.replace(
            '/songgeshigedashuaibi/hello.html', '')

    def _writeOutput(self, result):
        """Append one result line to the output file (thread-safe)."""
        with self.lock:
            with open(self.scanOutput, 'a+') as f:
                f.write(result + '\n')

    def _scan(self, url):
        """Request ``url`` and record it when it looks like a real page."""
        try:
            html_result = requests.get(url, headers=self.headers,
                                       allow_redirects=False, timeout=60)
        # BUGFIX: the original caught only ConnectionError, so a Timeout (or
        # any other request failure) propagated and killed the worker thread.
        except requests.exceptions.RequestException:
            return
        if html_result.status_code == 200 and html_result.text != self.notFoundPageText:
            print('[%i]%s' % (html_result.status_code, html_result.url))
            self._writeOutput('[%i]%s' % (html_result.status_code, html_result.url))

    def run(self):
        """Worker loop: consume paths from the queue until empty or stopped."""
        while not self.STOP_ME:
            try:
                # BUGFIX: empty()-then-blocking-get() could hang a worker that
                # loses the race near queue exhaustion; get_nowait() cannot.
                path = self.qlists.get_nowait()
            except queue.Empty:
                break
            self._scan(self.scanSite + path)
if __name__ == '__main__':
    # Build the command-line interface: positional target plus optional
    # wordlist, output file and thread count.
    parser = argparse.ArgumentParser()
    parser.add_argument('scanSite', help="将要扫描的站点", type=str)
    parser.add_argument('-d', '--dict', dest="scanDict", help="扫描字典", type=str,
                        default="dict.txt")
    parser.add_argument('-o', '--output', dest="scanOutput", help="扫描结果存为文件",
                        type=str, default=0)
    parser.add_argument('-t', '--thread', dest="threadNum", help="输入线程数量",
                        type=int, default=60)
    args = parser.parse_args()

    scan = Dirscan(args.scanSite, args.scanDict, args.scanOutput, args.threadNum)

    # Start daemon workers: daemons must be flagged before start() and will
    # not keep the process alive once the main thread exits.
    for _ in range(args.threadNum):
        t = threading.Thread(target=scan.run)
        # BUGFIX: Thread.setDaemon()/threading.activeCount() are deprecated
        # aliases (removed-in-future); use the attribute / active_count().
        t.daemon = True
        t.start()

    # Poll until only the main thread remains; Ctrl-C asks workers to stop
    # gracefully via the shared STOP_ME flag, then keeps waiting for them.
    while threading.active_count() > 1:
        try:
            time.sleep(0.1)
        except KeyboardInterrupt:
            print('\n[警告]用户中止,等待所有从线程退出,当前 (%i)' % threading.active_count())
            scan.STOP_ME = True
    print('扫描结束')