#!/usr/bin/python
# -*- coding: UTF-8 -*-
# @date: 2020/2/27 16:31
# @name: Fofa_Spider
# @author:Mke2fs
from tld import get_tld
import requests,re,time,base64,urllib
import requests
import random
import re
import time
from threading import Thread
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Base URL of the FOFA search site.
Host = 'https://fofa.so/'

# FOFA session cookie. It has to be replaced with a fresh value for every run;
# updating the cookie here is all that is needed for the scraper to work.
cookies = {'_fofapro_ars_session': '3a96d0e7e6caf3b25d06723f682ef807'}

# Header dictionary passed as ``headers=`` when fetching FOFA pages.
# The session id is also carried here under its cookie name, with the same
# value as in ``cookies``.
# NOTE(review): the If-None-Match value opens a double quote that is never
# closed -- confirm whether this header is wanted at all.
data = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Accept-Encoding': 'gzip, deflate',
    '_fofapro_ars_session': cookies['_fofapro_ars_session'],
    'Connection': 'close',
    'Upgrade-Insecure-Requests': '1',
    'If-None-Match': 'W/"fc89a1c5bc61e3b8e515db61cef74ac0',
    'Cache-Control': 'max-age=0',
}

# Browser-like headers (including a captured cookie) for rank.chinaz.com.
# Not referenced by any other code in the visible part of this file.
zhanzhang_headers = {
    'Host': 'rank.chinaz.com',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cookie': 'BDTUJIAID=febc82b216a29e116730505bc1e471a9; inputbox_urls=%5b%22passivcashincome.com%22%2c%22feifeizuida.com%22%5d; UM_distinctid=16e63892b4e3b1-031b6053dcfc9f-7711b3e-100200-16e63892b4fa8a; Hm_lvt_aecc9715b0f5d5f7f34fba48a3c511d6=1579746706; CNZZDATA433095=cnzz_eid%3D297046501-1578041490-null%26ntime%3D1583974744; CNZZDATA5082706=cnzz_eid%3D902178444-1578044637-null%26ntime%3D1583975389; qHistory=aHR0cDovL3Rvb2wuY2hpbmF6LmNvbV/nq5nplb/lt6Xlhbd8aHR0cDovL3JhbmsuY2hpbmF6LmNvbV/nmb7luqbmnYPph43mn6Xor6J8aHR0cDovL3Rvb2wuY2hpbmF6LmNvbS90b29scy9lc2NhcGUuYXNweF9Fc2NhcGXliqDlr4Yv6Kej5a+GfGh0dHA6Ly93aG9pcy5jaGluYXouY29tL3JldmVyc2UrV2hvaXPlj43mn6V8aHR0cDovL3dob2lzLmNoaW5hei5jb20vK1dob2lz5p+l6K+i',
    'Connection': 'close',
}
# Captures everything between the first `href="` of a new-tab anchor and the
# last `">` on the same line ('.' does not cross newlines).
_ANCHOR_PATTERN = re.compile('<a target="_blank" href="(.*)">')
# Matches web (http/https) URLs only.
_URL_PATTERN = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')


def _extract_urls(html_text):
    """Return the http(s) URLs found in new-tab anchors of *html_text*.

    URLs are de-duplicated within each anchor capture (first occurrence
    wins) and returned as separate list items in document order.
    """
    urls = []
    for captured in _ANCHOR_PATTERN.findall(html_text):
        seen = set()
        for url in _URL_PATTERN.findall(captured):
            if url not in seen:
                seen.add(url)
                # One list item per URL; joining them with "" used to fuse
                # several URLs from one line into a single bogus string.
                urls.append(url)
    return urls


def getdata(Host, timeout=30):
    """Fetch one result page and return the web URLs it links to.

    Args:
        Host: Full URL of the page to download.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error; without it a stalled connection blocks forever.

    Returns:
        list[str]: http(s) URLs extracted from ``<a target="_blank" href=...>``
        anchors, in page order. Empty when the page has no such anchors.
    """
    response = requests.get(Host, headers=data, cookies=cookies, timeout=timeout)
    # Decode once at the I/O boundary and work on text from here on.
    return _extract_urls(response.content.decode('utf-8'))
def init(search_content, pages, start_page=3):
    """Walk FOFA result pages for a query and append the found sites to disk.

    For every page number from *start_page* to *pages* (inclusive) the page
    is fetched with ``getdata``, the ICP-filing link is removed, and the
    remaining URLs are printed and handed to ``write2file``.

    Args:
        search_content: FOFA query string, e.g. ``app="struts2" && host=".com"``.
        pages: Last page number to fetch (inclusive).
        start_page: First page number to fetch. Defaults to 3, the value
            that used to be hard-coded.
    """
    # Local import: the file imports only the bare ``urllib`` package, which
    # does not guarantee that ``urllib.parse`` is loaded.
    from urllib.parse import quote

    Host = 'https://fofa.so/'
    # Base64 output may contain '+', '/' and '=', all of which have a special
    # meaning inside a query string, so the value must be percent-encoded.
    encoded_rule = quote(
        base64.b64encode(search_content.encode("utf-8")).decode("ascii"), safe=''
    )
    for page in range(start_page, pages + 1):  # page range control
        quary = 'result?qbase64=' + encoded_rule + '&page=' + str(page)
        Hosts = Host + quary
        print(Hosts, '剩余查询次数' + str(500 - page))
        getlist = getdata(Hosts)
        # Random pause between requests.
        time.sleep(random.randint(1, 3))
        # Remove the ICP-filing link that shows up on the result pages, then
        # drop entries that became (or already were) empty so no blank lines
        # reach the output file.
        sites = [url.replace('http://beian.miit.gov.cn', '') for url in getlist]
        sites = [url for url in sites if url]
        print(sites)
        # The rank lookup module is switched off for now:
        # IRank(sites)
        if sites:
            write2file(sites)
def write2file(sites, path='Coremail-2020.txt'):
    """Append each non-empty entry of *sites* to a text file, one per line.

    Args:
        sites: Iterable of strings to store.
        path: Destination file, opened in append mode with UTF-8 encoding.
            Defaults to the file name that used to be hard-coded.

    Nothing is written (and the file is not created) when *sites* contains
    no non-empty entries.
    """
    entries = [site for site in sites if site]
    if not entries:
        return
    # Open the file once per call instead of once per entry.
    with open(path, 'a', encoding='utf-8') as out:
        out.writelines(site + '\n' for site in entries)
# Extracts the Baidu rank from the rank badge image on an aizhan.com SEO page.
_BAIDU_RANK_PATTERN = re.compile(r'<img src="//statics.aizhan.com/images/br/(.*?).png')


def _strip_scheme(url):
    """Return *url* without a leading http(s):// prefix or trailing slashes."""
    for prefix in ('https://', 'http://'):
        if url.startswith(prefix):
            url = url[len(prefix):]
            break
    return url.rstrip('/')


def IRank(sub):
    """Look up the Baidu rank of each site and record those ranked above 0.

    Every entry of *sub* is queried on aizhan.com through a headless Chrome
    instance; sites whose rank is greater than 0 are appended to
    ``iRank_Thinkcmf.txt``. A site whose lookup fails is reported and
    skipped so one bad entry does not stop the run.

    Args:
        sub: Iterable of site URLs (with or without an http(s):// prefix).
    """
    print('[+] 正在后台打开谷歌浏览器...')
    chrome_option = Options()
    # Do not load images, to speed things up.
    chrome_option.add_argument('blink-settings=imagesEnabled=false')
    # No visible browser window; on Linux without a display, startup fails
    # without this flag.
    chrome_option.add_argument('--headless')
    # Silence the console log, which is too noisy.
    chrome_option.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(options=chrome_option)
    try:
        # NOTE(review): Selenium takes this value in seconds, so 5000 is
        # roughly 83 minutes -- confirm whether milliseconds were intended.
        driver.set_page_load_timeout(5000)
        print('[+] 正在查询中,请稍等 ~')
        num = 0
        for line in sub:
            # str.strip('https://') removes a *set of characters* from both
            # ends (e.g. 'https://shop.com' -> 'op.com'), so drop the scheme
            # as a real prefix instead.
            site = _strip_scheme(line.strip())
            if not site:
                continue
            try:
                driver.get('https://www.aizhan.com/seo/{domain}'.format(domain=site))
                try:
                    baidurank = _BAIDU_RANK_PATTERN.findall(driver.page_source)[0]
                except IndexError:
                    # Badge not in the page yet: wait a little and retry once.
                    time.sleep(random.randint(1, 3))
                    baidurank = _BAIDU_RANK_PATTERN.findall(driver.page_source)[0]
                num = num + 1
                print("[+] 正在查询第" + str(num) + "条" + " 百度权重:" + str(baidurank) + " url: " + site)
                if int(baidurank) > 0:
                    with open('iRank_Thinkcmf.txt', 'a', encoding='utf-8') as l:
                        l.write(site + '\n')
            except Exception as e:
                # Best effort: report the failure and move on to the next site.
                print('[-] 查询失败 url: ' + site + ' 原因: ' + str(e))
    finally:
        # quit() also ends the chromedriver process; close() only closes the
        # current window.
        driver.quit()
if __name__ == "__main__":
    # Last result page to request.
    p2 = 999
    # Search query to run; the original note mentions the run reaching page 700.
    rule = 'app="Coremail-邮件系统" && host=".com"'
    init(search_content=rule, pages=p2)
"""
Rule library (sample FOFA search queries):
Struts2:
app="struts2" && country="CN" && host=".com"
app="ThinkPHP" && region="Shanghai" && host=".com"
app="thinkcmf" && region="Zhejiang" && host=".com"
app="Coremail-邮件系统" && country="CN" && host=".com"
app="Coremail-邮件系统" && host=".com"
"""