首先来看一下url
:这个也是用来处理路径的,这个路径是网络路径
首先来查看一下内置的模块:
let url = require('url')
console.log(url)
结果(node xxx.js)
{
Url: [Function: Url],
parse: [Function: urlParse],
resolve: [Function: urlResolve],
resolveObject: [Function: urlResolveObject],
format: [Function: urlFormat],
URL: [class URL],
URLSearchParams: [class URLSearchParams],
domainToASCII: [Function: domainToASCII],
domainToUnicode: [Function: domainToUnicode],
pathToFileURL: [Function: pathToFileURL],
fileURLToPath: [Function: fileURLToPath],
urlToHttpOptions: [Function: urlToHttpOptions]
}
我们常常用到的就是parse
这个可以帮助我们快速地将一个URL地址字符串解析成一个**url对象**
let url = require('url')
//console.log(url)
//https:协议
//www.bilibili.com:域名
//目录以及文件
//?后:查询的数据
let httpUrl = "https://www.bilibili.com"
//都可以将他分开来
//parse能够帮我们快速的解析出来(解析可以用正则表达式)
let urlObj = url.parse(httpUrl)
console.log(urlObj)//输出
结果:
Url {
protocol: 'https:', //协议
slashes: true,
auth: null,
host: 'www.bilibili.com', //域名,主机名
port: null, //端口号,null:默认的端口号
hostname: 'www.bilibili.com',
hash: null, //#+数据
search: null, //"?"+参数(带问号的查询字符串)
query: null, //?后的参数信息
pathname: '/',
path: '/', //域名后的路径
href: 'https://www.bilibili.com/'
}
合成路径
**url.resolve()**
该方法以基URL为基准,解析出目标URL的完整地址。第一个参数:基URL,第二个参数:目标URL(可以是相对路径)
let url = require('url')
let urlStr = url.resolve('https://www.bilibili.com','/home')
console.log(urlStr)
这个API最好的地方在于可以直接解析相对路径:
let urlObj = url.parse(httpUrl)
let xdurl = "./home"
let newUrl = url.resolve(httpUrl,xdurl)
console.log(newUrl)
学Vue一般做请求都用axios这个库来做ajax请求。
axios的好处就是前后端都能请求。
你也可以用原生ajax去写请求,这样就是比较累。
axios中文文档
这是第三方库,所以要安装
npm install axios
或者
cnpm install axios
获取axios
js文件:
let axios = require('axios')
console.log(axios)
结果:
<ref *1> [Function: wrap] {
request: [Function: wrap],
getUri: [Function: wrap],
delete: [Function: wrap],
get: [Function: wrap],
head: [Function: wrap],
options: [Function: wrap],
post: [Function: wrap],
put: [Function: wrap],
patch: [Function: wrap],
defaults: {
transitional: {
silentJSONParsing: true,
forcedJSONParsing: true,
clarifyTimeoutError: false
},
adapter: [Function: httpAdapter],
transformRequest: [ [Function: transformRequest] ],
transformResponse: [ [Function: transformResponse] ],
timeout: 0,
xsrfCookieName: 'XSRF-TOKEN',
xsrfHeaderName: 'X-XSRF-TOKEN',
maxContentLength: -1,
maxBodyLength: -1,
validateStatus: [Function: validateStatus],
headers: {
common: [Object],
delete: {},
get: {},
head: {},
post: [Object],
put: [Object],
patch: [Object]
}
},
interceptors: {
request: InterceptorManager { handlers: [] },
response: InterceptorManager { handlers: [] }
},
create: [Function: create],
Axios: [Function: Axios],
Cancel: [Function: Cancel],
CancelToken: [Function: CancelToken] { source: [Function: source] },
isCancel: [Function: isCancel],
VERSION: '0.24.0',
all: [Function: all],
spread: [Function: spread],
isAxiosError: [Function: isAxiosError],
default: [Circular *1]
}
axios可以发起get、post、put、delete等请求
去请求bilibili首页
// Request the bilibili home page and print the whole axios response object.
let bilibiliMasterUrl = 'https://www.bilibili.com'
axios
  .get(bilibiliMasterUrl)
  .then((res) => {
    console.log(res)
  })
如果请求被拒绝,有可能是对方不信任你,觉得你不是一个浏览器。你可以通过设置请求头来模仿浏览器:用axios的headers配置项
let bilibiliMasterUrl = 'https://www.bilibili.com'
// axios reads custom request headers from the `headers` config key.
// A key spelled `header` is not used for the outgoing request, so the
// X-Requested-With header would never be sent.
axios.get(bilibiliMasterUrl, { headers: { 'X-Requested-With': 'XMLHttpRequest' } })
  .then(function (res) {
    console.log(res)
  })
  .catch(function (err) {
    // Report a refused or failed request instead of leaving the rejection unhandled.
    console.error(err)
  })
还有一种:
let bilibiliMasterUrl = 'https://www.bilibili.com'
axios.get(bilibiliMasterUrl, {
  // All request headers must live inside `headers`. Keys placed directly on
  // the config object (or under a misspelled `header` key) are not sent.
  headers: {
    'X-Requested-With': 'XMLHttpRequest',
    'upgrade-insecure-requests': 1,
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 Edg/97.0.1072.55'
  }
})
  .then(function (res) {
    console.log(res)
  })
  .catch(function (err) {
    // Report a refused or failed request instead of leaving the rejection unhandled.
    console.error(err)
  })
upgrade-insecure-requests:升级成https
user-agent:用户代理,告知我是一个浏览器
还是请求失败的话:
可以使用另一个第三方库:request
爬取数据
获取起始地址
let bilibiliMasterUrl = 'https://www.bilibili.com'
获取起始页面的所有分类地址(用正则写)
let request = require('request')
/**
 * Promise wrapper around the callback-style `request.get`.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{response: *, body: *}>} Resolves with the response object
 *   and the body handed to the callback; rejects with the callback's error.
 */
function req(url) {
  return new Promise((resolve, reject) => {
    const onFinished = (err, response, body) => {
      if (err) {
        reject(err)
        return
      }
      // The response is passed along with the body because the server may
      // send back cookies (for example credentials issued after buying a
      // membership), and those live on the response.
      resolve({ response, body })
    }
    request.get(url, onFinished)
  })
}
这样我们就可以对他进行一个请求了:
let request = require('request')
let httpbilibili = "https://www.bilibili.com"
//获取起始页面的所有分类地址
/**
 * Fetches the start page through `req` and prints its body.
 * Parsing of the HTML is not implemented yet.
 *
 * @returns {Promise<void>}
 */
async function getClassUrl() {
  const page = await req(httpbilibili)
  console.log(page.body)
  // TODO: parse the HTML content
}
/**
 * Promise wrapper around the callback-style `request.get`.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{response: *, body: *}>} Resolves with the response object
 *   and the body handed to the callback; rejects with the callback's error.
 */
function req(url) {
  return new Promise((resolve, reject) => {
    const onFinished = (err, response, body) => {
      if (err) {
        reject(err)
        return
      }
      // The response is passed along with the body because the server may
      // send back cookies (for example credentials issued after buying a
      // membership), and those live on the response.
      resolve({ response, body })
    }
    request.get(url, onFinished)
  })
}
getClassUrl()//调用
有数据就OK
然后我们分析导航栏源代码:
如果我们找到了,那我们怎么去拿?
用正则(自己不太熟不要仿照)
let request = require('request')
let httpbilibili = "https://www.bilibili.com/"
/**
 * Promise wrapper around the callback-style `request.get`.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{response: *, body: *}>} Resolves with the response object
 *   and the body handed to the callback; rejects with the callback's error.
 */
function req(url) {
  return new Promise((resolve, reject) => {
    const onFinished = (err, response, body) => {
      if (err) {
        reject(err)
        return
      }
      // The response is passed along with the body because the server may
      // send back cookies (for example credentials issued after buying a
      // membership), and those live on the response.
      resolve({ response, body })
    }
    request.get(url, onFinished)
  })
}
//获取起始页面的所有分类地址
/**
 * Fetches the start page and extracts the first category link from the
 * channel navigation block.
 *
 * Prints the raw HTML, then prints and returns `{ className, url }` for the
 * first `<a class="channel-link">` found inside `<div class="channel-items">`.
 *
 * @returns {Promise<{className: string, url: string}>} Link text and href value.
 * @throws {Error} If the navigation block or a category link cannot be found.
 */
async function getClassUrl() {
  const { body } = await req(httpbilibili)
  console.log(body)

  // The `s` flag lets `.` match newlines, so the lazy group spans the
  // multi-line block from <div class="channel-items"> to the first </div>.
  const navMatch = /<div class="channel-items">(.*?)<\/div>/is.exec(body)
  if (navMatch === null) {
    throw new Error('channel-items block not found in the page')
  }

  // Capture only the href value: the optional quotes stay outside the group,
  // and any attributes after href (e.g. target="_blank") are skipped up to `>`.
  const linkMatch = /<a class="channel-link" href=["']?([^"'\s>]+)["']?[^>]*>(.*?)<\/a>/is.exec(navMatch[1])
  if (linkMatch === null) {
    throw new Error('no channel-link anchor found inside the channel-items block')
  }

  const obj = {
    className: linkMatch[2],
    url: linkMatch[1]
  }
  console.log(obj)
  return obj
}
getClassUrl()