首先来看一下url:这个也是用来处理路径的,这个路径是网络路径
首先来查看一下内置的模块:
// Load Node's built-in `url` module and print everything it exports.
const url = require('url');
console.log(url);
结果(node xxx.js)
{Url: [Function: Url],parse: [Function: urlParse],resolve: [Function: urlResolve],resolveObject: [Function: urlResolveObject],format: [Function: urlFormat],URL: [class URL],URLSearchParams: [class URLSearchParams],domainToASCII: [Function: domainToASCII],domainToUnicode: [Function: domainToUnicode],pathToFileURL: [Function: pathToFileURL],fileURLToPath: [Function: fileURLToPath],urlToHttpOptions: [Function: urlToHttpOptions]}
我们常常用到的就是parse
它可以帮助我们快速地将一个URL地址(网址)解析成一个**url对象**
const url = require('url');

// Anatomy of a URL:
//   https:             -> protocol
//   www.bilibili.com   -> domain name
//   what comes next    -> directory and file
//   everything after ? -> query data
const httpUrl = "https://www.bilibili.com";

// All of those parts can be separated from each other. url.parse() does the
// splitting for us (the same could be done by hand with regular expressions).
const urlObj = url.parse(httpUrl);
console.log(urlObj); // print the parsed Url object
结果:
Url {
  protocol: 'https:', // 协议
  slashes: true,
  auth: null,
  host: 'www.bilibili.com', // 域名,主机名
  port: null, // 端口号,null:默认的端口号
  hostname: 'www.bilibili.com',
  hash: null, // "#" 及其后面的数据
  search: null, // "?" 及其后面的查询字符串
  query: null, // "?" 后的参数信息(不含 "?")
  pathname: '/',
  path: '/', // 域名后的路径(pathname + search)
  href: 'https://www.bilibili.com/'
}
合成路径
**url.resolve()** 方法以基URL为基准,解析出目标URL的完整地址。第一个参数:基URL,第二个参数:目标URL(可以是相对路径)
const url = require('url');

// Resolve the target '/home' against the base URL.
const urlStr = url.resolve('https://www.bilibili.com', '/home');
console.log(urlStr); // https://www.bilibili.com/home
这个API最好的地方在于:它可以直接处理相对路径(例如 ./home),根据基URL自动拼出完整的地址:
// Fragment: relies on `url` and `httpUrl` already being in scope.
// A relative target is resolved against the base URL.
const xdurl = "./home";
const newUrl = url.resolve(httpUrl, xdurl);
console.log(newUrl);
学Vue时,一般都用axios这个库来做ajax请求。axios的好处是前后端(浏览器和Node.js)都能用它发请求
你也可以用原生ajax去写请求,这样就是比较累
axios中文文档
这是第三方库,所以要安装
npm install axios
或者
cnpm install axios
引入axios模块:
// Load the third-party axios package and print what it exports.
const axios = require('axios');
console.log(axios);
结果:
<ref *1> [Function: wrap] {request: [Function: wrap],getUri: [Function: wrap],delete: [Function: wrap],get: [Function: wrap],head: [Function: wrap],options: [Function: wrap],post: [Function: wrap],put: [Function: wrap],patch: [Function: wrap],defaults: {transitional: {silentJSONParsing: true,forcedJSONParsing: true,clarifyTimeoutError: false},adapter: [Function: httpAdapter],transformRequest: [ [Function: transformRequest] ],transformResponse: [ [Function: transformResponse] ],timeout: 0,xsrfCookieName: 'XSRF-TOKEN',xsrfHeaderName: 'X-XSRF-TOKEN',maxContentLength: -1,maxBodyLength: -1,validateStatus: [Function: validateStatus],headers: {common: [Object],delete: {},get: {},head: {},post: [Object],put: [Object],patch: [Object]}},interceptors: {request: InterceptorManager { handlers: [] },response: InterceptorManager { handlers: [] }},create: [Function: create],Axios: [Function: Axios],Cancel: [Function: Cancel],CancelToken: [Function: CancelToken] { source: [Function: source] },isCancel: [Function: isCancel],VERSION: '0.24.0',all: [Function: all],spread: [Function: spread],isAxiosError: [Function: isAxiosError],default: [Circular *1]}
可以get,post,put,delete
去请求bilibili首页
// Fragment: relies on `axios` already being in scope.
const bilibiliMasterUrl = 'https://www.bilibili.com';

// Request the home page and print the whole response object.
axios
  .get(bilibiliMasterUrl)
  .then(function (res) {
    console.log(res);
  })
  .catch(function (err) {
    // Report a failed request instead of leaving the rejection unhandled.
    console.error(err);
  });
如果请求被拒绝,有可能是对方不信任你,觉得你不是一个浏览器。这时你可以伪装成浏览器:在请求里加上请求头(headers)
// Fragment: relies on `axios` already being in scope.
const bilibiliMasterUrl = 'https://www.bilibili.com';

// Send an extra request header with the GET. The axios config key is
// `headers` (plural); a `header` key is not read by axios.
axios
  .get(bilibiliMasterUrl, {
    headers: {
      'X-Requested-With': 'XMLHttpRequest',
    },
  })
  .then(function (res) {
    console.log(res);
  })
  .catch(function (err) {
    // Report a failed request instead of leaving the rejection unhandled.
    console.error(err);
  });
还有一种:
// Fragment: relies on `axios` already being in scope.
const bilibiliMasterUrl = 'https://www.bilibili.com';

// Send browser-like request headers. Every header belongs inside the
// `headers` object of the axios config; keys placed next to it are treated
// as config options, not as HTTP headers.
axios
  .get(bilibiliMasterUrl, {
    headers: {
      'X-Requested-With': 'XMLHttpRequest',
      "upgrade-insecure-requests": 1,
      "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 Edg/97.0.1072.55",
    },
  })
  .then(function (res) {
    console.log(res);
  })
  .catch(function (err) {
    // Report a failed request instead of leaving the rejection unhandled.
    console.error(err);
  });
upgrade-insecure-requests:升级成https;user-agent:用户代理,告知对方我是一个浏览器
还是请求失败的话:
可以使用另一个第三方库:request
爬取数据
获取起始地址
// Entry point of the crawl: the bilibili home page.
const bilibiliMasterUrl = 'https://www.bilibili.com';
获取起始页面的所有分类地址(用正则写)
const request = require('request');

/**
 * Issue a GET request with the `request` library and expose it as a Promise.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{response: object, body: *}>} Resolves with the response
 *   object and the body handed to the callback; rejects with the callback's
 *   error.
 */
function req(url) {
  return new Promise(function (resolve, reject) {
    request.get(url, function (err, response, body) {
      if (err) {
        reject(err);
      } else {
        // The server sometimes sends back cookies (for example credentials
        // issued after buying a membership). They live on `response`, so it
        // is returned together with the body.
        resolve({ response, body });
      }
    });
  });
}
这样我们就可以对他进行一个请求了:
const request = require('request');

const httpbilibili = "https://www.bilibili.com";

/**
 * Fetch the start page and print its HTML.
 * This is the first step towards collecting every category address.
 *
 * @returns {Promise<void>}
 */
async function getClassUrl() {
  const { body } = await req(httpbilibili);
  console.log(body);
  // Next step: parse the HTML content.
}

/**
 * Issue a GET request with the `request` library and expose it as a Promise.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{response: object, body: *}>} Resolves with the response
 *   object and the body handed to the callback; rejects with the callback's
 *   error.
 */
function req(url) {
  return new Promise(function (resolve, reject) {
    request.get(url, function (err, response, body) {
      if (err) {
        reject(err);
      } else {
        // The server sometimes sends back cookies (for example credentials
        // issued after buying a membership). They live on `response`, so it
        // is returned together with the body.
        resolve({ response, body });
      }
    });
  });
}

// Run it, reporting a failed request instead of leaving the promise floating.
getClassUrl().catch(function (err) {
  console.error(err);
});
有数据就OK
然后我们分析导航栏源代码:
如果我们找到了,那我们怎么去拿?
用正则(自己不太熟不要仿照)
const request = require('request');

const httpbilibili = "https://www.bilibili.com/";

/**
 * Issue a GET request with the `request` library and expose it as a Promise.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<{response: object, body: *}>} Resolves with the response
 *   object and the body handed to the callback; rejects with the callback's
 *   error.
 */
function req(url) {
  return new Promise(function (resolve, reject) {
    request.get(url, function (err, response, body) {
      if (err) {
        reject(err);
      } else {
        // The server sometimes sends back cookies (for example credentials
        // issued after buying a membership). They live on `response`, so it
        // is returned together with the body.
        resolve({ response, body });
      }
    });
  });
}

/**
 * Fetch the start page, print its HTML, and print the first category link
 * found in the navigation as `{ className, url }`.
 *
 * @returns {Promise<void>}
 */
async function getClassUrl() {
  const { body } = await req(httpbilibili);
  console.log(body);

  // Parse the HTML with regular expressions.
  // Capture everything from <div class="channel-items"> up to the first
  // </div>. The `s` flag lets `.` match line breaks as well.
  const reg = /<div class="channel-items">(.*?)<\/div>/igs;
  const container = reg.exec(body);
  if (container === null) {
    // exec() returns null when nothing matches; indexing it would throw.
    console.error('No <div class="channel-items"> block found in the page.');
    return;
  }
  const result = container[1];

  // `\/` escapes the slash of the closing </a> tag.
  // Group 1 is whatever sits between `href=` and the next `>`, so it keeps
  // the surrounding quotes; group 2 is the link text.
  const reg1 = /<a class="channel-link" href=(.*?)>(.*?)<\/a>/igs;
  const res = reg1.exec(result);
  if (res === null) {
    console.error('No <a class="channel-link"> element found in the channel list.');
    return;
  }

  // Build the result object.
  const obj = {
    className: res[2],
    url: res[1],
  };
  console.log(obj);
}

// Run it, reporting a failed request instead of leaving the promise floating.
getClassUrl().catch(function (err) {
  console.error(err);
});
