首先来看一下url模块:它也是用来处理路径的,不过处理的是网络路径(URL)。
先来查看一下这个内置模块导出了哪些内容:

  1. let url = require('url')
  2. console.log(url)

结果(node xxx.js)

  1. {
  2. Url: [Function: Url],
  3. parse: [Function: urlParse],
  4. resolve: [Function: urlResolve],
  5. resolveObject: [Function: urlResolveObject],
  6. format: [Function: urlFormat],
  7. URL: [class URL],
  8. URLSearchParams: [class URLSearchParams],
  9. domainToASCII: [Function: domainToASCII],
  10. domainToUnicode: [Function: domainToUnicode],
  11. pathToFileURL: [Function: pathToFileURL],
  12. fileURLToPath: [Function: fileURLToPath],
  13. urlToHttpOptions: [Function: urlToHttpOptions]
  14. }

我们常常用到的就是parse

它可以帮助我们快速地将一个URL地址(字符串)解析成一个**url对象**

  1. let url = require('url')
  2. //console.log(url)
  3. //https:协议
  4. //www.bilibili.com:域名
  5. //目录以及文件
  6. //?后:查询的数据
  7. let httpUrl = "https://www.bilibili.com"
  8. //都可以将他分开来
  9. //parse能够帮我们快速的解析出来(解析可以用正则表达式)
  10. let urlObj = url.parse(httpUrl)
  11. console.log(urlObj)//输出

结果:

  1. Url {
  2. protocol: 'https:', //协议
  3. slashes: true,
  4. auth: null,
  5. host: 'www.bilibili.com', //域名,主机名
  6. port: null, //端口号,null:默认的端口号
  7. hostname: 'www.bilibili.com',
  8. hash: null, //#+数据
  9. search: null, //"?"+参数(包含问号的查询字符串)
  10. query: null, //?后的参数信息
  11. pathname: '/',
  12. path: '/', //域名后的路径
  13. href: 'https://www.bilibili.com/'
  14. }

合成路径

**url.resolve()**方法以基URL为基准,把目标URL(可以是相对路径)解析成一个完整的URL。第一个参数:基URL,第二个参数:目标URL

  1. let url = require('url')
  2. let urlStr = url.resolve('https://www.bilibili.com','/home')
  3. console.log(urlStr)

这个API最好的地方在于:目标URL可以写成相对路径(例如 ./home),它会基于基URL自动解析出完整地址(下面的httpUrl沿用上文的定义):

  1. let urlObj = url.parse(httpUrl)
  2. let xdurl = "./home"
  3. let newUrl = url.resolve(httpUrl,xdurl)
  4. console.log(newUrl)

学Vue时一般都用axios这个库来发ajax请求
axios的好处是浏览器端和Node.js端都能用它发请求
你也可以用原生ajax去写请求,只是写起来比较繁琐

axios中文文档

这是第三方库,所以要安装

  1. npm install axios

或者

  1. cnpm install axios

引入axios模块:

  1. let axios = require('axios')
  2. console.log(axios)

结果:

  1. <ref *1> [Function: wrap] {
  2. request: [Function: wrap],
  3. getUri: [Function: wrap],
  4. delete: [Function: wrap],
  5. get: [Function: wrap],
  6. head: [Function: wrap],
  7. options: [Function: wrap],
  8. post: [Function: wrap],
  9. put: [Function: wrap],
  10. patch: [Function: wrap],
  11. defaults: {
  12. transitional: {
  13. silentJSONParsing: true,
  14. forcedJSONParsing: true,
  15. clarifyTimeoutError: false
  16. },
  17. adapter: [Function: httpAdapter],
  18. transformRequest: [ [Function: transformRequest] ],
  19. transformResponse: [ [Function: transformResponse] ],
  20. timeout: 0,
  21. xsrfCookieName: 'XSRF-TOKEN',
  22. xsrfHeaderName: 'X-XSRF-TOKEN',
  23. maxContentLength: -1,
  24. maxBodyLength: -1,
  25. validateStatus: [Function: validateStatus],
  26. headers: {
  27. common: [Object],
  28. delete: {},
  29. get: {},
  30. head: {},
  31. post: [Object],
  32. put: [Object],
  33. patch: [Object]
  34. }
  35. },
  36. interceptors: {
  37. request: InterceptorManager { handlers: [] },
  38. response: InterceptorManager { handlers: [] }
  39. },
  40. create: [Function: create],
  41. Axios: [Function: Axios],
  42. Cancel: [Function: Cancel],
  43. CancelToken: [Function: CancelToken] { source: [Function: source] },
  44. isCancel: [Function: isCancel],
  45. VERSION: '0.24.0',
  46. all: [Function: all],
  47. spread: [Function: spread],
  48. isAxiosError: [Function: isAxiosError],
  49. default: [Circular *1]
  50. }

可以get,post,put,delete

去请求bilibili首页

  1. let bilibiliMasterUrl = 'https://www.bilibili.com'
  2. axios.get(bilibiliMasterUrl).then(function(res){
  3. console.log(res)
  4. })

如果请求被拒绝,有可能是服务器不信任你,认为你不是浏览器。这时可以通过设置请求头把自己伪装成浏览器:

  1. let bilibiliMasterUrl = 'https://www.bilibili.com'
  2. axios.get(bilibiliMasterUrl,{headers:{'X-Requested-With':'XMLHttpRequest'}}).then(function(res){
  3. console.log(res)
  4. })

还有一种:

  1. let bilibiliMasterUrl = 'https://www.bilibili.com'
  2. axios.get(bilibiliMasterUrl,{headers:{'X-Requested-With':'XMLHttpRequest',
  3. "upgrade-insecure-requests": 1,
  4. "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 Edg/97.0.1072.55"}}).then(function(res){
  5. console.log(res)
  6. })
  1. upgrade-insecure-requests:告诉服务器客户端希望把不安全的请求升级成https
  2. user-agent:用户代理,告知服务器我是一个浏览器

如果还是请求失败,可以换用另一个第三方库:request(同样需要先用 npm install request 安装)。
注意:request库已被其维护者标记为废弃(deprecated),这里仅作演示。

爬取数据

获取起始地址
  1. let bilibiliMasterUrl = 'https://www.bilibili.com'

获取起始页面的所有分类地址(用正则写)
  1. let request = require('request')
  2. function req(url){
  3. return new Promise(function(resolve,reject){
  4. request.get(url,function(err,response,body){
  5. if(err){
  6. reject(err)
  7. }else{
  8. //有些时候它会返回给你一些cookie
  9. //比如开通会员会给你一些凭证,这些凭证都在response里面
  10. resolve({response,body})
  11. }
  12. })
  13. })
  14. }

这样我们就可以对他进行一个请求了:

  1. let request = require('request')
  2. let httpbilibili = "https://www.bilibili.com"
  3. //获取起始页面的所有分类地址
  4. async function getClassUrl(){
  5. let {response,body} = await req(httpbilibili)
  6. console.log(body)
  7. //解析html内容
  8. }
  9. function req(url){
  10. return new Promise(function(resolve,reject){
  11. request.get(url,function(err,response,body){
  12. if(err){
  13. reject(err)
  14. }else{
  15. //有些时候它会返回给你一些cookie
  16. //比如开通会员会给你一些凭证,这些凭证都在response里面
  17. resolve({response,body})
  18. }
  19. })
  20. })
  21. }
  22. getClassUrl()//调用

有数据就OK
然后我们分析导航栏源代码:

如果我们找到了,那我们怎么去拿?

用正则表达式(作者对正则不太熟,下面的写法仅供参考,不要照搬)

  1. let request = require('request')
  2. let httpbilibili = "https://www.bilibili.com/"
  3. function req(url){
  4. return new Promise(function(resolve,reject){
  5. request.get(url,function(err,response,body){
  6. if(err){
  7. reject(err)
  8. }else{
  9. //有些时候它会返回给你一些cookie
  10. //比如开通会员会给你一些凭证,这些凭证都在response里面
  11. resolve({response,body})
  12. }
  13. })
  14. })
  15. }
  16. //获取起始页面的所有分类地址
  17. async function getClassUrl(){
  18. let {response,body} = await req(httpbilibili)
  19. console.log(body)
  20. //解析html内容
  21. //用正则
  22. //从<div class="channel-items">开始,到</div>结束,(.*?)非贪婪地匹配中间的全部内容;s标志让.也能匹配换行符
  23. let reg = /<div class="channel-items">(.*?)<\/div>/igs
  24. let result = reg.exec(body)[1]
  25. let reg1 = /<a class="channel-link" href=(.*?)>(.*?)<\/a>/igs //</a>打\:转义
  26. let res = reg1.exec(result)
  27. //创建对象
  28. let obj = {
  29. className: res[2],
  30. url: res[1]
  31. }
  32. console.log(obj)
  33. }
  34. getClassUrl()