第一步:开发配置(使用node.js,express框架)
1.新建目录antdComponent
3.进入根文件夹antdComponent
3.初始化文件,执行npm init创建package.json文件
4.安装依赖 npm install express, npm install superagent, npm install cheerio
注:express (使用express来搭建一个简单的Http服务器。当然,你也可以使用node中自带的http模块)superagent (superagent是node里一个非常方便的、轻量的、渐进式的第三方客户端请求代理模块,用它来请求目标页面)cheerio (cheerio相当于node版的jQuery,用过jQuery的同学会非常容易上手。它主要是用来获取抓取到的页面元素和其中的数据信息)
作者:MagicEyes
链接:https://juejin.im/post/5b4f007fe51d4519277b9707
来源:掘金
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
第二步:开始coding
1.在根目录下创建index.js文件
2.创建好index.js后,我们首先实例化一个express对象,用它来启动一个本地监听3000端口的Http服务。
const express = require('express');
// Modules used by the scraping helpers and the /parse route in the later steps.
const superagent = require('superagent');
const cheerio = require('cheerio');
const fs = require('fs');
const path = require('path');

const app = express();

// ... (the routes and helper functions from the following steps go here)

// Start a local HTTP server on port 3000 and log the address it is bound to.
// The listen call must sit on its own lines: if it shares a line with the
// `// ...` placeholder above, it becomes part of that comment and never runs.
const server = app.listen(3000, function () {
  const host = server.address().address;
  const port = server.address().port;
  console.log('Your App is running at http://%s:%s', host, port);
});
3.按照国际惯例,我们希望在访问本机地址http://localhost:3000的时候,这个服务能给我们返回一个Hello World!在index.js中加入如下代码:
// Root route: answer every GET / with a fixed greeting, which makes it easy
// to confirm in a browser that the server is up.
app.get('/', (req, res) => {
  res.send('Hello World!');
});
此时,在DOS中项目根目录antdComponent下执行node index.js,让项目跑起来。之后,打开浏览器,访问http://localhost:3000,你就会发现页面上显示'Hello World!'字样。
这样,在后面我们获取到信息后,就可以在访问http://localhost:3000时看到这些信息。
4.分析页面结构

找到所需要的信息所在节点
5.开始进行爬取。我这里封装了几个函数,分别爬取第4步图中左边的组件列表(包含每个组件的链接地址及基本信息,每一个组件都有一个地址)和右边的api信息,函数如下
1.分析结构,找到你想要的
2.函数封装(一个函数只做一件事)
1.获取组件列表
/**
 * Fetch a docs page and collect every component listed in its side menu.
 *
 * For each `.ant-menu-item` found under `.ant-menu .ant-menu-item-group`:
 *   - name:     text of the item's first <span>
 *   - title:    text of the item's second <span>
 *   - category: text of the <div> preceding the enclosing
 *               `.ant-menu-item-group-list`
 *   - url:      'https://ant.design' + the href of the item's <a>
 *
 * A failed request is logged and produces an empty list, so the returned
 * promise always settles and callers are never left waiting.
 *
 * @param {String} url - Address of a docs page that contains the component menu.
 * @return {Promise<Array>} [{title, name, category, url}]
 */
async function getComponentList(url) {
  let res;
  try {
    // A superagent request is thenable; it rejects on network/HTTP errors.
    res = await superagent.get(url);
  } catch (err) {
    console.error(`组件链接列表请求 - ${err}`);
    return [];
  }

  const $ = cheerio.load(res.text);
  const componentList = [];
  $('.ant-menu .ant-menu-item-group .ant-menu-item').each((idx, ele) => {
    const item = $(ele);
    componentList.push({
      name: item.find('span').eq(0).text(),
      title: item.find('span').eq(1).text(),
      category: item.parent('.ant-menu-item-group-list').prev('div').text(),
      url: `https://ant.design${item.find('a').attr('href')}`,
    });
  });
  return componentList;
}
2.获取单个组件apis
/**
 * Fetch a component's docs page and parse its first API table.
 *
 * Only the first `<table>` under `#react-content .api-container` is used; it
 * is handed to `parseApis`, which is not defined in this snippet and must be
 * provided elsewhere. A failed request is logged and produces an empty list,
 * so the returned promise always settles.
 *
 * @param {String} url - Address of the component's docs page.
 * @return {Promise<Array>} Whatever `parseApis` returns for the first table
 *   (documented by the author as [{name, type, description, defaultValue}]),
 *   or [] when the request fails or the page has no API table.
 */
async function getComponentApis(url) {
  let res;
  try {
    // A superagent request is thenable; it rejects on network/HTTP errors.
    res = await superagent.get(url);
  } catch (err) {
    console.error(`组件apis请求失败 - ${err}`);
    return [];
  }

  // Parsing happens outside the request callback so that an exception from
  // parseApis rejects this function's promise instead of being lost.
  const $ = cheerio.load(res.text);
  const firstTable = $('#react-content .api-container table').get(0);
  return firstTable ? parseApis(firstTable) : [];
}
3.获取单个组件的子组件
/**
 * Fetch a component's docs page and describe its sub-components.
 *
 * Sub-component names are the texts of the `h3 span` headings under
 * `#react-content .api-container`, skipping the first heading. Each
 * sub-component's APIs come from `parseApis` (not defined in this snippet;
 * it must be provided elsewhere) applied to one API table.
 *
 * A failed request is logged and produces an empty list, so the returned
 * promise always settles.
 *
 * @param {String} url - Address of the component's docs page.
 * @param {String} category - Category copied onto every sub-component.
 * @return {Promise<Array>} [{ title, name, category, url, apis }]
 */
async function getSubComponent(url, category) {
  let res;
  try {
    // A superagent request is thenable; it rejects on network/HTTP errors.
    res = await superagent.get(url);
  } catch (err) {
    console.error(`组件apis请求失败 - ${err}`);
    return [];
  }

  // Parse the page once and reuse it for both the headings and the tables.
  const $ = cheerio.load(res.text);

  const headingNames = [];
  $('#react-content .api-container h3 span').each((idx, ele) => {
    headingNames.push($(ele).text());
  });

  const tables = [];
  $('#react-content .api-container table').each((idx, ele) => {
    tables.push(ele);
  });

  // Heading 0 / table 0 are skipped here; the remaining headings are paired
  // with the remaining tables by position, so each sub-component gets its own
  // table rather than all of them sharing the last one.
  // NOTE(review): assumes headings and tables appear in the same order on the
  // page — confirm against the live markup.
  return headingNames.slice(1).map((name, i) => {
    const table = tables[i + 1];
    return {
      name,
      title: '',
      category,
      url,
      apis: table ? parseApis(table) : [],
    };
  });
}
3.调用函数,得到想要的结构
// Chinese display labels keyed by API property name. Built once at module
// load instead of once per scraped component.
// `children` previously appeared twice in this map; the later value won at
// runtime, so only that one is kept.
const API_LABELS = Object.freeze({
  disabled: "是否禁用",
  ghost: "按钮背景透明",
  href: " 点击跳转的地址",
  htmlType: " 设置button的原生type值",
  icon: "图标类型",
  loading: "载入状态",
  shape: "形状",
  size: "大小",
  target: "相当于a链接的target属性",
  type: "类型",
  onClick: "点击按钮时的回调",
  block: "将按钮的宽度调整为其父宽度的选项",
  style: "图标的样式",
  theme: "图标的主题风格",
  spin: "是否有旋转动画",
  rotate: "图标旋转角度",
  component: "组件",
  twoToneColor: "双色图标颜色",
  scriptUrl: "项目生成的js地址",
  extraCommonProps: "svg图标设置额外的属性",
  width: "元素宽度",
  height: "元素高度",
  fill: "元素的填充颜色",
  className: "类名",
  copyable: "是否可拷贝",
  delete: "删除",
  editable: "是否可编辑",
  ellipsis: "省略号",
  mark: "标记",
  underline: "下划线",
  strong: "加粗",
  level: "重要程度",
  onChange: "提交编辑内容时触发",
  align: "对齐方式",
  gutter: "栅格间隔",
  justify: "水平排列方式",
  offset: "栅格左侧间隔",
  order: "栅格顺序",
  pull: "栅格向左移动格数",
  push: "栅格向右移动格数",
  span: " 栅格占位格数",
  xs: " <576px 响应式栅格",
  sm: " ≥576px 响应式栅格",
  md: " ≥768px 响应式栅格",
  lg: " ≥992px 响应式栅格",
  xl: " ≥1200px 响应式栅格",
  xxl: "≥1600px 响应式栅格",
  hasSider: "有sider",
  breakpoint: "断点",
  collapsed: "收起状态",
  collapsedWidth: "收缩宽度",
  collapsible: "是否可收起",
  defaultCollapsed: "是否默认收起",
  reverseArrow: "翻转折叠",
  trigger: "自定义tigger",
  onCollapse: "收起时的回调函数",
  onBreakpoint: "断点时的回调",
  zeroWidthTriggerStyle: "0宽度触发样式",
  offsetBottom: "偏移底部",
  offsetTop: "偏移顶部",
  itemRender: "项目渲染",
  params: "参数",
  routes: "路由栈信息",
  separator: "分隔符",
  overlay: "下拉菜单内容",
  getPopupContainer: "菜单渲染父节点",
  overlayClassName: "下拉根元素的类名称",
  overlayStyle: "下拉根元素的样式",
  placement: "菜单弹出位置",
  visible: "是否显示",
  onVisibleChange: "显示状态改变时调用",
  defaultOpenKeys: "初始展开的菜单项key数组",
  defaultSelectedKeys: "初始选中的菜单项key数组",
  forceSubMenuRender: "在子菜单展示前就渲染DOM",
  inlineCollapsed: "内联模式折叠",
  inlineIndent: "内联模式缩进",
  mode: "菜单类型",
  multiple: "是否允许多选",
  openKeys: "菜单项key数组",
  selectable: "可选",
  selectedKeys: "选定的键",
  subMenuCloseDelay: "子菜单关闭延迟",
  subMenuOpenDelay: "子菜单开启延迟",
  onDeselect: "取消选择",
  onOpenChange: "展开/关闭的回调",
  onSelect: "选择",
  overflowedIndicator: "指标溢出",
  key: "键",
  title: "标题",
  popupClassName: "弹出类别名称",
  onTitleClick: "在标题上单击",
  current: "当前页数",
  defaultCurrent: "默认当前页数",
  defaultPageSize: "默认页面大小",
  hideOnSinglePage: "默认每页条数",
  pageSize: "每页条数",
  pageSizeOptions: "指定每页显示条数",
  showLessItems: "显示较少的项目",
  showQuickJumper: "快速跳转至某页",
  showSizeChanger: "改变pageSize",
  showTotal: "显示总和",
  simple: "简单分页",
  total: "总页数",
  onShowSizeChange: "pageSize变化的回调",
  subTitle: "副标题",
  avatar: "头像",
  backIcon: "后退图标",
  tags: "标签",
  extra: "额外信息",
  breadcrumb: "面包屑",
  footer: "页脚",
  onBack: "返回按钮的点击事件",
  direction: "步骤条方向",
  labelPlacement: "标签放置位置",
  progressDot: "点状步骤条",
  status: "当前步骤的状态",
  initial: "起始序号",
  description: "详情描述",
  allowClear: "允许清除",
  autoFocus: "自动对焦",
  backfill: "回填",
  children: "子项",
  dataSource: "数据源",
  dropdownMenuStyle: "菜单自定义样式",
  defaultActiveFirstOption: "默认高亮第一个选项",
  defaultValue: "默认值",
  filterOption: "过滤器选项",
  optionLabelProp: "回填选择框的option属性值",
  placeholder: "输入框提示",
  value: "指定值",
  onBlur: "模糊",
  onFocus: "获取焦点",
  onSearch: "搜索补全项",
  defaultOpen: "默认打开",
  open: "打开",
  onDropdownVisibleChange: "展开下拉菜单的回调",
  blur: " 移除焦点",
  focus: " 获取焦点",
  changeOnSelect: "选择时更改",
  displayRender: "选择后显示渲染函数",
  expandTrigger: "次菜单的展开方式",
  fieldNames: "字段名称",
  loadData: "动态加载选项",
  notFoundContent: "未找到内容",
  options: "可选项",
  popupPlacement: "浮层预设位置",
  popupVisible: "弹出窗口可见",
  showSearch: "显示搜索框",
  suffixIcon: "后缀图标",
  onPopupVisibleChange: "显示/隐藏浮层的回调",
  filter: "筛选条件",
  limit: "限制数量",
  matchInputWidth: "是否同宽",
  render: "渲染",
  sort: "排序",
  checked: "是否选中",
  defaultChecked: "默认状态",
  indeterminate: "全选属性",
  name: "名称",
  dateRender: "自定义日期单元格内容",
  disabledDate: "禁用日期",
  dropdownClassName: "额外的弹出的className",
  getCalendarContainer: "定义浮层的容器",
  locale: "国际化配置",
  popupStyle: "弹出样式",
  onPanelChange: "日历面板切换",
  defaultPickerValue: "默认面板日期",
  disabledTime: "禁用时间",
  format: "日期格式",
  renderExtraFooter: "呈现额外的页脚",
  showTime: "时间功能",
  showTimeDefaultValue: "显示时间默认值",
  showToday: "显示今天",
  onOk: "确定",
  monthCellContentRender: "自定义月份内容呈现",
  ranges: "时间范围",
  onCalendarChange: "待选时间更改",
  form: "表格",
  hideRequiredMark: "隐藏必需标记",
  labelAlign: "标签对齐",
  labelCol: "标签宽度",
  layout: "布局方式",
  onSubmit: "提交成功的回调",
  wrapperCol: " 组件宽度",
  colon: "显示冒号",
  mapPropsToFields: "将父组件属性映射到表单项上",
  validateMessages: "默认校验信息",
  onFieldsChange: "子节点变化时触发",
  onValuesChange: "值改变时的回调",
  getFieldDecorator: "双向绑定",
  getFieldError: "获取字段错误",
  getFieldsError: "获取字段错误",
  getFieldsValue: "获取字段值",
  getFieldValue: "获取字段值",
  isFieldsTouched: "值的收集时机",
  isFieldTouched: "值的收集时机",
  isFieldValidating: "是否在校验状态",
  resetFields: "重置字段",
  setFields: "设置控件字段",
  setFieldsValue: "设置控件字段值",
  validateFields: "校验值字段",
  validateFieldsAndScroll: "校验字段是否在可见范围",
  optionsFirst: " 每一表单校验",
  optionsFirstFields: " 指定表单域校验",
  optionsForce: " 已经校验过的表单域再次校验",
  optionsScroll: "滚动行为",
  id: "组件id",
  optionsGetValueFromEvent: " options.get事件的值",
  optionsInitialValue: " options.initial值",
  optionsNormalize: " options.normalize",
  optionsPreserve: " options.preserve",
  optionsRules: " options.rules",
  optionsTrigger: " options.trigger",
  optionsValidateFirst: " options.validate first",
  optionsValidateTrigger: " options.校验值的时机",
  optionsValuePropName: "值的属性",
  hasFeedback: "校验展示",
  help: "提示信息",
  htmlFor: " html for",
  label: "标签名称",
  required: "是否必填",
  validateStatus: "校验状态",
  enum: "枚举类型",
  len: " 字段长度",
  max: "最大长度",
  message: "校验文案",
  min: "最小长度",
  pattern: "正则表达式校验",
  transform: "转换字段值",
  validator: "自定义校验",
  whitespace: "空白",
  // Fixed: these two labels were swapped (addonAfter is the trailing label,
  // addonBefore the leading one; compare prefix/suffix below).
  addonAfter: "后置标签",
  addonBefore: "前置标签",
  prefix: "前置图标",
  suffix: "后置图标",
  onPressEnter: "按回车",
  autoSize: "自适应高度",
  enterButton: "输入按钮",
  compact: "是否用紧凑模式",
  visibilityToggle: "是否显示切换按钮",
  formatter: "指定输入框格式",
  parser: "转换方式",
  precision: "数值精度",
  decimalSeparator: "小数点",
  step: "步数",
  split: "拆分",
  validateSearch: "自定义验证",
  allowHalf: "允许半选",
  character: "自定义字符",
  count: "star总数",
  tooltips: "提示信息",
  onHoverChange: "鼠标事件",
  onKeyDown: "按键回调",
  buttonStyle: "按钮样式",
  autoClearSearchValue: "清空搜索框",
  dropdownMatchSelectWidth: "下拉同宽",
  dropdownRender: "自定义下拉框内容",
  dropdownStyle: "下拉样式",
  firstActiveValue: "高亮第一个",
  labelInValue: "值包含内容",
  maxTagCount: "最大标签数",
  maxTagTextLength: "最大标签文本长度",
  maxTagPlaceholder: "最大标签占位显示内容",
  optionFilterProp: "过滤属性",
  showArrow: "是否显示下拉箭头",
  removeIcon: "清除图标",
  clearIcon: "清空图标",
  menuItemSelectedIcon: "当前选中条目图标",
  tokenSeparators: "自动分词的分隔符",
  onInputKeyDown: "按下输入键时的回调",
  onMouseEnter: "鼠标移入时回调",
  onMouseLeave: "鼠标移出时回调",
  onPopupScroll: "下拉列表滚动时回调",
  dots: "只能刻度上",
  included: "包含关系",
  marks: "刻度标记",
  range: "双滑块模式",
  reverse: "反向坐标轴",
  tipFormatter: "提示格式化程序",
  vertical: "垂直方向",
  onAfterChange: "改变后回调",
  tooltipPlacement: "提示信息展示位置",
  tooltipVisible: "提示信息的显示",
  getTooltipPopupContainer: "获取工具提示弹出式容器",
  checkedChildren: "选中时内容",
  unCheckedChildren: "未选中内容",
  filterTreeNode: "过滤属性",
  searchPlaceholder: "搜索占位",
  searchValue: "搜索框值",
  treeIcon: "树形图标",
  showCheckedStrategy: "显示节点",
  treeCheckable: "显示复选框",
  treeCheckStrictly: "节点受控",
  treeData: "节点配置",
  treeDataSimpleMode: "简单格式",
  treeDefaultExpandAll: "展开所有",
  treeDefaultExpandedKeys: "默认展开树节点",
  treeExpandedKeys: "展开的树节点",
  treeNodeFilterProp: "过滤属性",
  treeNodeLabelProp: "显示属性",
  onTreeExpand: "展开节点时调用",
  disableCheckbox: "禁用复选框",
  isLeaf: "是否叶子节点",
  addon: "底部显示内容",
  clearText: "清除文案",
  defaultOpenValue: "默认打开值",
  disabledHours: "禁选时间",
  disabledMinutes: "禁选分钟",
  disabledSeconds: "禁选秒数",
  hideDisabledOptions: "隐藏禁用的选项",
  hourStep: "小时步长",
  inputReadOnly: "只读",
  minuteStep: "分钟步长",
  secondStep: "秒钟步长",
  use12Hours: "使用12小时制",
  lazy: "懒",
  listStyle: "列表样式",
  operations: "操作",
  showSelectAll: "显示全选",
  targetKeys: "目标键",
  titles: "标题",
  onScroll: "滚动",
  onSelectChange: "按选择更改",
  filteredItems: "过滤属性",
  onItemSelect: "选择项目",
  onItemSelectAll: "全部选中",
  accept: "文件类型",
  action: "上传地址",
  directory: "上传目录",
  beforeUpload: "上传之前",
  customRequest: "自定义请求",
  data: "上传参数",
  defaultFileList: "默认列表",
  fileList: "文件列表",
  headers: "请求头部",
  listType: "内联样式",
  previewFile: "预览文件",
  showUploadList: "展示已上传",
  supportServerRender: "支持服务器渲染",
  withCredentials: "是否携带cookie",
  openFileDialogOnClick: "点击打开文件对话框",
  onPreview: "预览回调",
  onRemove: "移除回调",
  onDownload: "下载回调",
  transformFile: "转换文件",
  src: " 资源地址",
  srcSet: "响应式资源地址",
  alt: "替代文本",
  onError: "加载失败事件",
  color: "颜色",
  dot: "展示成红点",
  overflowCount: "封顶数字值",
  showZero: "数值为0 是否展示badge",
  text: "状态点文本",
  dateCellRender: "日期单元格渲染",
  dateFullCellRender: "日期全单元格渲染",
  fullscreen: "全屏展示",
  monthCellRender: "月单元格渲染",
  monthFullCellRender: "月满单元格渲染",
  validRange: "日期范围",
  headerRender: "头部渲染",
  actions: "卡片操作组",
  activeTabKey: "激活面板",
  headStyle: "头部样式",
  bodyStyle: "内容区样式",
  bordered: "显示边框",
  cover: "卡片封面",
  defaultActiveTabKey: "默认选中",
  hoverable: "悬浮",
  tabList: "页签标题",
  tabBarExtraContent: "额外内容",
  onTabChange: "页签切换回调",
  afterChange: "切换面板的回调",
  autoplay: "自动切换",
  beforeChange: "切换面板的回调",
  dotPosition: "面板点位置",
  easing: "动画效果",
  effect: "效果函数",
  goTo: "切换到指定面板",
  next: "切到下一面板",
  prev: " 切到上一面板",
  activeKey: "激活面板",
  defaultActiveKey: "选中面板",
  accordion: "手风琴式",
  expandIcon: "切换图标",
  expandIconPosition: "图标位置",
  destroyInactivePanel: "销毁面板",
  forceRender: "隐藏渲染",
  header: "面板头部",
  author: "作者",
  content: "评论内容",
  datetime: "日期时间",
  column: "列",
  imageStyle: "图片样式",
  image: "图片",
  grid: "网格",
  itemLayout: "项目布局",
  loadMore: "加载更多",
  pagination: "分页",
  renderItem: "渲染列表",
  position: "位置",
  groupSeparator: "千位分隔符",
  valueStyle: "数值样式",
  onFinish: "完成时触发",
  arrowPointAtCenter: "指向中心",
  autoAdjustOverflow: "自动调整溢出",
  defaultVisible: "默认显示",
  mouseEnterDelay: "鼠标进入延迟",
  mouseLeaveDelay: "鼠标离开延迟",
  tableLayout: "表格布局",
  childrenColumnName: "子列名称",
  columns: "列配置",
  components: "组件",
  defaultExpandAllRows: "默认展开",
  defaultExpandedRowKeys: "固定展开行",
  expandedRowKeys: "扩展行键",
  expandedRowRender: "扩展行渲染",
  expandRowByClick: "点击扩展",
  indentSize: "缩进大小",
  rowClassName: "行类名称",
  rowKey: "行键",
  rowSelection: "行选择",
  scroll: "滚动",
  showHeader: "显示标题",
  onExpand: "扩展",
  onExpandedRowsChange: "在扩展的行上更改",
  onHeaderRow: "在标题行上",
  onRow: "按行",
  colSpan: " col span",
  dataIndex: "数据索引",
  defaultSortOrder: "默认排序顺序",
  filterDropdown: "过滤器下拉列表",
  filterDropdownVisible: "可见过滤器下拉列表",
  filtered: "已过滤",
  filteredValue: "过滤值",
  filterIcon: "过滤器图标",
  filterMultiple: "过滤多个",
  filters: "过滤器",
  fixed: "固定",
  sorter: "分类器",
  sortOrder: "排序顺序",
  sortDirections: "排序方向",
  onCell: "在单元格上",
  onFilter: "在过滤器上",
  onFilterDropdownVisibleChange: "关于过滤器下拉列表的可见变化",
  onHeaderCell: "在标题单元格上",
  columnWidth: "列宽",
  columnTitle: "列标题",
  getCheckboxProps: "获取复选框道具",
  hideDefaultSelections: "隐藏默认选择",
  selectedRowKeys: "选定的行键",
  selections: "选择",
  onSelectAll: "全选",
  onSelectInvert: "在选择反转时",
  x: "横向滚动",
  y: "纵向滚动",
  scrollToFirstRowOnChange: "滚动到更改后的第一行",
  animated: "动画",
  renderTabBar: "渲染选项卡栏",
  hideAdd: "隐藏添加",
  tabBarGutter: "标签栏装订线",
  tabBarStyle: "标签栏样式",
  tabPosition: "标签位置",
  onEdit: "在编辑中",
  onNextClick: "下次点击",
  onPrevClick: "按一下",
  onTabClick: "单击选项卡",
  tab: "显示文字",
  afterClose: "关闭后",
  closable: "可关闭",
  onClose: "关闭",
  pending: "待处理",
  pendingDot: "待定点",
  autoExpandParent: "自动扩展父级",
  blockNode: "块节点",
  checkable: "可检查",
  checkedKeys: "选中的键",
  checkStrictly: "严格检查",
  defaultCheckedKeys: "默认选中的密钥",
  defaultExpandAll: "默认全部展开",
  defaultExpandedKeys: "默认扩展键",
  defaultExpandParent: "默认扩展父级",
  draggable: "可拖动",
  expandedKeys: "扩展键",
  loadedKeys: "已加载的密钥",
  showIcon: "显示图标",
  switcherIcon: "切换器图标",
  showLine: "显示行",
  onCheck: "在支票上",
  onDragEnd: "在拖尾",
  onDragEnter: "在拖动进入",
  onDragLeave: "在拖假期间",
  onDragOver: "拖曳",
  onDragStart: "在拖动开始",
  onDrop: "放下",
  onLoad: "正在加载",
  onRightClick: "单击右键",
  expandAction: "展开动作",
  banner: "顶部公告",
  closeText: "自定义关闭",
  destroyOnClose: "销毁子元素",
  getContainer: "挂载节点",
  maskClosable: "是否蒙层",
  mask: "是否遮罩",
  maskStyle: "遮罩样式",
  drawerStyle: "弹出层样式",
  headerStyle: "头部样式",
  zIndex: "层级",
  afterVisibleChange: "切换结束时的回调",
  keyboard: "支持esc关闭",
  cancelText: "取消文字",
  centered: "居中展示",
  closeIcon: "自定义关闭图标",
  confirmLoading: "确认加载",
  okText: "确认文字",
  okType: "确认类型",
  okButtonProps: "ok 按钮 props",
  cancelButtonProps: "cancel按钮props",
  wrapClassName: "外层容器类名",
  onCancel: "关闭时的回调",
  autoFocusButton: "自动对获取焦点",
  iconType: "图标类型",
  duration: "是否延时",
  maxCount: "最大数量",
  top: "距顶部位置",
  btn: "关闭按钮",
  bottom: "距底部位置",
  percent: "百分比",
  showInfo: "是否显示信息",
  strokeLinecap: "笔画线帽",
  strokeColor: "颜色",
  successPercent: "完成百分比",
  onConfirm: "确认的回调",
  delay: "延迟显示",
  indicator: "指示符",
  spinning: "加载状态",
  tip: "描述文案",
  wrapperClassName: "包装器类属性",
  active: "是否展示动画",
  paragraph: "是否显示段落",
  rows: "行数",
  affix: "是否浮动",
  bounds: "锚点边界",
  showInkInFixed: "固定显示",
  getCurrentAnchor: "获取当前锚点",
  targetOffset: "偏移量",
  visibilityHeight: "可见高度",
  autoInsertSpaceInButton: "是否移除空格",
  csp: " csp配置",
  renderEmpty: "空状态",
  prefixCls: "统一前缀",
  pageHeader: "页面标题",
  dashed: "是否为虚线",
  orientation: "标题位置",
  getMentions: "获得提及",
  toContentState: "到内容状态",
  toString: "转字符串",
  defaultSuggestions: "默认内容",
  getSuggestionContainer: "获取建议容器",
  multiLines: "多行模式",
  readOnly: "是否只读",
  suggestions: "建议内容",
  suggestionStyle: "下拉框样式",
  onSearchChange: "根据搜索变化",
});

/**
 * Set `label` on every API entry whose `name` is a key of API_LABELS.
 * Entries without a matching key are left untouched. Mutates the entries in
 * place.
 *
 * @param {Array} apis - API entries; each is expected to carry a `name`.
 */
function applyApiLabels(apis) {
  for (const api of apis) {
    // Own-property check so names such as "constructor" never pick up
    // values inherited from Object.prototype.
    if (Object.prototype.hasOwnProperty.call(API_LABELS, api.name)) {
      api.label = API_LABELS[api.name];
    }
  }
}

/**
 * GET /parse — scrape the component list, enrich each component with its
 * APIs and sub-components (plus Chinese labels), and save the result as JSON
 * next to this script.
 *
 * A component whose pages fail to parse is logged and left without
 * `apis`/`subComponents`; the remaining components are still processed.
 * If the list cannot be fetched or the file cannot be written, the route
 * answers with HTTP 500 instead of leaving the request unanswered.
 */
app.get('/parse', async (req, res) => {
  const docLink = 'https://ant.design/components/button-cn/';

  try {
    const componentList = await getComponentList(docLink);

    for (const component of componentList) {
      try {
        // The two lookups are independent of each other, so run them together.
        const [apis, subComponents] = await Promise.all([
          getComponentApis(component.url),
          getSubComponent(component.url, component.category),
        ]);

        applyApiLabels(apis);
        for (const subComponent of subComponents) {
          applyApiLabels(subComponent.apis);
        }

        Object.assign(component, { apis, subComponents });
      } catch (err) {
        console.error(`组件解析失败 - ${component.name}`, err);
      }
    }

    const file = path.join(__dirname, 'souce_components.json');
    // writeFileSync takes no callback: it returns on success and throws on
    // failure, so the outcome is handled by the surrounding try/catch.
    fs.writeFileSync(file, JSON.stringify(componentList));
    console.log('文件创建成功,地址:' + file);

    res.send('数据获取并保存成功');
  } catch (err) {
    console.error(err);
    res.status(500).send('数据获取或保存失败');
  }
});
学习链接
