项目地址
在线体验
介绍
首先说明：这里的 HTMLParser 并不完备，只做了基本处理。
此项目主要实现 HTML2AST 和 AST2HTML：前者是 HTML parser，把 HTML 解析为 AST；后者把 AST 还原为 HTML。
代码
html2ast.ts HTML2AST
/** A node of the tree produced by `parser`. */
interface HtmlAstNode {
  /** "root", "node" (element), "text" or "comment". */
  type: string;
  tag?: string;
  content?: string;
  children?: HtmlAstNode[];
  /** One single-key object per attribute, in source order. */
  attrs?: Record<string, string>[];
}

/** A lexer result: the node data plus bookkeeping that never reaches the AST. */
interface HtmlToken extends HtmlAstNode {
  /** Position of the first character that has not been consumed yet. */
  index: number;
  /** Set on `</tag>` tokens. */
  closeTag?: boolean;
  /** Set on `<tag ... />` tokens. */
  selfClosing?: boolean;
}

/** Elements that never have an end tag, so they must not be kept open. */
const HTML_VOID_TAGS: ReadonlySet<string> = new Set([
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "param", "source", "track", "wbr",
]);

/** True for the characters that separate tag names and attributes. */
function isHtmlSpace(ch: string): boolean {
  return ch === " " || ch === "\t" || ch === "\n" || ch === "\r" || ch === "\f";
}

/**
 * Reads exactly one token starting at `index`.
 *
 * @param item  The character at `file[index]`; `"<"` starts markup, anything
 *              else starts a text run.
 * @param index Position in `file` where the token begins.
 * @param file  The complete HTML source.
 * @returns The token; its `index` always points past everything consumed, so
 *          the caller makes progress on every call. `type` is "EOF" only when
 *          `index` is already at or beyond the end of `file`.
 */
function lexer(item: string, index: number, file: string): HtmlToken {
  const length = file.length;
  if (index >= length) {
    return { type: "EOF", index };
  }

  // Plain text: everything up to (not including) the next "<".
  if (item !== "<") {
    const nextMarkup = file.indexOf("<", index + 1);
    const end = nextMarkup === -1 ? length : nextMarkup;
    return { type: "text", content: file.slice(index, end), index: end };
  }

  const marker = file.charAt(index + 1);

  // Closing tag: </name>
  if (marker === "/") {
    const close = file.indexOf(">", index + 2);
    const nameEnd = close === -1 ? length : close;
    return {
      type: "node",
      tag: file.slice(index + 2, nameEnd).trim(),
      index: close === -1 ? length : close + 1,
      closeTag: true,
    };
  }

  if (marker === "!") {
    // Comment: <!-- ... -->. An unterminated comment runs to the end of input.
    if (file.startsWith("<!--", index)) {
      const bodyStart = index + 4;
      const close = file.indexOf("-->", bodyStart);
      if (close === -1) {
        return { type: "comment", content: file.slice(bodyStart), index: length };
      }
      return { type: "comment", content: file.slice(bodyStart, close), index: close + 3 };
    }
    // Any other declaration (e.g. <!DOCTYPE html>) is kept verbatim as text so
    // that it is neither mistaken for a comment nor lost when regenerating HTML.
    const close = file.indexOf(">", index + 2);
    const end = close === -1 ? length : close + 1;
    return { type: "text", content: file.slice(index, end), index: end };
  }

  // Opening tag: <name attr="value" ...> or <name ... />
  const atSelfClose = (at: number): boolean =>
    file.charAt(at) === "/" && file.charAt(at + 1) === ">";

  let pos = index + 1;
  const nameStart = pos;
  while (
    pos < length &&
    !isHtmlSpace(file.charAt(pos)) &&
    file.charAt(pos) !== ">" &&
    !atSelfClose(pos)
  ) {
    pos++;
  }
  const tag = file.slice(nameStart, pos);

  const attrs: Record<string, string>[] = [];
  let selfClosing = false;

  while (pos < length && file.charAt(pos) !== ">") {
    if (isHtmlSpace(file.charAt(pos))) {
      pos++;
      continue;
    }
    if (atSelfClose(pos)) {
      selfClosing = true;
      pos++; // step onto ">" which ends the loop
      continue;
    }

    // Attribute name.
    const keyStart = pos;
    while (
      pos < length &&
      !isHtmlSpace(file.charAt(pos)) &&
      file.charAt(pos) !== "=" &&
      file.charAt(pos) !== ">" &&
      !atSelfClose(pos)
    ) {
      pos++;
    }
    const key = file.slice(keyStart, pos);

    // Optional value; whitespace is allowed on both sides of "=".
    let value = "";
    let lookahead = pos;
    while (lookahead < length && isHtmlSpace(file.charAt(lookahead))) {
      lookahead++;
    }
    if (file.charAt(lookahead) === "=") {
      pos = lookahead + 1;
      while (pos < length && isHtmlSpace(file.charAt(pos))) {
        pos++;
      }
      const quote = file.charAt(pos);
      if (quote === '"' || quote === "'") {
        // Quoted value: may contain spaces and ">"; ends at the matching quote.
        const closeQuote = file.indexOf(quote, pos + 1);
        const valueEnd = closeQuote === -1 ? length : closeQuote;
        value = file.slice(pos + 1, valueEnd);
        pos = closeQuote === -1 ? length : closeQuote + 1;
      } else {
        // Unquoted value: ends at whitespace or ">".
        const valueStart = pos;
        while (pos < length && !isHtmlSpace(file.charAt(pos)) && file.charAt(pos) !== ">") {
          pos++;
        }
        value = file.slice(valueStart, pos);
      }
    }

    // Valueless (boolean) attributes are stored with an empty string value.
    attrs.push({ [key]: value });
  }

  if (pos < length) {
    pos++; // consume ">"
  }

  return { type: "node", tag, index: pos, children: [], attrs, selfClosing };
}

/**
 * Parses an HTML string into a simple AST.
 *
 * The result is `{ type: "root", children }`. Element nodes look like
 * `{ type: "node", tag, attrs, children }`, text nodes like
 * `{ type: "text", content }` and comments like `{ type: "comment", content }`.
 * Non-comment declarations such as `<!DOCTYPE html>` are kept as text nodes.
 *
 * Closing tags close the innermost open element regardless of their name; a
 * closing tag with no open element is ignored. Void elements (`<br>`, `<img>`,
 * ...) and self-closing tags never receive children.
 *
 * @param file HTML source text. Typed `any` (as is the return value) to stay
 *             compatible with existing callers.
 * @returns The root node of the AST.
 */
export function parser(file: any): any {
  const rootChildren: HtmlAstNode[] = [];
  const root: HtmlAstNode = { type: "root", children: rootChildren };
  // Child lists of the currently open elements; the last entry is the innermost.
  const openChildren: HtmlAstNode[][] = [rootChildren];

  let index = 0;
  while (index < file.length) {
    const { index: next, closeTag, selfClosing, ...node } = lexer(file.charAt(index), index, file);
    index = next;

    if (node.type === "EOF") {
      break;
    }

    if (closeTag) {
      // Never pop the root's child list, otherwise a stray </tag> would crash.
      if (openChildren.length > 1) {
        openChildren.pop();
      }
      continue;
    }

    const top = openChildren[openChildren.length - 1];
    const siblings = top === undefined ? rootChildren : top;
    siblings.push(node);

    if (node.type === "node" && node.children !== undefined && !selfClosing) {
      const tagName = node.tag === undefined ? "" : node.tag.toLowerCase();
      if (!HTML_VOID_TAGS.has(tagName)) {
        openChildren.push(node.children);
      }
    }
  }

  return root;
}
ast2html.ts AST2HTML
/** Elements that are serialized without an end tag. */
const VOID_ELEMENT_TAGS: ReadonlySet<string> = new Set([
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "param", "source", "track", "wbr",
]);

/**
 * Formats one attribute as `name="value"`.
 *
 * The value is written verbatim (entities are not re-encoded). To keep the
 * markup well formed, a value containing `"` is wrapped in single quotes, and
 * a value containing both quote kinds has its `"` replaced by `&quot;`.
 */
function formatHtmlAttribute(name: string, value: unknown): string {
  const text = String(value);
  if (!text.includes('"')) {
    return `${name}="${text}"`;
  }
  if (!text.includes("'")) {
    return `${name}='${text}'`;
  }
  return `${name}="${text.replace(/"/g, "&quot;")}"`;
}

/**
 * Renders one AST node (children first), stores the result on `node.output`
 * and returns it. Missing nodes and unknown node types render as "".
 */
function renderHtmlNode(node: any): string {
  if (node === null || node === undefined) {
    return "";
  }

  const inner: string = Array.isArray(node.children)
    ? node.children.map((child: any) => renderHtmlNode(child)).join("")
    : "";

  let output = "";
  if (node.type === "node") {
    const attrs: string[] = [];
    if (Array.isArray(node.attrs)) {
      for (const entry of node.attrs) {
        for (const name of Object.keys(entry)) {
          attrs.push(formatHtmlAttribute(name, entry[name]));
        }
      }
    }
    const openTag = `<${node.tag}${attrs.length > 0 ? " " : ""}${attrs.join(" ")}>`;
    // Void elements have no end tag. Any children attached to one are emitted
    // right after it, which restores the original sibling order.
    output = VOID_ELEMENT_TAGS.has(String(node.tag).toLowerCase())
      ? `${openTag}${inner}`
      : `${openTag}${inner}</${node.tag}>`;
  } else if (node.type === "root") {
    output = inner;
  } else if (node.type === "text") {
    output = node.content === null || node.content === undefined ? "" : String(node.content);
  } else if (node.type === "comment") {
    output = `<!--${node.content === null || node.content === undefined ? "" : node.content}-->`;
  }

  node.output = output;
  return output;
}

/**
 * Serializes an AST (root/node/text/comment nodes with optional `children`
 * and `attrs`) back to an HTML string.
 *
 * Side effect: every visited node receives an `output` property holding its
 * own serialized HTML, so the result is also available as `ast.output`.
 *
 * @param ast Root of the tree to serialize; `null`/`undefined` yields "".
 * @returns The HTML for the whole tree.
 */
export function generate(ast: any): string {
  return renderHtmlNode(ast);
}

// Unused breadth-first traversal, kept from the original listing:
// let stack = []
// function levelOrder(ast) {
//   let queue = [];
//   queue.push(ast);
//   while (queue.length) {
//     let cur = queue.shift();
//     stack.push(cur)
//     if (cur.children)
//       queue.push(...cur.children)
//   }
// }
// levelOrder(ast)
最后
有时间我再加注释吧。
