项目地址
在线体验
介绍
首先 这里的HTMLParser不是 很完备,做了基本处理。
此项目主要是做HTML2AST & AST2HTML即HTMLparser和AST还原到HTML。
代码
html2ast.ts HTML2AST
function lexer(item: any, index: any, file: any) {
function isEnd(index: number) {
return file.length <= index
}
function isCommentEnd(hanlder: any, index: number) {
let count = index;
let target = "-->"
let sour = ""
while (count < index + 3) {
sour += hanlder.charAt(count)
count++
}
return target === sour
}
if (item === "<") { // <
index++
if (file.charAt(index) === "/") { // </
index++
let tag = ""
let cur = file.charAt(index)
while (cur !== ">" && !isEnd(index)) { // </xx>
tag += cur
index++
cur = file.charAt(index)
}
// if (isEnd(index)) {
// return {
// type: "EOF",
// index
// }
// }
return {
type: "node",
tag,
index,
closeTag: true
}
} else if (file.charAt(index) === "!") {
index++
let cur = file.charAt(index)
let count = 2;
while (count) { // <!--
if (cur !== "-") {
console.assert("fail")
}
index++
cur = file.charAt(index)
count--
}
// -->结束
let content = ""
cur = file.charAt(index)
let isCEnd = false
while (!isEnd(index)) {
isCEnd = isCommentEnd(file, index)
if (isCEnd) {
break
}
content += cur
index++
cur = file.charAt(index)
}
if (isCEnd) { // -->
index += 3
}
if (isEnd(index) && content === '') {
return {
type: "EOF",
index
}
}
return {
type: "comment",
content,
index
}
} else { // <
let tag = ""
let cur = file.charAt(index)
while (cur !== " " && cur !== ">" && !isEnd(index)) {
tag += cur
index++
cur = file.charAt(index)
}
let attrs = []
if (cur === " ") {
while (file.charAt(index) === " ") {
index++
}
let key = ""
let value = ""
cur = file.charAt(index)
while (cur !== ">" && !isEnd(index)) {
if (cur === " " && cur !== ">") {
while (file.charAt(index) === " ") {
index++
cur = file.charAt(index)
}
}
if (cur !== "=" && cur !== ">") {
key += cur
} else if (cur === "=" && cur !== ">") {
index++
cur = file.charAt(index)
while (cur !== " " && !isEnd(index) && cur !== ">") {
if (cur === '"') {
index++
cur = file.charAt(index)
} else {
value += cur
index++
cur = file.charAt(index)
}
}
attrs.push({
[key]: value
})
key = ""
value = ""
index--
} else if (cur === ">") {
break
}
index++
cur = file.charAt(index)
}
}
cur = file.charAt(index)
while (cur !== ">" && !isEnd(index)) {
index++
cur = file.charAt(index)
}
if (file.charAt(index + 1) === "<") { // <xx></xx>
index++
}
// if (isEnd(index)) {
// return {
// type: "EOF",
// index
// }
// }
return {
type: "node",
tag,
index,
children: [],
attrs
}
}
} else if (item === ">") { // >
index++
let content = ""
let cur = file.charAt(index)
while (cur !== "<" && !isEnd(index)) {
content += cur
index++
cur = file.charAt(index)
}
if (isEnd(index) && content === '') {
return {
type: "EOF",
index
}
}
return {
type: "text",
content,
index
}
} else {
let content = ""
let cur = file.charAt(index)
while (cur !== "<" && !isEnd(index)) {
content += cur
index++
cur = file.charAt(index)
}
if (isEnd(index) && content === '') {
return {
type: "EOF",
index
}
}
return {
type: "text",
content,
index
}
}
}
export function parser(file: any) {
let index = 0
let root = {
type: "root",
children: []
}
let stack: any = [root]
while (index < file.length) {
let item = file.charAt(index)
let token = lexer(item, index, file)
index = token.index
delete token.index
if (token.type === "EOF") {
return stack[0]
}
if (!token.closeTag) { // 非闭合标签
stack[stack.length - 1].children.push(token) // 放入栈顶children处
if (token.type === "node") { // 如果是标签节点,则放入栈中
stack.push(token)
}
} else { // 闭合标签,栈顶标签出栈
stack.pop()
}
}
console.log(stack)
return stack[0]
}
ast2html.ts AST2HTML
export function generate(ast: any) {
let stack = []
// 深度遍历
function getAll(ast: any) {
if (ast.children) {
for (let item of ast.children) {
// stack.push(item)
getAll(item)
stack.push(item)
}
}
}
getAll(ast)
stack.push(ast)
for (let index = 0; index < stack.length;) {
if (stack[index].children) {
if (stack[index].type === "node") {
let aa = ""
// let finds = []
/*
for (let i of stack[index].children) {
finds.push(stack.indexOf(i))
}
for (let q of finds) {
aa = `${aa}${stack[q].output}`
}
*/
for (let q of stack[index].children) {
aa = `${aa}${q.output}`
}
let attrs = []
if (stack[index].attrs) {
for (let ll of stack[index].attrs) {
for (let bb of Object.keys(ll)) {
attrs.push(`${bb}="${ll[bb]}"`)
}
}
}
stack[index].output = `<${stack[index].tag}${attrs.length > 0 ? " " : ''}${attrs.join(" ")}>${aa}</${stack[index].tag}>`
} else if (stack[index].type === "root") {
let aa = ""
for (let q of stack[index].children) {
aa = `${aa}${q.output}`
}
stack[index].output = aa
}
} else {
if (stack[index].type === "node") {
let attrs = []
if (stack[index].attrs) {
for (let ll of stack[index].attrs) {
for (let bb of Object.keys(ll)) {
attrs.push(`${bb}="${ll[bb]}"`)
}
}
}
stack[index].output = `<${stack[index].tag}${attrs.length > 0 ? " " : ''}${attrs.join(" ")}></${stack[index].tag}>`
} else if (stack[index].type === "text") {
stack[index].output = stack[index].content
} else if (stack[index].type === "comment") {
stack[index].output = `<!--${stack[index].content}-->`
}
}
index++
}
// return stack
}
// let stack = []
// // 广度遍历
// function levelOrder(ast) {
// let queue = [];
// queue.push(ast);
// while (queue.length) {
// let cur = queue.shift();
// stack.push(cur)
// if (cur.children)
// queue.push(...cur.children)
// }
// }
// levelOrder(ast)
最后
有时间我再加注释吧。