项目地址

在线体验

介绍

首先说明:这里的 HTML parser 并不完备,只做了基本处理。

此项目主要实现 HTML2AST 和 AST2HTML:前者是 HTML parser(把 HTML 解析为 AST),后者把 AST 还原为 HTML。

代码

html2ast.ts HTML2AST

  /** One attribute as a single-key object (`{ name: value }`), kept in source order. */
  type LexAttr = Record<string, string>;

  /**
   * Result of a single `lexer` call.
   * `index` is the offset of the first character that has NOT been consumed yet,
   * so the caller can pass it straight back in as the next start position.
   */
  interface LexToken {
    type: "node" | "comment" | "text" | "EOF";
    index: number;
    tag?: string;
    closeTag?: boolean;
    content?: string;
    children?: LexToken[];
    attrs?: LexAttr[];
  }

  /**
   * Reads exactly one token from `file`, starting at `index`.
   *
   * Token kinds:
   * - opening tag  -> `{ type: "node", tag, attrs, children: [] }`
   * - closing tag  -> `{ type: "node", tag, closeTag: true }`
   * - `<!-- ... -->` -> `{ type: "comment", content }`
   * - anything else -> `{ type: "text", content }`; this includes `<!DOCTYPE ...>`
   *   style declarations, which are kept verbatim so they survive a round trip
   * - `{ type: "EOF" }` when `index` is already at or past the end of the input
   *
   * Limitations: character references are not decoded, and the contents of
   * `<script>` / `<style>` are tokenized like ordinary markup.
   *
   * @param item  The character at `file[index]`; it selects which kind of token is read.
   * @param index Offset in `file` at which the token starts.
   * @param file  The complete HTML source.
   * @returns The token, whose `index` points just past the consumed input.
   */
  function lexer(item: string, index: number, file: string): LexToken {
    const isEnd = (pos: number): boolean => file.length <= pos;

    // HTML treats tab, LF, FF, CR and space as whitespace inside tags.
    const isSpace = (ch: string): boolean =>
      ch === " " || ch === "\t" || ch === "\n" || ch === "\r" || ch === "\f";

    const isLetter = (ch: string): boolean =>
      (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z");

    const skipSpaces = (pos: number): number => {
      let cursor = pos;
      while (!isEnd(cursor) && isSpace(file.charAt(cursor))) {
        cursor++;
      }
      return cursor;
    };

    // Offset of `needle` at or after `from`, or the end of the input if it never occurs.
    const findOrEnd = (needle: string, from: number): number => {
      const hit = file.indexOf(needle, from);
      return hit === -1 ? file.length : hit;
    };

    // Text runs up to the next "<". The first character is always consumed, so a
    // stray "<" that does not start a tag becomes text instead of stalling the caller.
    function readText(start: number): LexToken {
      const end = findOrEnd("<", start + 1);
      return { type: "text", content: file.slice(start, end), index: end };
    }

    // `nameStart` is the offset right after "</".
    function readClosingTag(nameStart: number): LexToken {
      const close = findOrEnd(">", nameStart);
      return {
        type: "node",
        tag: file.slice(nameStart, close).trim(),
        index: Math.min(close + 1, file.length),
        closeTag: true,
      };
    }

    // `start` is the offset of the "<" in "<!".
    function readDeclaration(start: number): LexToken {
      if (file.startsWith("<!--", start)) {
        const bodyStart = start + 4;
        const close = findOrEnd("-->", bodyStart);
        return {
          type: "comment",
          content: file.slice(bodyStart, close),
          index: Math.min(close + 3, file.length),
        };
      }
      // Not a comment (e.g. <!DOCTYPE html>): keep the declaration verbatim as text
      // rather than swallowing everything up to the next "-->".
      const end = Math.min(findOrEnd(">", start) + 1, file.length);
      return { type: "text", content: file.slice(start, end), index: end };
    }

    // `nameStart` is the offset of the first character of the tag name.
    function readOpeningTag(nameStart: number): LexToken {
      const endsName = (ch: string): boolean => isSpace(ch) || ch === ">" || ch === "/";

      let pos = nameStart;
      while (!isEnd(pos) && !endsName(file.charAt(pos))) {
        pos++;
      }
      const tag = file.slice(nameStart, pos);
      const attrs: LexAttr[] = [];

      while (!isEnd(pos)) {
        const ch = file.charAt(pos);
        if (ch === ">") {
          pos++; // consume the ">" that ends the tag
          break;
        }
        if (isSpace(ch) || ch === "/") {
          pos++; // whitespace between attributes, or the "/" of "<br/>"
          continue;
        }

        // Attribute name. The first character is taken unconditionally so that a
        // stray "=" cannot stop the loop from making progress.
        const keyStart = pos;
        pos++;
        while (!isEnd(pos) && !endsName(file.charAt(pos)) && file.charAt(pos) !== "=") {
          pos++;
        }
        const key = file.slice(keyStart, pos);

        // Optional value. An attribute without "=" (e.g. `disabled`) gets "".
        let value = "";
        const afterKey = skipSpaces(pos);
        if (file.charAt(afterKey) === "=") {
          pos = skipSpaces(afterKey + 1);
          const quote = file.charAt(pos);
          if (quote === '"' || quote === "'") {
            // Quoted: everything up to the matching quote, spaces and ">" included.
            const close = findOrEnd(quote, pos + 1);
            value = file.slice(pos + 1, close);
            pos = Math.min(close + 1, file.length);
          } else {
            // Unquoted: runs until whitespace or the end of the tag.
            const valueStart = pos;
            while (!isEnd(pos) && !isSpace(file.charAt(pos)) && file.charAt(pos) !== ">") {
              pos++;
            }
            value = file.slice(valueStart, pos);
          }
        }
        attrs.push({ [key]: value });
      }

      return { type: "node", tag, index: pos, children: [], attrs };
    }

    if (isEnd(index)) {
      return { type: "EOF", index };
    }
    if (item === "<") {
      const next = file.charAt(index + 1);
      if (next === "/") {
        return readClosingTag(index + 2);
      }
      if (next === "!") {
        return readDeclaration(index);
      }
      if (isLetter(next)) {
        return readOpeningTag(index + 1);
      }
      // "<" followed by anything else (as in "a < b") is plain text.
    }
    return readText(index);
  }
  /**
   * Parses an HTML string into a tree rooted at `{ type: "root", children: [...] }`.
   *
   * Tokens come from `lexer`. Every non-closing token is appended to the children
   * of the element on top of the open-element stack, and opening tags are then
   * pushed so that the tokens that follow nest inside them.
   *
   * - A closing tag closes the nearest open element with the same name
   *   (case-insensitive), together with anything left open inside it.
   * - A closing tag that matches no open element is ignored, so the root can
   *   never be popped off the stack.
   * - Void elements (`<br>`, `<img>`, ...) are never pushed, because they cannot
   *   contain children and have no closing tag.
   *
   * @param file The HTML source text.
   * @returns The root node. The `index` field of each token is removed before
   *          the token is stored in the tree.
   */
  export function parser(file: any) {
    // Elements that the HTML standard defines as void.
    const voidTags = new Set([
      "area", "base", "br", "col", "embed", "hr", "img",
      "input", "link", "meta", "source", "track", "wbr",
    ]);

    // Canonical tag name for comparisons. A trailing "/" is tolerated in case the
    // tokenizer left the slash of "<br/>" inside the name.
    const canonical = (tag: unknown): string =>
      typeof tag === "string" ? tag.trim().replace(/\/$/, "").toLowerCase() : "";

    const root: any = { type: "root", children: [] };
    const stack: any[] = [root];
    let index = 0;

    while (index < file.length) {
      const token: any = lexer(file.charAt(index), index, file);
      index = token.index;
      delete token.index; // the position is bookkeeping only, not part of the AST

      if (token.type === "EOF") {
        break;
      }

      if (token.closeTag) {
        // Closing tag: unwind to the matching open element. The search stops
        // above slot 0, so the root always stays on the stack.
        const wanted = canonical(token.tag);
        for (let depth = stack.length - 1; depth > 0; depth--) {
          if (canonical(stack[depth].tag) === wanted) {
            stack.length = depth;
            break;
          }
        }
        continue;
      }

      // Not a closing tag: attach to the element currently on top of the stack.
      stack[stack.length - 1].children.push(token);

      // Opening tags become the new top of the stack, except for void elements.
      if (token.type === "node" && !voidTags.has(canonical(token.tag))) {
        stack.push(token);
      }
    }

    return root;
  }

ast2html.ts AST2HTML

  /**
   * Serializes an AST back to HTML.
   *
   * The tree is walked depth-first. Each visited node receives an `output`
   * property holding its own HTML, so callers that read `ast.output` after the
   * call keep working, and the HTML of `ast` itself is also returned.
   *
   * Rendering rules by `type`:
   * - `"root"`    -> the concatenated output of its children
   * - `"node"`    -> `<tag attrs>children</tag>`; void elements such as `<br>` get
   *                 no closing tag
   * - `"text"`    -> `content` as is
   * - `"comment"` -> `<!--content-->`
   * - any other   -> the empty string
   *
   * Attribute values are wrapped in double quotes; a `"` inside a value is
   * written as `&quot;` so it cannot end the attribute early.
   *
   * @param ast Root of the tree, or any subtree of it.
   * @returns The HTML for `ast`.
   */
  export function generate(ast: any): string {
    // Elements that the HTML standard defines as void. Writing "</br>" for them
    // would be read back by browsers as a second <br>.
    const voidTags = new Set([
      "area", "base", "br", "col", "embed", "hr", "img",
      "input", "link", "meta", "source", "track", "wbr",
    ]);

    // Builds the attribute part of an opening tag, including its leading space.
    function renderAttrs(node: any): string {
      const parts: string[] = [];
      for (const attr of node.attrs || []) {
        for (const name of Object.keys(attr)) {
          const value = String(attr[name]).replace(/"/g, "&quot;");
          parts.push(`${name}="${value}"`);
        }
      }
      return parts.length > 0 ? ` ${parts.join(" ")}` : "";
    }

    // Renders one node after its children, and records the result on the node.
    function render(node: any): string {
      if (node === null || typeof node !== "object") {
        return "";
      }

      let inner = "";
      if (Array.isArray(node.children)) {
        for (const child of node.children) {
          inner += render(child);
        }
      }

      let html = "";
      if (node.type === "root") {
        html = inner;
      } else if (node.type === "node") {
        const open = `<${node.tag}${renderAttrs(node)}>`;
        const isVoid = typeof node.tag === "string" && voidTags.has(node.tag.toLowerCase());
        html = isVoid ? `${open}${inner}` : `${open}${inner}</${node.tag}>`;
      } else if (node.type === "text") {
        html = node.content == null ? "" : String(node.content);
      } else if (node.type === "comment") {
        html = `<!--${node.content == null ? "" : node.content}-->`;
      }

      node.output = html;
      return html;
    }

    return render(ast);
  }
  68. // let stack = []
  69. // // 广度遍历
  70. // function levelOrder(ast) {
  71. // let queue = [];
  72. // queue.push(ast);
  73. // while (queue.length) {
  74. // let cur = queue.shift();
  75. // stack.push(cur)
  76. // if (cur.children)
  77. // queue.push(...cur.children)
  78. // }
  79. // }
  80. // levelOrder(ast)

最后

有时间我再加注释吧。