HTML 解析:HTML 转 JSON 对象,JSON 对象转 HTML
```bash
npm install -S html2obj
```

输入的 HTML
```html
<!DOCTYPE html> <html lang="en"> <head> <title id='title-id'>测试标题</title> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="stylesheet" href="static/css/bootstrap.min.css"> <link rel="stylesheet" href="static/css/jquery-confirm.min.css"> <link rel="stylesheet" type="text/css" href="style.css"> <script src="static/js/jquery-3.2.1.min.js"></script> <style> .CodeMirror{ height: 100%; width: 100%; } </style> </head> <body role="document" style="background-color: #2c3e50;"> <h1>Heading</h1> <hr> <h2>&inr;</h2> <pre id='pre-id'> <h1>Heading</h1> <hr> <h2>&inr;</h2> </pre> <script> let highlightedLine = null; let editor; <!-- this should not be parsed separately --> function updateLength(){ const xmlData = editor.getValue(); $("#lengthxml")[0].innerText = xmlData.replace(/>\s*</g, "><").length; } </script> </body> </html>
```

JS 代码可参考 example/parse.example.js
```js
var dom = new Html2Obj().parseHtml(html)[0]
console.log(JSON.stringify(dom))
```

输出结果
```json
{ "tagName": "html", "child": [ { "tagName": "head", "child": [ { "tagName": "title", "child": [ { "tagName": "#text", "child": [ "测试标题" ], "attrs": {} } ], "attrs": { "id": "title-id" } }, { "tagName": "meta", "child": [], "attrs": { "charset": "UTF-8" } }, { "tagName": "meta", "child": [], "attrs": { "name": "viewport", "content": "width=device-width, initial-scale=1" } }, { "tagName": "link", "child": [], "attrs": { "rel": "stylesheet", "href": "static/css/bootstrap.min.css" } }, { "tagName": "link", "child": [], "attrs": { "rel": "stylesheet", "href": "static/css/jquery-confirm.min.css" } }, { "tagName": "link", "child": [], "attrs": { "rel": "stylesheet", "type": "text/css", "href": "style.css" } }, { "tagName": "script", "child": [], "attrs": { "src": "static/js/jquery-3.2.1.min.js" } }, { "tagName": "style", "child": [ { "tagName": "#text", "child": [ "\n .CodeMirror{\n height: 100%;\n width: 100%;\n }\n " ], "attrs": {} } ], "attrs": {} } ], "attrs": {} }, { "tagName": "body", "child": [ { "tagName": "h1", "child": [ { "tagName": "#text", "child": [ "Heading" ], "attrs": {} } ], "attrs": {} }, { "tagName": "hr", "child": [], "attrs": {} }, { "tagName": "h2", "child": [ { "tagName": "#text", "child": [ "&inr;" ], "attrs": {} } ], "attrs": {} }, { "tagName": "pre", "child": [ { "tagName": "#text", "child": [ "\n <h1>Heading</h1>\n <hr>\n <h2>&inr;</h2>\n " ], "attrs": {} } ], "attrs": { "id": "pre-id" } }, { "tagName": "script", "child": [ { "tagName": "#text", "child": [ "\n let highlightedLine = null;\n let editor;\n <!-- this should not be parsed separately -->\n function updateLength(){\n const xmlData = editor.getValue();\n $(\"#lengthxml\")[0].innerText = xmlData.replace(/>s*</g, \"><\").length;\n }\n " ], "attrs": {} } ], "attrs": {} } ], "attrs": { "role": "document", "style": "background-color: #2c3e50;" } } ], "attrs": { "lang": "en" } }
```

目前支持根据属性 id 查找节点
```js
console.log(dom.extractById('title-id').toText())
console.log(dom.extractById('pre-id').toHtml())
```

- toText:返回节点内的文本内容
- toHtml:将节点还原成 html
```json
{ "tagName": "div", "attrs": {}, "child": [] }
```

- tagName:标签名
- attrs:属性对象
- child:子节点数组
文本节点的 tagName 为 `#text`。
```js
var dom = new Html2Obj({
  tagValueProcessor: function (tagName, val) {
    return val;
  },
  attributeValueProcessor: function (attrName, val) {
    return val;
  },
  nodePostProcessor(node) {
    if(node.tagName === 'h1'){
      node.tagName = 'h2'
    }
  }
}).parseHtml(html)[0]
```

- tagValueProcessor:标签中如果有文本节点,会将文本节点内容(val)经过此方法处理
- attributeValueProcessor:处理标签的属性时会调用此方法
- nodePostProcessor:标签节点创建后调用。注意:此时还未解析子节点,所以 child 是空的,只有 tagName 和 attrs