Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
80684 views
1
var HTMLDecode = require('./htmlencoding').HTMLDecode;
2
3
/**
 * Adapter exposing a single `appendHtmlToElement(html, element)` method on
 * top of whichever HTML parser module/instance is supplied.
 *
 * The parser flavour is detected by duck typing, in this order:
 *   1. a sax-style streaming parser (has a `write` member);
 *   2. a "forgiving" tree-building parser (has `ParseHtml` or `DefaultHandler`);
 *   3. an HTML5 parser module (`moduleName` equal to 'HTML5');
 *   4. anything else: parsing is disabled and a warning is logged on use.
 *
 * @constructor
 * @param {Object} [parser] - parser module or instance to wrap.
 */
function HtmlToDom(parser) {

  if (parser && parser.write) {
    // sax parser
    this.appendHtmlToElement = function (html, element) {

      // Element that currently receives children; moves down on every open
      // tag and back up on every close tag.
      var currentElement = element;

      // Parse errors are deliberately ignored (best-effort parsing).
      parser.onerror = function (e) {};

      parser.ontext = function (t) {
        // A document has no ownerDocument, so fall back to the node itself.
        var ownerDocument = currentElement.ownerDocument || currentElement;
        currentElement.appendChild(ownerDocument.createTextNode(t));
      };

      parser.onopentag = function (node) {
        var ownerDocument = currentElement.ownerDocument || currentElement;
        var newElement = ownerDocument.createElement(node.name.toLowerCase());
        var attributes = node.attributes;
        var attributeName;

        // `attributes` is treated as a plain name -> value map. The check
        // goes through Object.prototype so an attribute that happens to be
        // called "hasOwnProperty" (or "length") cannot break the loop.
        for (attributeName in attributes) {
          if (Object.prototype.hasOwnProperty.call(attributes, attributeName)) {
            newElement.setAttribute(attributeName, attributes[attributeName]);
          }
        }

        currentElement.appendChild(newElement);
        currentElement = newElement;
      };

      parser.onclosetag = function (node) {
        currentElement = currentElement.parentNode;
      };

      parser.write(html).close();

      return element;
    };

  } else if (parser && (parser.ParseHtml || parser.DefaultHandler)) {

    // Forgiving HTML parser

    if (parser.ParseHtml) {
      // davglass/node-htmlparser
    } else if (parser.DefaultHandler) {
      // fb55/htmlparser2

      // Install a ParseHtml shim on the parser object so both forgiving
      // parsers can be driven through the same entry point below.
      parser.ParseHtml = function (rawHtml) {
        var handler = new parser.DefaultHandler();
        // Check if document is XML
        var isXML = (/^<\?\s*xml.*version=["']1\.0["'].*\s*\?>/i).test(rawHtml);
        var parserInstance = new parser.Parser(handler, {
          xmlMode: isXML,
          lowerCaseTags: !isXML,
          lowerCaseAttributeNames: !isXML
        });

        parserInstance.includeLocation = false;
        parserInstance.parseComplete(rawHtml);
        return handler.dom;
      };
    }

    this.appendHtmlToElement = function (html, element) {

      // Coerce non-string input to a string before parsing.
      if (typeof html !== 'string') {
        html += '';
      }

      var parsed = parser.ParseHtml(html);

      for (var i = 0; i < parsed.length; i++) {
        setChild(element, parsed[i]);
      }

      return element;
    };

  } else if (parser && parser.moduleName === 'HTML5') { /* HTML5 parser */
    this.appendHtmlToElement = function (html, element) {

      if (typeof html !== 'string') {
        html += '';
      }

      if (html.length > 0) {
        if (element.nodeType === 9) {
          // nodeType 9: the target is a document, so parse a full document.
          new parser.Parser({document: element}).parse(html);
        } else {
          var p = new parser.Parser({document: element.ownerDocument});
          p.parse_fragment(html, element);
        }
      }

      // Return the element like the other parser branches do.
      return element;
    };

  } else {

    // No usable parser: calling the method only prints a warning.
    this.appendHtmlToElement = function () {
      console.log('');
      console.log('###########################################################');
      console.log('# WARNING: No HTML parser could be found.');
      console.log('# Element.innerHTML setter support has been disabled');
      console.log('# Element.innerHTML getter support will still function');
      console.log('# Download: http://github.com/tautologistics/node-htmlparser');
      console.log('###########################################################');
      console.log('');
    };

  }
}
121
122
// utility function for forgiving parser
123
/**
 * Utility for the forgiving parser: turns one parsed node (and, recursively,
 * its children) into a DOM node and appends it to `parent`.
 *
 * Node types 'tag', 'script' and 'style' become elements, 'text' becomes a
 * text node and 'comment' a comment node; any other type is ignored.
 * Failures while creating or appending the node are reported through
 * `raise('error', ...)` on the owning document instead of being thrown.
 *
 * @param {Object} parent - DOM node (or document) that receives the new child.
 * @param {Object} node - parsed node; `type`, `name`, `data`, `location`,
 *     `attribs` and `children` are read from it.
 * @returns {Object|null} whatever `parent.appendChild` returns, or null when
 *     the node type is unsupported or creating/appending the node failed.
 */
function setChild(parent, node) {

  var c, newNode, currentDocument = parent._ownerDocument || parent;

  switch (node.type) {
    case 'tag':
    case 'script':
    case 'style':
      try {
        newNode = currentDocument.createElement(node.name);
        if (node.location) {
          newNode.sourceLocation = node.location;
          // Inherit the file only when the parent actually has a source
          // location; otherwise a perfectly valid element would be dropped
          // and misreported as "invalid markup" because of a TypeError.
          if (parent.sourceLocation) {
            newNode.sourceLocation.file = parent.sourceLocation.file;
          }
        }
      } catch (err) {
        currentDocument.raise('error', 'invalid markup', {
          exception: err,
          node : node
        });

        return null;
      }
      break;

    case 'text':
      // Decode HTML entities if we're not inside a <script> or <style> tag:
      newNode = currentDocument.createTextNode(/^(?:script|style)$/i.test(parent.nodeName) ?
                                               node.data :
                                               HTMLDecode(node.data));
      break;

    case 'comment':
      newNode = currentDocument.createComment(node.data);
      break;

    default:
      return null;
  }

  if (!newNode) {
    return null;
  }

  if (node.attribs) {
    for (c in node.attribs) {
      if (!Object.prototype.hasOwnProperty.call(node.attribs, c)) {
        continue;
      }
      // Catching errors here helps with improperly escaped attributes,
      // but properly fixing them should (can only?) be done in the
      // htmlparser itself. Skipping the attribute is deliberate best-effort.
      try {
        newNode.setAttribute(c, HTMLDecode(node.attribs[c]));
      } catch (e2) { /* noop */ }
    }
  }

  // Children are attached to the new node before it is appended to `parent`.
  if (node.children) {
    for (c = 0; c < node.children.length; c++) {
      setChild(newNode, node.children[c]);
    }
  }

  try {
    return parent.appendChild(newNode);
  } catch (err) {
    currentDocument.raise('error', err.message, {
      exception: err,
      node : node
    });
    return null;
  }
}
193
194
exports.HtmlToDom = HtmlToDom;
195
196