initial commit
[JIRC.git] / node_modules / jsdom / node_modules / htmlparser / pulls / node-htmlparser / lib / node-htmlparser.js
CommitLineData
39c8b14f 1/***********************************************
2Copyright 2010, Chris Winberry <chris@winberry.net>. All rights reserved.
3Permission is hereby granted, free of charge, to any person obtaining a copy
4of this software and associated documentation files (the "Software"), to
5deal in the Software without restriction, including without limitation the
6rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7sell copies of the Software, and to permit persons to whom the Software is
8furnished to do so, subject to the following conditions:
9
10The above copyright notice and this permission notice shall be included in
11all copies or substantial portions of the Software.
12
13THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19IN THE SOFTWARE.
20***********************************************/
21/* v1.6.3 */
22
23(function () {
24
//Reports whether the script is executing as a node.js (CommonJS) module.
//Every variable node injects into module scope must be present with the
//expected type; anything else is treated as a browser-like environment.
function runningInNode () {
	if ((typeof require) != "function")
		return(false);
	if ((typeof exports) != "object")
		return(false);
	if ((typeof module) != "object")
		return(false);
	if ((typeof __filename) != "string")
		return(false);
	if ((typeof __dirname) != "string")
		return(false);
	return(true);
}
38
39 if (!runningInNode()) {
40 if (!this.Tautologistics)
41 this.Tautologistics = {};
42 else if (this.Tautologistics.NodeHtmlParser)
43 return; //NodeHtmlParser already defined!
44 this.Tautologistics.NodeHtmlParser = {};
45 exports = this.Tautologistics.NodeHtmlParser;
46 }
47
//The kinds of element the parser emits; the string values are what ends up
//in each element's "type" property
var ElementType = {
	Text: "text", //Plain text
	Directive: "directive", //Special tag <!...>
	Comment: "comment", //Special tag <!--...-->
	Script: "script", //Special tag <script>...</script>
	Style: "style", //Special tag <style>...</style>
	Tag: "tag" //Any tag that isn't special
};
57
//Creates a parser that reports everything it parses to "handler"
//  handler: object implementing the callbacks checked by validateHandler()
//Throws whatever validateHandler() throws when the handler is unusable
function Parser (handler) {
	//Validate before storing so an unusable handler is never kept
	this.validateHandler(handler);
	this._handler = handler;
	//reset() also resets the handler, so it can only run once the handler is assigned
	this.reset();
}
63
//**"Static"**//
//Regular expressions used for cleaning up and parsing (stateless)
//Trim leading/trailing whitespace
Parser._reTrim = /(^\s+|\s+$)/g;
//Remove comment tag markup from comment contents
Parser._reTrimComment = /(^\!--|--$)/g;
//Used to find any whitespace to split on
Parser._reWhitespace = /\s/g;
//Used to find the tag name for an element
Parser._reTagName = /^\s*(\/?)\s*([^\s\/]+)/;

//Regular expressions used for parsing (stateful)
//Find attributes in a tag; the four alternatives are double-quoted,
//single-quoted, unquoted and value-less attributes
Parser._reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
//Find tag markers
Parser._reTags = /[\<\>]/g;
75
76//**Public**//
77//Methods//
//Parses an entire HTML document in one call: the parser is reset, all of
//"data" is fed in as a single chunk and the input is then marked complete
Parser.prototype.parseComplete = function Parser$parseComplete (data) {
	this.reset();
	this.parseChunk(data);
	this.done();
};
84
//Appends a piece of an HTML document to the buffer and parses as much of
//it as possible
//  data: the next chunk of markup
Parser.prototype.parseChunk = function Parser$parseChunk (data) {
	if (this._done) {
		var misuse = new Error("Attempted to parse chunk after parsing already done");
		this.handleError(misuse);
	}
	//NOTE: when handleError() returns instead of throwing, the chunk is still parsed
	this._buffer += data; //FIXME: this can be a bottleneck
	this.parseTags();
};
92
//Tells the parser that the HTML being parsed is complete
//Whatever is still sitting in the buffer becomes one final element, the
//element queue is written to the handler and the handler is notified.
//Calling done() again before a reset() is a no-op.
Parser.prototype.done = function Parser$done () {
	if (this._done)
		return;
	this._done = true;

	var leftover = this._buffer;
	if (leftover.length) {
		this._buffer = "";
		var state = this._parseState;
		//Text is kept verbatim, everything else is trimmed
		var element = {
			raw: leftover
			, data: (state == ElementType.Text) ? leftover : leftover.replace(Parser._reTrim, "")
			, type: state
		};
		var isTagLike = (state == ElementType.Tag || state == ElementType.Script || state == ElementType.Style);
		if (isTagLike)
			element.name = this.parseTagName(element.data);
		//parseAttribs() ignores non-tag elements itself, so it is called unconditionally
		this.parseAttribs(element);
		this._elements.push(element);
	}

	this.writeHandler();
	this._handler.done();
};
117
//Resets the parser to a blank state, ready to parse a new HTML document
//The handler is reset as well, after the parser's own state has been cleared
Parser.prototype.reset = function Parser$reset () {
	//Input side: buffered markup and scan positions
	this._buffer = "";
	this._current = 0;
	this._next = 0;
	this._prevTagSep = '';
	this._parseState = ElementType.Text;
	this._tagStack = [];
	//Output side: queued elements and completion flag
	this._elements = [];
	this._elementsCurrent = 0;
	this._done = false;

	this._handler.reset();
};
131
//**Private**//
//Properties//
//Prototype-level defaults; reset() gives every instance its own values
//Handler for parsed elements
Parser.prototype._handler = null;
//Buffer of unparsed data
Parser.prototype._buffer = null;
//Flag indicating whether parsing is done
Parser.prototype._done = false;
//Array of parsed elements
Parser.prototype._elements = null;
//Pointer to last element in _elements that has been processed
Parser.prototype._elementsCurrent = 0;
//Position in data that has already been parsed
Parser.prototype._current = 0;
//Position in data of the next tag marker (<>)
Parser.prototype._next = 0;
//Current type of element being parsed
Parser.prototype._parseState = ElementType.Text;
//Previous tag marker found
Parser.prototype._prevTagSep = '';
//Stack of element types previously encountered; keeps track of when
//parsing occurs inside a script/comment/style tag
Parser.prototype._tagStack = null;
146
147//Methods//
//Takes an array of elements and parses any found attributes
//  elements: array of element objects as produced by the parser
//Returns the same array; every tag, script and style element in it has been
//passed through parseAttribs()
Parser.prototype.parseTagAttribs = function Parser$parseTagAttribs (elements) {
	var idxEnd = elements.length;

	for (var idx = 0; idx < idxEnd; idx++) {
		var element = elements[idx];
		//ElementType keys are capitalised: a lowercase "style" lookup yields
		//undefined and would silently exclude <style> elements
		if (element.type == ElementType.Tag || element.type == ElementType.Script || element.type == ElementType.Style)
			this.parseAttribs(element);
	}

	return(elements);
};
161
//Takes an element and adds an "attribs" property for any element attributes found
//  element: parser element; only tag, script and style elements are inspected
//The attribute text is whatever follows the first whitespace-delimited word
//of element.data. Value-less attributes get their own name as value.
Parser.prototype.parseAttribs = function Parser$parseAttribs (element) {
	var isTagLike = (element.type == ElementType.Script || element.type == ElementType.Style || element.type == ElementType.Tag);
	if (!isTagLike)
		return;

	var tagName = element.data.split(Parser._reWhitespace, 1)[0];
	var attribRaw = element.data.substring(tagName.length);
	if (attribRaw.length < 1)
		return;

	//Capture groups come as [name, value] pairs: double-quoted, single-quoted,
	//unquoted and finally the bare attribute (its own name doubles as value)
	var groupPairs = [[1, 2], [3, 4], [5, 6], [7, 7]];
	var pattern = Parser._reAttrib;
	//The regex is shared and global, so rewind it before every use
	pattern.lastIndex = 0;
	for (var match = pattern.exec(attribRaw); match; match = pattern.exec(attribRaw)) {
		if (element.attribs == undefined)
			element.attribs = {};

		for (var i = 0; i < groupPairs.length; i++) {
			var attribName = match[groupPairs[i][0]];
			if (typeof attribName == "string" && attribName.length) {
				element.attribs[attribName] = match[groupPairs[i][1]];
				break;
			}
		}
	}
};
190
//Extracts the base tag name from the data value of an element
//  data: trimmed tag contents, e.g. 'div class="x"' or '/div'
//Returns the tag name, prefixed with "/" for closing tags, or "" when no
//name can be found
Parser.prototype.parseTagName = function Parser$parseTagName (data) {
	var match = (data == null || data == "") ? null : Parser._reTagName.exec(data);
	if (!match)
		return("");
	var closingPrefix = match[1] ? "/" : "";
	return(closingPrefix + match[2]);
};
200
//Scans the buffered HTML for tag markers ("<" and ">") and turns every span
//between two markers into an element that is queued on this._elements.
//Whatever follows the last marker stays in the buffer for the next chunk and
//the queue is then handed to writeHandler().
//I admit, this function is rather large but splitting up had a noticeable impact on speed
Parser.prototype.parseTags = function Parser$parseTags () {
	var bufferEnd = this._buffer.length - 1;
	//_reTags is shared and stateful (global flag): always scan from the start of the buffer
	Parser._reTags.lastIndex = 0;
	while (Parser._reTags.test(this._buffer)) {
		this._next = Parser._reTags.lastIndex - 1;
		var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
		var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse

		//A new element to eventually be appended to the element list
		var element = {
			raw: rawData
			, data: (this._parseState == ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
			, type: this._parseState
		};

		var elementName = this.parseTagName(element.data);

		//This section inspects the current tag stack and modifies the current
		//element if we're actually parsing a special area (script/comment/style tag)
		if (this._tagStack.length) { //We're parsing inside a script/comment/style tag
			if (this._tagStack[this._tagStack.length - 1] == ElementType.Script) { //We're currently in a script tag
				if (elementName == "/script") //Actually, we're no longer in a script tag, so pop it off the stack
					this._tagStack.pop();
				else { //Not a closing script tag
					if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
						//All data from here to script close is now a text element
						element.type = ElementType.Text;
						//If the previous element is text, append the current text to it
						if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Text) {
							var prevElement = this._elements[this._elements.length - 1];
							prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
							element.raw = element.data = ""; //This causes the current element to not be added to the element list
						}
					}
				}
			}
			else if (this._tagStack[this._tagStack.length - 1] == ElementType.Style) { //We're currently in a style tag
				if (elementName == "/style") //Actually, we're no longer in a style tag, so pop it off the stack
					this._tagStack.pop();
				else {
					if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
						//All data from here to style close is now a text element
						element.type = ElementType.Text;
						//If the previous element is text, append the current text to it
						if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Text) {
							//Look the previous element up on every pass: "var" is function scoped, so
							//relying on a value assigned in an earlier iteration would either find
							//nothing (the tag marker is lost) or modify an unrelated element
							var prevElement = this._elements[this._elements.length - 1];
							if (element.raw != "") {
								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
								element.raw = element.data = ""; //This causes the current element to not be added to the element list
							}
							else { //Element is empty, so just append the last tag marker found
								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep;
							}
						}
						else //The previous element was not text
							if (element.raw != "")
								element.raw = element.data = element.raw;
					}
				}
			}
			else if (this._tagStack[this._tagStack.length - 1] == ElementType.Comment) { //We're currently in a comment tag
				var rawLen = element.raw.length;
				if (element.raw.charAt(rawLen - 2) == "-" && element.raw.charAt(rawLen - 1) == "-" && tagSep == ">") {
					//Actually, we're no longer in a comment tag, so pop it off the stack
					this._tagStack.pop();
					//If the previous element is a comment, append the current text to it
					if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Comment) {
						var prevElement = this._elements[this._elements.length - 1];
						prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(Parser._reTrimComment, "");
						element.raw = element.data = ""; //This causes the current element to not be added to the element list
						element.type = ElementType.Text;
					}
					else //Previous element not a comment
						element.type = ElementType.Comment; //Change the current element's type to a comment
				}
				else { //Still in a comment tag
					element.type = ElementType.Comment;
					//If the previous element is a comment, append the current text to it
					if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Comment) {
						var prevElement = this._elements[this._elements.length - 1];
						prevElement.raw = prevElement.data = prevElement.raw + element.raw + tagSep;
						element.raw = element.data = ""; //This causes the current element to not be added to the element list
						element.type = ElementType.Text;
					}
					else
						element.raw = element.data = element.raw + tagSep;
				}
			}
		}

		//Processing of non-special tags
		if (element.type == ElementType.Tag) {
			element.name = elementName;

			if (element.raw.indexOf("!--") == 0) { //This tag is really comment
				element.type = ElementType.Comment;
				delete element["name"];
				var rawLen = element.raw.length;
				//Check if the comment is terminated in the current element
				if (element.raw.charAt(rawLen - 1) == "-" && element.raw.charAt(rawLen - 2) == "-" && tagSep == ">")
					element.raw = element.data = element.raw.replace(Parser._reTrimComment, "");
				else { //It's not so push the comment onto the tag stack
					element.raw += tagSep;
					this._tagStack.push(ElementType.Comment);
				}
			}
			else if (element.raw.indexOf("!") == 0 || element.raw.indexOf("?") == 0) {
				element.type = ElementType.Directive;
				//TODO: what about CDATA?
			}
			else if (element.name == "script") {
				element.type = ElementType.Script;
				//Special tag, push onto the tag stack if not terminated
				if (element.data.charAt(element.data.length - 1) != "/")
					this._tagStack.push(ElementType.Script);
			}
			else if (element.name == "/script")
				element.type = ElementType.Script;
			else if (element.name == "style") {
				element.type = ElementType.Style;
				//Special tag, push onto the tag stack if not terminated
				if (element.data.charAt(element.data.length - 1) != "/")
					this._tagStack.push(ElementType.Style);
			}
			else if (element.name == "/style")
				element.type = ElementType.Style;
			//Closing tags carry nothing but their name
			if (element.name && element.name.charAt(0) == "/")
				element.data = element.name;
		}

		//Add all tags and non-empty text elements to the element list
		if (element.raw != "" || element.type != ElementType.Text) {
			this.parseAttribs(element);
			this._elements.push(element);
			//If tag self-terminates, add an explicit, separate closing tag
			if (
				element.type != ElementType.Text
				&&
				element.type != ElementType.Comment
				&&
				element.type != ElementType.Directive
				&&
				element.data.charAt(element.data.length - 1) == "/"
				)
				this._elements.push({
					raw: "/" + element.name
					, data: "/" + element.name
					, name: "/" + element.name
					, type: element.type
				});
		}
		//A "<" opens a tag, a ">" returns to plain text
		this._parseState = (tagSep == "<") ? ElementType.Tag : ElementType.Text;
		this._current = this._next + 1;
		this._prevTagSep = tagSep;
	}

	//Keep only the part of the buffer that has not been consumed yet
	this._buffer = (this._current <= bufferEnd) ? this._buffer.substring(this._current) : "";
	this._current = 0;

	this.writeHandler();
};
365
//Checks the handler to make sure it is an object with the right "interface"
//  handler: candidate handler object
//Throws an Error naming the first problem found: the handler is not an
//object, or one of the required callbacks is not a function
Parser.prototype.validateHandler = function Parser$validateHandler (handler) {
	//typeof null is "object" as well, so null has to be rejected explicitly
	if (handler === null || (typeof handler) != "object")
		throw new Error("Handler is not an object");

	var required = ["reset", "done", "writeTag", "writeText", "writeComment", "writeDirective"];
	for (var i = 0; i < required.length; i++) {
		if ((typeof handler[required[i]]) != "function")
			throw new Error("Handler method '" + required[i] + "' is invalid");
	}
};
383
//Writes parsed elements out to the handler
//  forceFlush: when truthy, write even while a script/style/comment section
//              is still open on the tag stack
//Elements leave the queue in order and go to the handler callback that
//matches their type; anything that is not a comment, directive or text is
//written as a tag
Parser.prototype.writeHandler = function Parser$writeHandler (forceFlush) {
	//Inside an unterminated special section the queued elements may still be merged, so hold them back
	if (this._tagStack.length && !forceFlush)
		return;

	while (this._elements.length) {
		var element = this._elements.shift();
		var callbackName;
		if (element.type === ElementType.Comment)
			callbackName = "writeComment";
		else if (element.type === ElementType.Directive)
			callbackName = "writeDirective";
		else if (element.type === ElementType.Text)
			callbackName = "writeText";
		else
			callbackName = "writeTag";
		this._handler[callbackName](element);
	}
};
407
//Reports a parser error: handed to the handler's error() callback when it
//has one, thrown otherwise
//  error: the Error to report
Parser.prototype.handleError = function Parser$handleError (error) {
	if ((typeof this._handler.error) != "function")
		throw error;
	this._handler.error(error);
};
414
//TODO: make this a truly streamable handler
//Handler for RSS and Atom feeds, built on DefaultHandler with fixed options:
//whitespace-only text is dropped, raw/data properties are stripped and the
//HTML empty-tag rules are switched off
//  callback: passed through to DefaultHandler
function RssHandler (callback) {
	var feedOptions = { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false };
	RssHandler.super_.call(this, callback, feedOptions);
}
inherits(RssHandler, DefaultHandler);
420
//Signals that parsing is done: when the top level of the DOM contains an
//<rss> or <feed> element, this.dom is replaced by a normalised feed object
//  { type, id, title, link, description, updated, author, items: [...] }
//where every item has { id, title, link, description, pubDate }.
//Fields that cannot be found in the document are simply left off.
//DefaultHandler's done() is then called in every case.
RssHandler.prototype.done = function RssHandler$done () {
	//Stores extract(<first matching element>) as target[prop]
	//Feed fields are optional, so any lookup failure (element missing, no text
	//child, no attributes) deliberately leaves the property unset
	function assign (target, prop, tagName, children, extract, recurse) {
		try {
			target[prop] = extract(DomUtils.getElementsByTagName(tagName, children, recurse)[0]);
		} catch (ex) { }
	}
	function textOf (node) { return(node.children[0].data); }
	function dateOf (node) { return(new Date(node.children[0].data)); }
	function hrefOf (node) { return(node.attribs.href); }

	var found = DomUtils.getElementsByTagName(function (value) { return(value == "rss" || value == "feed"); }, this.dom, false);
	var feedRoot = found.length ? found[0] : null;

	if (feedRoot) {
		var feed = { };
		var fields; //Child elements holding the feed level fields and the items

		if (feedRoot.name == "rss") {
			feed.type = "rss";
			//An <rss> element without children or without a channel yields an empty feed instead of a TypeError
			var channel = feedRoot.children ? feedRoot.children[0] : null; //<channel/>
			fields = channel ? channel.children : [];
			feed.id = "";
			assign(feed, "title", "title", fields, textOf, false);
			assign(feed, "link", "link", fields, textOf, false);
			assign(feed, "description", "description", fields, textOf, false);
			assign(feed, "updated", "lastBuildDate", fields, dateOf, false);
			assign(feed, "author", "managingEditor", fields, textOf, false);
			feed.items = [];
			DomUtils.getElementsByTagName("item", fields).forEach(function (item) {
				var entry = {};
				assign(entry, "id", "guid", item.children, textOf, false);
				assign(entry, "title", "title", item.children, textOf, false);
				assign(entry, "link", "link", item.children, textOf, false);
				assign(entry, "description", "description", item.children, textOf, false);
				assign(entry, "pubDate", "pubDate", item.children, dateOf, false);
				feed.items.push(entry);
			});
		} else {
			feed.type = "atom";
			fields = feedRoot.children;
			assign(feed, "id", "id", fields, textOf, false);
			assign(feed, "title", "title", fields, textOf, false);
			assign(feed, "link", "link", fields, hrefOf, false);
			assign(feed, "description", "subtitle", fields, textOf, false);
			assign(feed, "updated", "updated", fields, dateOf, false);
			//The author's email is nested (e.g. inside <author>), hence the recursive search
			assign(feed, "author", "email", fields, textOf, true);
			feed.items = [];
			DomUtils.getElementsByTagName("entry", fields).forEach(function (item) {
				var entry = {};
				assign(entry, "id", "id", item.children, textOf, false);
				assign(entry, "title", "title", item.children, textOf, false);
				assign(entry, "link", "link", item.children, hrefOf, false);
				assign(entry, "description", "summary", item.children, textOf, false);
				assign(entry, "pubDate", "updated", item.children, dateOf, false);
				feed.items.push(entry);
			});
		}

		this.dom = feed;
	}
	RssHandler.super_.prototype.done.call(this);
};
515
516///////////////////////////////////////////////////
517
//Handler that builds a hierarchical DOM from the elements written by a Parser
//  callback: optional function(error, dom) invoked when parsing is done or fails
//  options:  optional settings; missing ones are filled in on the passed object
//            ignoreWhitespace (default false), verbose (default true),
//            enforceEmptyTags (default true)
function DefaultHandler (callback, options) {
	this.reset();
	var settings = options || { };
	this._options = settings;
	if (settings.ignoreWhitespace == null)
		settings.ignoreWhitespace = false; //Keep whitespace-only text nodes
	if (settings.verbose == null)
		settings.verbose = true; //Keep data property for tags and raw property for all
	if (settings.enforceEmptyTags == null)
		settings.enforceEmptyTags = true; //Don't allow children for HTML tags defined as empty in spec
	if ((typeof callback) == "function")
		this._callback = callback;
}
530
//**"Static"**//
//HTML tags that shouldn't contain child nodes
DefaultHandler._emptyTags = {
	area: 1,
	base: 1,
	basefont: 1,
	br: 1,
	col: 1,
	frame: 1,
	hr: 1,
	img: 1,
	input: 1,
	isindex: 1,
	link: 1,
	meta: 1,
	param: 1,
	embed: 1
};
//Regex to detect whitespace-only (or empty) text nodes
DefaultHandler.reWhitespace = /^\s*$/;
551
//**Public**//
//Properties//
//The hierarchical object containing the parsed HTML
DefaultHandler.prototype.dom = null;
//Methods//
//Resets the handler back to starting state: empty DOM, not done, and an
//empty stack of open elements
DefaultHandler.prototype.reset = function DefaultHandler$reset() {
	var openElements = [];
	//Convenience accessor for the innermost open element (null when the stack is empty)
	openElements.last = function DefaultHandler$_tagStack$last () {
		if (this.length)
			return(this[this.length - 1]);
		return(null);
	};

	this.dom = [];
	this._done = false;
	this._tagStack = openElements;
};
//Signals the handler that parsing is done
//Marks the handler as done and reports completion (no error) through handleCallback()
DefaultHandler.prototype.done = function DefaultHandler$done () {
	this._done = true;
	this.handleCallback(null);
};
//Receives a tag, script or style element from the parser and adds it to the DOM
DefaultHandler.prototype.writeTag = function DefaultHandler$writeTag (element) {
	this.handleElement(element);
};
//Receives a text element from the parser and adds it to the DOM
//With the ignoreWhitespace option set, whitespace-only text is dropped
DefaultHandler.prototype.writeText = function DefaultHandler$writeText (element) {
	if (this._options.ignoreWhitespace && DefaultHandler.reWhitespace.test(element.data))
		return;
	this.handleElement(element);
};
//Receives a comment element from the parser and adds it to the DOM
DefaultHandler.prototype.writeComment = function DefaultHandler$writeComment (element) {
	this.handleElement(element);
};
//Receives a directive element (<!...> or <?...>) from the parser and adds it to the DOM
DefaultHandler.prototype.writeDirective = function DefaultHandler$writeDirective (element) {
	this.handleElement(element);
};
//Receives an error from the parser and forwards it through handleCallback()
DefaultHandler.prototype.error = function DefaultHandler$error (error) {
	this.handleCallback(error);
};
588
//**Private**//
//Properties//
//Prototype-level defaults; the constructor and reset() assign per-instance values
//Handler options for how to behave
DefaultHandler.prototype._options = null;
//Callback to respond to when parsing done
DefaultHandler.prototype._callback = null;
//Flag indicating whether handler has been notified of parsing completed
DefaultHandler.prototype._done = false;
//List of parents to the element currently being processed
DefaultHandler.prototype._tagStack = null;
595//Methods//
//Reports the outcome of parsing
//  error: an Error, or null/undefined on success
//With a callback registered it is invoked as callback(error, dom).
//Without one, an error is thrown and success is silently ignored.
DefaultHandler.prototype.handleCallback = function DefaultHandler$handleCallback (error) {
	if ((typeof this._callback) == "function") {
		this._callback(error, this.dom);
		return;
	}
	if (error)
		throw error;
};
//Places one parsed element into the DOM
//  element: element object written by the parser
//Opening tags are appended to the innermost open element (or to the top
//level) and become the new innermost element unless they are empty tags;
//closing tags close the nearest open element of the same name together with
//everything opened after it; text, comments and directives are appended as
//leaves. With the verbose option off, "raw" (and "data" for tags) is removed.
//Writing after done() is reported as an error through handleCallback().
DefaultHandler.prototype.handleElement = function DefaultHandler$handleElement (element) {
	if (this._done)
		this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()"));
	if (!this._options.verbose) {
		// element.raw = null; //FIXME: Not clean
		//FIXME: Serious performance problem using delete
		delete element.raw;
		if (element.type == "tag" || element.type == "script" || element.type == "style")
			delete element.data;
	}

	var enforceEmptyTags = this._options.enforceEmptyTags;
	//Tells whether a tag may be put on the stack of open elements
	//Only own properties of _emptyTags count: a plain lookup also finds members
	//inherited from Object.prototype, which made tags such as <constructor> or
	//<toString> behave like empty tags
	function canHaveChildren (name) {
		if (!enforceEmptyTags)
			return(true);
		return(!(Object.prototype.hasOwnProperty.call(DefaultHandler._emptyTags, name) && DefaultHandler._emptyTags[name]));
	}

	var parent = this._tagStack.last(); //Innermost open element, null at the top level
	var isContainer = (element.type != ElementType.Text && element.type != ElementType.Comment && element.type != ElementType.Directive);

	if (isContainer && element.name.charAt(0) == "/") {
		//Closing tags that obviously don't have an opening tag are ignored
		if (!parent)
			return;
		//Scan the tag stack for the matching opening tag and pop the stack up
		//to the opening tag's parent; an unmatched closing tag changes nothing
		var baseName = element.name.substring(1);
		if (canHaveChildren(baseName)) {
			var pos = this._tagStack.length - 1;
			while (pos > -1 && this._tagStack[pos].name != baseName)
				pos--;
			if (pos > -1)
				while (this._tagStack.length > pos)
					this._tagStack.pop();
		}
		return;
	}

	//Everything else is added to the DOM: below the innermost open element or at the top level
	if (parent) {
		if (!parent.children)
			parent.children = [];
		parent.children.push(element);
	}
	else
		this.dom.push(element);

	//Don't add tags to the tag stack that can't have children
	if (isContainer && canHaveChildren(element.name))
		this._tagStack.push(element);
};
658
//Helpers for searching the DOM produced by DefaultHandler
var DomUtils = {
	//Tests a single element against a set of predicates
	//  options: map of key -> function(value); every predicate has to pass
	//           "tag_name"     tests element.name (tag/script/style elements only)
	//           "tag_type"     tests element.type
	//           "tag_contains" tests element.data (text/comment/directive elements only)
	//           any other key  tests the attribute of that name
	//  element: the element to test
	//Returns true when the element exists and satisfies every predicate
	testElement: function DomUtils$testElement (options, element) {
		if (!element) {
			return false;
		}

		for (var key in options) {
			if (key == "tag_name") {
				if (element.type != "tag" && element.type != "script" && element.type != "style") {
					return false;
				}
				if (!options["tag_name"](element.name)) {
					return false;
				}
			} else if (key == "tag_type") {
				if (!options["tag_type"](element.type)) {
					return false;
				}
			} else if (key == "tag_contains") {
				if (element.type != "text" && element.type != "comment" && element.type != "directive") {
					return false;
				}
				if (!options["tag_contains"](element.data)) {
					return false;
				}
			} else {
				if (!element.attribs || !options[key](element.attribs[key])) {
					return false;
				}
			}
		}

		return true;
	}

	//Collects all elements matching "options", in document order
	//  options:        map of key -> predicate function or plain value (a plain
	//                  value is compared with ==); see testElement for the keys
	//  currentElement: element or array of elements to start from
	//  recurse:        search children as well; defaults to true
	//  limit:          maximum number of results once children are searched;
	//                  a missing, non-numeric or negative value means no limit
	//Returns an array of matching elements (empty when nothing matches)
	, getElements: function DomUtils$getElements (options, currentElement, recurse, limit) {
		recurse = (recurse === undefined || recurse === null) || !!recurse;
		limit = parseInt(limit, 10);
		if (isNaN(limit)) {
			limit = -1;
		}

		if (!currentElement) {
			return([]);
		}

		var found = [];
		var elementList;

		function getTest (checkVal) {
			return(function (value) { return(value == checkVal); });
		}
		//Build the predicates on a private copy so the caller's options object is left untouched
		var tests = {};
		for (var key in options) {
			tests[key] = ((typeof options[key]) == "function") ? options[key] : getTest(options[key]);
		}

		if (DomUtils.testElement(tests, currentElement)) {
			found.push(currentElement);
		}

		if (limit >= 0 && found.length >= limit) {
			return(found);
		}

		if (recurse && currentElement.children) {
			elementList = currentElement.children;
		} else if (currentElement instanceof Array) {
			elementList = currentElement;
		} else {
			return(found);
		}

		for (var i = 0; i < elementList.length; i++) {
			//Each subtree may only contribute what is still missing, otherwise the total could exceed the limit
			var remaining = (limit >= 0) ? (limit - found.length) : limit;
			found = found.concat(DomUtils.getElements(tests, elementList[i], recurse, remaining));
			if (limit >= 0 && found.length >= limit) {
				break;
			}
		}

		return(found);
	}

	//Finds the first element whose "id" attribute equals id; returns null when there is none
	, getElementById: function DomUtils$getElementById (id, currentElement, recurse) {
		var result = DomUtils.getElements({ id: id }, currentElement, recurse, 1);
		return(result.length ? result[0] : null);
	}

	//Finds tag/script/style elements by name (a string or a predicate function)
	, getElementsByTagName: function DomUtils$getElementsByTagName (name, currentElement, recurse, limit) {
		return(DomUtils.getElements({ tag_name: name }, currentElement, recurse, limit));
	}

	//Finds elements by type (a string or a predicate function)
	, getElementsByTagType: function DomUtils$getElementsByTagType (type, currentElement, recurse, limit) {
		return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
	}
};
753
//Sets up prototype inheritance between two constructors without running
//the parent constructor
//  ctor:      constructor that should inherit
//  superCtor: constructor to inherit from; afterwards reachable as ctor.super_
function inherits (ctor, superCtor) {
	//Intermediate constructor: gives an object linked to the parent's
	//prototype without executing superCtor itself
	function Surrogate () { }
	Surrogate.prototype = superCtor.prototype;

	var proto = new Surrogate();
	proto.constructor = ctor;

	ctor.super_ = superCtor;
	ctor.prototype = proto;
}
761
//Public API; "exports" is either node's module exports or, elsewhere, the
//Tautologistics.NodeHtmlParser namespace object set up at the top of the file
exports.Parser = Parser;
exports.DefaultHandler = DefaultHandler;
exports.RssHandler = RssHandler;
exports.ElementType = ElementType;
exports.DomUtils = DomUtils;
771
772})();