1 /*
  2 Copyright (c) 2003-2012, CKSource - Frederico Knabben. All rights reserved.
  3 For licensing, see LICENSE.html or http://ckeditor.com/license
  4 */
  5 
  6 /**
  7  * Creates a {@link CKEDITOR.htmlParser} class instance.
  8  * @class Provides an "event like" system to parse strings of HTML data.
  9  * @example
 10  * var parser = new CKEDITOR.htmlParser();
 11  * parser.onTagOpen = function( tagName, attributes, selfClosing )
 12  *     {
 13  *         alert( tagName );
 14  *     };
 15  * parser.parse( '<p>Some <b>text</b>.</p>' );
 16  */
 17 CKEDITOR.htmlParser = function()
 18 {
 19 	this._ =
 20 	{
 21 		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\s>]+)\\s*((?:(?:"[^"]*")|(?:\'[^\']*\')|[^"\'>])*)\\/?>))', 'g' )
 22 	};
 23 };
 24 
 25 (function()
 26 {
 27 	var attribsRegex	= /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
 28 		emptyAttribs	= {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};
 29 
 30 	CKEDITOR.htmlParser.prototype =
 31 	{
 32 		/**
 33 		 * Function to be fired when a tag opener is found. This function
 34 		 * should be overriden when using this class.
 35 		 * @param {String} tagName The tag name. The name is guarantted to be
 36 		 *		lowercased.
 37 		 * @param {Object} attributes An object containing all tag attributes. Each
 38 		 *		property in this object represent and attribute name and its
 39 		 *		value is the attribute value.
 40 		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
 41 		 * 		tag doesn't.
 42 		 * @example
 43 		 * var parser = new CKEDITOR.htmlParser();
 44 		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
 45 		 *     {
 46 		 *         alert( tagName );  // e.g. "b"
 47 		 *     });
 48 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 49 		 */
 50 		onTagOpen	: function() {},
 51 
 52 		/**
 53 		 * Function to be fired when a tag closer is found. This function
 54 		 * should be overriden when using this class.
 55 		 * @param {String} tagName The tag name. The name is guarantted to be
 56 		 *		lowercased.
 57 		 * @example
 58 		 * var parser = new CKEDITOR.htmlParser();
 59 		 * parser.onTagClose = function( tagName )
 60 		 *     {
 61 		 *         alert( tagName );  // e.g. "b"
 62 		 *     });
 63 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 64 		 */
 65 		onTagClose	: function() {},
 66 
 67 		/**
 68 		 * Function to be fired when text is found. This function
 69 		 * should be overriden when using this class.
 70 		 * @param {String} text The text found.
 71 		 * @example
 72 		 * var parser = new CKEDITOR.htmlParser();
 73 		 * parser.onText = function( text )
 74 		 *     {
 75 		 *         alert( text );  // e.g. "Hello"
 76 		 *     });
 77 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 78 		 */
 79 		onText		: function() {},
 80 
 81 		/**
 82 		 * Function to be fired when CDATA section is found. This function
 83 		 * should be overriden when using this class.
 84 		 * @param {String} cdata The CDATA been found.
 85 		 * @example
 86 		 * var parser = new CKEDITOR.htmlParser();
 87 		 * parser.onCDATA = function( cdata )
 88 		 *     {
 89 		 *         alert( cdata );  // e.g. "var hello;"
 90 		 *     });
 91 		 * parser.parse( "<script>var hello;</script>" );
 92 		 */
 93 		onCDATA		: function() {},
 94 
 95 		/**
 96 		 * Function to be fired when a commend is found. This function
 97 		 * should be overriden when using this class.
 98 		 * @param {String} comment The comment text.
 99 		 * @example
100 		 * var parser = new CKEDITOR.htmlParser();
101 		 * parser.onComment = function( comment )
102 		 *     {
103 		 *         alert( comment );  // e.g. " Example "
104 		 *     });
105 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
106 		 */
107 		onComment	: function() {},
108 
109 		/**
110 		 * Parses text, looking for HTML tokens, like tag openers or closers,
111 		 * or comments. This function fires the onTagOpen, onTagClose, onText
112 		 * and onComment function during its execution.
113 		 * @param {String} html The HTML to be parsed.
114 		 * @example
115 		 * var parser = new CKEDITOR.htmlParser();
116 		 * // The onTagOpen, onTagClose, onText and onComment should be overriden
117 		 * // at this point.
118 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
119 		 */
120 		parse : function( html )
121 		{
122 			var parts,
123 				tagName,
124 				nextIndex = 0,
125 				cdata;	// The collected data inside a CDATA section.
126 
127 			while ( ( parts = this._.htmlPartsRegex.exec( html ) ) )
128 			{
129 				var tagIndex = parts.index;
130 				if ( tagIndex > nextIndex )
131 				{
132 					var text = html.substring( nextIndex, tagIndex );
133 
134 					if ( cdata )
135 						cdata.push( text );
136 					else
137 						this.onText( text );
138 				}
139 
140 				nextIndex = this._.htmlPartsRegex.lastIndex;
141 
142 				/*
143 				 "parts" is an array with the following items:
144 					0 : The entire match for opening/closing tags and comments.
145 					1 : Group filled with the tag name for closing tags.
146 					2 : Group filled with the comment text.
147 					3 : Group filled with the tag name for opening tags.
148 					4 : Group filled with the attributes part of opening tags.
149 				 */
150 
151 				// Closing tag
152 				if ( ( tagName = parts[ 1 ] ) )
153 				{
154 					tagName = tagName.toLowerCase();
155 
156 					if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )
157 					{
158 						// Send the CDATA data.
159 						this.onCDATA( cdata.join('') );
160 						cdata = null;
161 					}
162 
163 					if ( !cdata )
164 					{
165 						this.onTagClose( tagName );
166 						continue;
167 					}
168 				}
169 
170 				// If CDATA is enabled, just save the raw match.
171 				if ( cdata )
172 				{
173 					cdata.push( parts[ 0 ] );
174 					continue;
175 				}
176 
177 				// Opening tag
178 				if ( ( tagName = parts[ 3 ] ) )
179 				{
180 					tagName = tagName.toLowerCase();
181 
182 					// There are some tag names that can break things, so let's
183 					// simply ignore them when parsing. (#5224)
184 					if ( /="/.test( tagName ) )
185 						continue;
186 
187 					var attribs = {},
188 						attribMatch,
189 						attribsPart = parts[ 4 ],
190 						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );
191 
192 					if ( attribsPart )
193 					{
194 						while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
195 						{
196 							var attName = attribMatch[1].toLowerCase(),
197 								attValue = attribMatch[2] || attribMatch[3] || attribMatch[4] || '';
198 
199 							if ( !attValue && emptyAttribs[ attName ] )
200 								attribs[ attName ] = attName;
201 							else
202 								attribs[ attName ] = attValue;
203 						}
204 					}
205 
206 					this.onTagOpen( tagName, attribs, selfClosing );
207 
208 					// Open CDATA mode when finding the appropriate tags.
209 					if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
210 						cdata = [];
211 
212 					continue;
213 				}
214 
215 				// Comment
216 				if ( ( tagName = parts[ 2 ] ) )
217 					this.onComment( tagName );
218 			}
219 
220 			if ( html.length > nextIndex )
221 				this.onText( html.substring( nextIndex, html.length ) );
222 		}
223 	};
224 })();
225