1 /*
  2 Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.
  3 For licensing, see LICENSE.html or http://ckeditor.com/license
  4 */
  5
  6 /**
  7  * HTML text parser.
  8  * @constructor
  9  * @example
 10  */
 11 CKEDITOR.htmlParser = function()
 12 {
 13 	this._ =
 14 	{
 15 		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\s>]+)\\s*((?:(?:[^"\'>]+)|(?:"[^"]*")|(?:\'[^\']*\'))*)\\/?>))', 'g' )
 16 	};
 17 };
 18
 19 (function()
 20 {
 21 	var attribsRegex	= /([\w:]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|$))/g,
 22 		emptyAttribs	= {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};
 23
 24 	CKEDITOR.htmlParser.prototype =
 25 	{
 26 		/**
 27 		 * Function to be fired when a tag opener is found. This function
 28 		 * should be overriden when using this class.
 29 		 * @param {String} tagName The tag name. The name is guarantted to be
 30 		 *		lowercased.
 31 		 * @param {Object} attributes An object containing all tag attributes. Each
 32 		 *		property in this object represent and attribute name and its
 33 		 *		value is the attribute value.
 34 		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
 35 		 * 		tag doesn't.
 36 		 * @example
 37 		 * var parser = new CKEDITOR.htmlParser();
 38 		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
 39 		 *     {
 40 		 *         alert( tagName );  // e.g. "b"
 41 		 *     });
 42 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 43 		 */
 44 		onTagOpen	: function() {},
 45
 46 		/**
 47 		 * Function to be fired when a tag closer is found. This function
 48 		 * should be overriden when using this class.
 49 		 * @param {String} tagName The tag name. The name is guarantted to be
 50 		 *		lowercased.
 51 		 * @example
 52 		 * var parser = new CKEDITOR.htmlParser();
 53 		 * parser.onTagClose = function( tagName )
 54 		 *     {
 55 		 *         alert( tagName );  // e.g. "b"
 56 		 *     });
 57 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 58 		 */
 59 		onTagClose	: function() {},
 60
 61 		/**
 62 		 * Function to be fired when text is found. This function
 63 		 * should be overriden when using this class.
 64 		 * @param {String} text The text found.
 65 		 * @example
 66 		 * var parser = new CKEDITOR.htmlParser();
 67 		 * parser.onText = function( text )
 68 		 *     {
 69 		 *         alert( text );  // e.g. "Hello"
 70 		 *     });
 71 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 72 		 */
 73 		onText		: function() {},
 74
 75 		/**
 76 		 * Function to be fired when CDATA section is found. This function
 77 		 * should be overriden when using this class.
 78 		 * @param {String} cdata The CDATA been found.
 79 		 * @example
 80 		 * var parser = new CKEDITOR.htmlParser();
 81 		 * parser.onCDATA = function( cdata )
 82 		 *     {
 83 		 *         alert( cdata );  // e.g. "var hello;"
 84 		 *     });
 85 		 * parser.parse( "<script>var hello;</script>" );
 86 		 */
 87 		onCDATA		: function() {},
 88
 89 		/**
 90 		 * Function to be fired when a commend is found. This function
 91 		 * should be overriden when using this class.
 92 		 * @param {String} comment The comment text.
 93 		 * @example
 94 		 * var parser = new CKEDITOR.htmlParser();
 95 		 * parser.onText = function( comment )
 96 		 *     {
 97 		 *         alert( comment );  // e.g. " Example "
 98 		 *     });
 99 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
100 		 */
101 		onComment	: function() {},
102
103 		/**
104 		 * Parses text, looking for HTML tokens, like tag openers or closers,
105 		 * or comments. This function fires the onTagOpen, onTagClose, onText
106 		 * and onComment function during its execution.
107 		 * @param {String} html The HTML to be parsed.
108 		 * @example
109 		 * var parser = new CKEDITOR.htmlParser();
110 		 * // The onTagOpen, onTagClose, onText and onComment should be overriden
111 		 * // at this point.
112 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
113 		 */
114 		parse : function( html )
115 		{
116 			var parts,
117 				tagName,
118 				nextIndex = 0,
119 				cdata;	// The collected data inside a CDATA section.
120
121 			while ( ( parts = this._.htmlPartsRegex.exec( html ) ) )
122 			{
123 				var tagIndex = parts.index;
124 				if ( tagIndex > nextIndex )
125 				{
126 					var text = html.substring( nextIndex, tagIndex );
127
128 					if ( cdata )
129 						cdata.push( text );
130 					else
131 						this.onText( text );
132 				}
133
134 				nextIndex = this._.htmlPartsRegex.lastIndex;
135
136 				/*
137 				 "parts" is an array with the following items:
138 					0 : The entire match for opening/closing tags and comments.
139 					1 : Group filled with the tag name for closing tags.
140 					2 : Group filled with the comment text.
141 					3 : Group filled with the tag name for opening tags.
142 					4 : Group filled with the attributes part of opening tags.
143 				 */
144
145 				// Closing tag
146 				if ( ( tagName = parts[ 1 ] ) )
147 				{
148 					tagName = tagName.toLowerCase();
149
150 					if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )
151 					{
152 						// Send the CDATA data.
153 						this.onCDATA( cdata.join('') );
154 						cdata = null;
155 					}
156
157 					if ( !cdata )
158 					{
159 						this.onTagClose( tagName );
160 						continue;
161 					}
162 				}
163
164 				// If CDATA is enabled, just save the raw match.
165 				if ( cdata )
166 				{
167 					cdata.push( parts[ 0 ] );
168 					continue;
169 				}
170
171 				// Opening tag
172 				if ( ( tagName = parts[ 3 ] ) )
173 				{
174 					tagName = tagName.toLowerCase();
175 					var attribs = {},
176 						attribMatch,
177 						attribsPart = parts[ 4 ],
178 						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );
179
180 					if ( attribsPart )
181 					{
182 						while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
183 						{
184 							var attName = attribMatch[1].toLowerCase(),
185 								attValue = attribMatch[2] || attribMatch[3] || attribMatch[4] || '';
186
187 							if ( !attValue && emptyAttribs[ attName ] )
188 								attribs[ attName ] = attName;
189 							else
190 								attribs[ attName ] = attValue;
191 						}
192 					}
193
194 					this.onTagOpen( tagName, attribs, selfClosing );
195
196 					// Open CDATA mode when finding the appropriate tags.
197 					if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
198 						cdata = [];
199
200 					continue;
201 				}
202
203 				// Comment
204 				if( ( tagName = parts[ 2 ] ) )
205 					this.onComment( tagName );
206 			}
207
208 			if ( html.length > nextIndex )
209 				this.onText( html.substring( nextIndex, html.length ) );
210 		}
211 	};
212 })();
213