1 /*
  2 Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.
  3 For licensing, see LICENSE.html or http://ckeditor.com/license
  4 */
  5
  6 /**
  7  * A lightweight representation of an HTML DOM structure.
  8  * @constructor
  9  * @example
 10  */
 11 CKEDITOR.htmlParser.fragment = function()
 12 {
 13 	/**
 14 	 * The nodes contained in the root of this fragment.
 15 	 * @type Array
 16 	 * @example
 17 	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
 18 	 * alert( fragment.children.length );  "2"
 19 	 */
 20 	this.children = [];
 21
 22 	/**
 23 	 * Get the fragment parent. Should always be null.
 24 	 * @type Object
 25 	 * @default null
 26 	 * @example
 27 	 */
 28 	this.parent = null;
 29
 30 	/** @private */
 31 	this._ =
 32 	{
 33 		isBlockLike : true,
 34 		hasInlineStarted : false
 35 	};
 36 };
 37
 38 (function()
 39 {
	// Elements whose end tag is marked as optional in the HTML 4.01 DTD
	// (except empty elements).
	var optionalClose =
	{
		colgroup : 1,
		dd : 1,
		dt : 1,
		li : 1,
		option : 1,
		p : 1,
		td : 1,
		tfoot : 1,
		th : 1,
		thead : 1,
		tr : 1
	};

	// Block-level elements whose internal structure should be respected during
	// parser fixing: the list/table containers themselves, merged (through
	// CKEDITOR.tools.extend) with the entries of their DTD maps.
	var nonBreakingBlocks = CKEDITOR.tools.extend(
		{ table : 1, ul : 1, ol : 1, dl : 1 },
		CKEDITOR.dtd.table,
		CKEDITOR.dtd.ul,
		CKEDITOR.dtd.ol,
		CKEDITOR.dtd.dl
	);
 49
	/**
	 * Creates a {@link CKEDITOR.htmlParser.fragment} from an HTML string.
	 * @param {String} fragmentHtml The HTML to be parsed, filling the fragment.
	 * @param {String} [fixForBody] Tag name of the element (such as "p") that
	 *		is opened around content which can't be a direct child of body.
	 *		No wrapping happens when this is omitted or falsy.
	 * @returns CKEDITOR.htmlParser.fragment The fragment created.
	 * @example
	 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<b>Sample</b> Text' );
	 * alert( fragment.children[0].name );  "b"
	 * alert( fragment.children[1].value );  " Text"
	 */
	CKEDITOR.htmlParser.fragment.fromHtml = function( fragmentHtml, fixForBody )
	{
		var parser = new CKEDITOR.htmlParser(),
			fragment = new CKEDITOR.htmlParser.fragment(),
			// Inline elements that are "open" but not yet attached to the
			// tree; they get cloned into place by checkPending() once some
			// content shows up for them.
			pendingInline = [],
			// The node currently receiving new children.
			currentNode = fragment,
			// Indicate we're inside a <pre> element, spaces should be touched differently.
			inPre = false,
			// Node to return to after an element has been moved out of a
			// non-breaking block (see onTagOpen).
			returnPoint;

		/**
		 * Re-opens pending inline elements under the current node.
		 * @param {String} [newTagName] Name of the element about to be
		 *		opened. When given, a pending element is re-opened only if
		 *		its DTD allows that element (or the element is unknown).
		 */
		function checkPending( newTagName )
		{
			for ( var i = 0 ; i < pendingInline.length ; i++ )
			{
				var pendingElement = pendingInline[ i ],
					pendingName = pendingElement.name,
					pendingDtd = CKEDITOR.dtd[ pendingName ],
					currentDtd = currentNode.name && CKEDITOR.dtd[ currentNode.name ];

				if ( ( !currentDtd || currentDtd[ pendingName ] ) && ( !newTagName || !pendingDtd || pendingDtd[ newTagName ] || !CKEDITOR.dtd[ newTagName ] ) )
				{
					// Get a clone for the pending element.
					pendingElement = pendingElement.clone();

					// Add it to the current node and make it the current,
					// so the new element will be added inside of it.
					pendingElement.parent = currentNode;
					currentNode = pendingElement;

					// Remove the pending element (back the index by one
					// to properly process the next entry).
					pendingInline.splice( i, 1 );
					i--;
				}
			}
		}

		/**
		 * Attaches a finished element to the tree.
		 * @param {CKEDITOR.htmlParser.element} element The element to attach.
		 * @param {Object} [target] The node receiving the element. Defaults
		 *		to the current node (or the fragment).
		 * @param {Boolean} [enforceCurrent] When a body-fix wrapper had to be
		 *		created, restore the previous current node afterwards.
		 */
		function addElement( element, target, enforceCurrent )
		{
			target = target || currentNode || fragment;

			// If the target is the fragment and this element can't go inside
			// body (if fixForBody).
			if ( fixForBody && !target.type && !CKEDITOR.dtd.$body[ element.name ] )
			{
				var savedCurrent = currentNode;

				// Open the wrapper element in the fragment.
				currentNode = target;
				parser.onTagOpen( fixForBody, {} );

				// The new target now is the wrapper.
				target = currentNode;

				if ( enforceCurrent )
					currentNode = savedCurrent;
			}

			// Rtrim empty spaces on block end boundary. (#3585)
			if ( element._.isBlockLike
				 && !inPre )
			{

				var length = element.children.length,
					lastChild = element.children[ length - 1 ],
					text;
				if ( lastChild && lastChild.type == CKEDITOR.NODE_TEXT )
				{
					if ( !( text = CKEDITOR.tools.rtrim( lastChild.value ) ) )
						element.children.length = length -1;
					else
						lastChild.value = text;
				}
			}

			target.add( element );

			// The element was moved out of a non-breaking block: continue
			// from the remembered position.
			if ( element.returnPoint )
			{
				currentNode = element.returnPoint;
				delete element.returnPoint;
			}
		}

		parser.onTagOpen = function( tagName, attributes, selfClosing )
		{
			var element = new CKEDITOR.htmlParser.element( tagName, attributes );

			// "isEmpty" will be always "false" for unknown elements, so we
			// must force it if the parser has identified it as a selfClosing tag.
			if ( element.isUnknown && selfClosing )
				element.isEmpty = true;

			// This is a tag to be removed if empty, so do not add it immediately.
			if ( CKEDITOR.dtd.$removeEmpty[ tagName ] )
			{
				pendingInline.push( element );
				return;
			}
			else if ( tagName == 'pre' )
				inPre = true;
			else if ( tagName == 'br' && inPre )
			{
				// Inside <pre>, a line break is stored as a plain new line.
				currentNode.add( new CKEDITOR.htmlParser.text( '\n' ) );
				return;
			}

			var currentName = currentNode.name,
				currentDtd = ( currentName && CKEDITOR.dtd[ currentName ] ) || ( currentNode._.isBlockLike ? CKEDITOR.dtd.div : CKEDITOR.dtd.span );

			// If the element cannot be child of the current element.
			if ( !element.isUnknown && !currentNode.isUnknown && !currentDtd[ tagName ] )
			{
				// If this is the fragment node, just ignore this tag and add
				// its children.
				if ( !currentName )
					return;

				var reApply = false;

				// If the element name is the same as the current element name,
				// then just close the current one and append the new one to the
				// parent. This situation usually happens with <p>, <li>, <dt> and
				// <dd>, specially in IE. Do not enter in this if block in this case.
				if ( tagName == currentName )
				{
					addElement( currentNode, currentNode.parent );
				}
				else
				{
					if ( nonBreakingBlocks[ currentName ] )
					{
						if ( !returnPoint )
							returnPoint = currentNode;
					}
					else
					{
						addElement( currentNode, currentNode.parent, true );

						if ( !optionalClose[ currentName ] )
						{
							// The current element is an inline element, which
							// cannot hold the new one. Put it in the pending list,
							// and try adding the new one after it.
							pendingInline.unshift( currentNode );
						}
					}

					reApply = true;
				}

				// In any of the above cases, we'll be adding, or trying to
				// add it to the parent.
				currentNode = currentNode.returnPoint || currentNode.parent;

				if ( reApply )
				{
					parser.onTagOpen.apply( this, arguments );
					return;
				}
			}

			checkPending( tagName );

			element.parent = currentNode;
			element.returnPoint = returnPoint;
			returnPoint = 0;

			if ( element.isEmpty )
				addElement( element );
			else
				currentNode = element;
		};

		parser.onTagClose = function( tagName )
		{
			var index = 0,
				pendingAdd = [],
				// Open inline elements crossed while looking for the element
				// being closed. They are moved to "pendingInline" only if the
				// closing tag really matches something; otherwise they are
				// still open and must not be re-applied a second time.
				newPendingInline = [],
				candidate = currentNode;

			while ( candidate.type && candidate.name != tagName )
			{
				// If this is an inline element, remember it, so it can
				// continue after the closing tag.
				if ( !candidate._.isBlockLike )
					newPendingInline.unshift( candidate );

				// This node should be added to its parent at this point. But,
				// it should happen only if the closing tag is really closing
				// one of the nodes. So, for now, we just cache it.
				pendingAdd.push( candidate );

				candidate = candidate.parent;
			}

			if ( candidate.type )
			{
				// The closing tag matches an open element: the crossed inline
				// elements go to the front of the pending list. Skip them in
				// the pending list check that follows.
				pendingInline.unshift.apply( pendingInline, newPendingInline );
				index = newPendingInline.length;

				// Add all elements that have been found in the above loop.
				for ( var i = 0 ; i < pendingAdd.length ; i++ )
				{
					var node = pendingAdd[ i ];

					// A <pre> closed implicitly by one of its ancestors
					// also ends the "inside pre" mode.
					if ( node.name == 'pre' )
						inPre = false;

					addElement( node, node.parent );
				}

				currentNode = candidate;

				if( currentNode.name == 'pre' )
					inPre = false;

				addElement( candidate, candidate.parent );

				// The parent should start receiving new nodes now, except if
				// addElement changed the currentNode.
				if ( candidate == currentNode )
					currentNode = currentNode.parent;
			}

			// Check if there is any pending tag to be closed.
			for ( ; index < pendingInline.length ; index++ )
			{
				// If found, just remove it from the list.
				if ( tagName == pendingInline[ index ].name )
				{
					pendingInline.splice( index, 1 );

					// Decrease the index so we continue from the next one.
					index--;
				}
			}
		};

		parser.onText = function( text )
		{
			// Trim empty spaces at beginning of element contents except <pre>.
			if ( !currentNode._.hasInlineStarted && !inPre )
			{
				text = CKEDITOR.tools.ltrim( text );

				if ( text.length === 0 )
					return;
			}

			checkPending();

			// Text directly in the fragment gets wrapped when fixing for body.
			if ( fixForBody && !currentNode.type )
				this.onTagOpen( fixForBody, {} );

			// Shrinking consequential spaces into one single for all elements
			// text contents.
			if ( !inPre )
				text = text.replace( /[\t\r\n ]{2,}|[\t\r\n]/g, ' ' );

			currentNode.add( new CKEDITOR.htmlParser.text( text ) );
		};

		parser.onCDATA = function( cdata )
		{
			currentNode.add( new CKEDITOR.htmlParser.cdata( cdata ) );
		};

		parser.onComment = function( comment )
		{
			currentNode.add( new CKEDITOR.htmlParser.comment( comment ) );
		};

		// Parse it.
		parser.parse( fragmentHtml );

		// Close all pending nodes.
		while ( currentNode.type )
		{
			var parent = currentNode.parent,
				node = currentNode;

			if ( fixForBody && !parent.type && !CKEDITOR.dtd.$body[ node.name ] )
			{
				currentNode = parent;
				parser.onTagOpen( fixForBody, {} );
				parent = currentNode;
			}

			parent.add( node );
			currentNode = parent;
		}

		return fragment;
	};
357
358 	CKEDITOR.htmlParser.fragment.prototype =
359 	{
360 		/**
361 		 * Adds a node to this fragment.
362 		 * @param {Object} node The node to be added. It can be any of of the
363 		 *		following types: {@link CKEDITOR.htmlParser.element},
364 		 *		{@link CKEDITOR.htmlParser.text} and
365 		 *		{@link CKEDITOR.htmlParser.comment}.
366 		 * @example
367 		 */
368 		add : function( node )
369 		{
370 			var len = this.children.length,
371 				previous = len > 0 && this.children[ len - 1 ] || null;
372
373 			if ( previous )
374 			{
375 				// If the block to be appended is following text, trim spaces at
376 				// the right of it.
377 				if ( node._.isBlockLike && previous.type == CKEDITOR.NODE_TEXT )
378 				{
379 					previous.value = CKEDITOR.tools.rtrim( previous.value );
380
381 					// If we have completely cleared the previous node.
382 					if ( previous.value.length === 0 )
383 					{
384 						// Remove it from the list and add the node again.
385 						this.children.pop();
386 						this.add( node );
387 						return;
388 					}
389 				}
390
391 				previous.next = node;
392 			}
393
394 			node.previous = previous;
395 			node.parent = this;
396
397 			this.children.push( node );
398
399 			this._.hasInlineStarted = node.type == CKEDITOR.NODE_TEXT || ( node.type == CKEDITOR.NODE_ELEMENT && !node._.isBlockLike );
400 		},
401
402 		/**
403 		 * Writes the fragment HTML to a CKEDITOR.htmlWriter.
404 		 * @param {CKEDITOR.htmlWriter} writer The writer to which write the HTML.
405 		 * @example
406 		 * var writer = new CKEDITOR.htmlWriter();
407 		 * var fragment = CKEDITOR.htmlParser.fragment.fromHtml( '<P><B>Example' );
408 		 * fragment.writeHtml( writer )
409 		 * alert( writer.getHtml() );  "<p><b>Example</b></p>"
410 		 */
411 		writeHtml : function( writer, filter )
412 		{
413 			for ( var i = 0, len = this.children.length ; i < len ; i++ )
414 				this.children[i].writeHtml( writer, filter );
415 		}
416 	};
417 })();
418