/*
Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.
For licensing, see LICENSE.html or http://ckeditor.com/license
*/

/**
 * HTML text parser.
 *
 * This is an event-style tokenizer: parse() scans a string for tag openers,
 * tag closers and comments, and reports every token (plus the text found
 * between tokens) through the on* callbacks, which are meant to be
 * overridden on the instance.
 * @constructor
 * @example
 * var parser = new CKEDITOR.htmlParser();
 */
CKEDITOR.htmlParser = function()
{
	this._ =
	{
		// One token per match. Capturing groups:
		//   1 : tag name of a closing tag.
		//   2 : text of a comment.
		//   3 : tag name of an opening tag (never contains "/", so that
		//       "<br/>" gives the name "br" and not "br/").
		//   4 : attributes part of an opening tag, including the trailing
		//       "/" of a self-closing tag.
		// The attributes group consumes a single character or a whole quoted
		// string per iteration. It must not repeat a "one or more" run inside
		// the outer repetition, otherwise a "<" that is never followed by ">"
		// makes the matching time grow exponentially with the input length.
		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S\\s]*?)-->)|(?:([^\\/\\s>]+)\\s*((?:[^"\'>]|(?:"[^"]*")|(?:\'[^\']*\'))*)\\/?>))', 'g' )
	};
};

(function()
{
	// Matches one attribute per exec() call. Groups:
	//   1 : attribute name (word characters plus "-", ":" and ".", so that
	//       names like "http-equiv", "data-id" or "xml:lang" are kept whole).
	//   2 : double quoted value.
	//   3 : single quoted value.
	//   4 : unquoted value.
	// A name without a value is accepted when followed by white space, by the
	// end of the string, or by the closing "/" of a self-closing tag.
	var attribsRegex = /([\w\-:.]+)(?:(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+)))|(?=\s|\/?$))/g,

		// Attributes that may appear without a value; their value is then
		// reported as their own name (e.g. checked="checked").
		emptyAttribs = {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};

	/**
	 * Converts the attributes part of an opening tag into an object.
	 * @param {String} attribsPart The raw text found between the tag name and
	 *		the closing ">" (may be empty or undefined).
	 * @returns {Object} Attribute values, keyed by lowercased attribute name.
	 */
	function parseAttributes( attribsPart )
	{
		var attribs = {},
			attribMatch;

		if ( !attribsPart )
			return attribs;

		while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
		{
			var attName = attribMatch[ 1 ].toLowerCase(),
				attValue = attribMatch[ 2 ] || attribMatch[ 3 ] || attribMatch[ 4 ] || '';

			attribs[ attName ] = ( !attValue && emptyAttribs[ attName ] ) ? attName : attValue;
		}

		return attribs;
	}

	CKEDITOR.htmlParser.prototype =
	{
		/**
		 * Function to be fired when a tag opener is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @param {Object} attributes An object containing all tag attributes. Each
		 *		property in this object represents an attribute name and its
		 *		value is the attribute value.
		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
		 *		tag doesn't.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
		 *	 {
		 *		 alert( tagName );  // e.g. "b"
		 *	 };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagOpen	: function() {},

		/**
		 * Function to be fired when a tag closer is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagClose = function( tagName )
		 *	 {
		 *		 alert( tagName );  // e.g. "b"
		 *	 };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagClose	: function() {},

		/**
		 * Function to be fired when text is found. This function
		 * should be overridden when using this class.
		 * @param {String} text The text found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onText = function( text )
		 *	 {
		 *		 alert( text );  // e.g. "Hello"
		 *	 };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onText		: function() {},

		/**
		 * Function to be fired when a CDATA section is found. This function
		 * should be overridden when using this class.
		 * @param {String} cdata The CDATA that has been found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onCDATA = function( cdata )
		 *	 {
		 *		 alert( cdata );  // e.g. "var hello;"
		 *	 };
		 * parser.parse( "&lt;script&gt;var hello;&lt;/script&gt;" );
		 */
		onCDATA		: function() {},

		/**
		 * Function to be fired when a comment is found. This function
		 * should be overridden when using this class.
		 * @param {String} comment The comment text.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onComment = function( comment )
		 *	 {
		 *		 alert( comment );  // e.g. " Example "
		 *	 };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onComment	: function() {},

		/**
		 * Parses text, looking for HTML tokens, like tag openers or closers,
		 * or comments. This function fires the onTagOpen, onTagClose, onText,
		 * onCDATA and onComment functions during its execution.
		 *
		 * While inside an element listed in CKEDITOR.dtd.$cdata, every token
		 * and text fragment is collected untouched and sent as a single
		 * string to onCDATA when the matching closing tag is found. If the
		 * input ends before such an element is closed, the fragments
		 * collected so far are not reported; only the text that follows the
		 * last token is, through onText.
		 * @param {String} html The HTML to be parsed.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * // The onTagOpen, onTagClose, onText and onComment should be overridden
		 * // at this point.
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		parse : function( html )
		{
			var partsRegex = this._.htmlPartsRegex,
				parts,
				tagName,
				nextIndex = 0,
				cdata;	// The collected data inside a CDATA section.

			while ( true )
			{
				// The regex is global and shared by all parse() calls on this
				// instance, so its position is restored from our own counter
				// before every search. A position left behind by a previous
				// call (e.g. one interrupted by a callback that threw) or
				// changed by a nested parse() call made from a callback can
				// then not make this scan skip or repeat input.
				partsRegex.lastIndex = nextIndex;

				parts = partsRegex.exec( html );
				if ( !parts )
					break;

				// Text found between the previous token and this one.
				var tagIndex = parts.index;
				if ( tagIndex > nextIndex )
				{
					var text = html.substring( nextIndex, tagIndex );

					if ( cdata )
						cdata.push( text );
					else
						this.onText( text );
				}

				nextIndex = partsRegex.lastIndex;

				/*
				 "parts" is an array with the following items:
					0 : The entire match for opening/closing tags and comments.
					1 : Group filled with the tag name for closing tags.
					2 : Group filled with the comment text.
					3 : Group filled with the tag name for opening tags.
					4 : Group filled with the attributes part of opening tags.
				 */

				// Closing tag
				if ( ( tagName = parts[ 1 ] ) )
				{
					tagName = tagName.toLowerCase();

					if ( cdata && CKEDITOR.dtd.$cdata[ tagName ] )
					{
						// Send the CDATA data.
						this.onCDATA( cdata.join('') );
						cdata = null;
					}

					// Closing tags found inside CDATA are not reported; they
					// fall through and are saved as raw data below.
					if ( !cdata )
					{
						this.onTagClose( tagName );
						continue;
					}
				}

				// If CDATA is enabled, just save the raw match.
				if ( cdata )
				{
					cdata.push( parts[ 0 ] );
					continue;
				}

				// Opening tag
				if ( ( tagName = parts[ 3 ] ) )
				{
					tagName = tagName.toLowerCase();

					// The closing "/" of a self-closing tag is captured at the
					// end of the attributes part.
					var attribsPart = parts[ 4 ],
						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );

					this.onTagOpen( tagName, parseAttributes( attribsPart ), selfClosing );

					// Open CDATA mode when finding the appropriate tags.
					if ( !cdata && CKEDITOR.dtd.$cdata[ tagName ] )
						cdata = [];

					continue;
				}

				// Comment
				if ( ( tagName = parts[ 2 ] ) )
					this.onComment( tagName );
			}

			// Text found after the last token.
			if ( html.length > nextIndex )
				this.onText( html.substring( nextIndex, html.length ) );
		}
	};
})();