beautify-html.js 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006
  1. /*jshint curly:true, eqeqeq:true, laxbreak:true, noempty:false */
  2. /*
  3. The MIT License (MIT)
  4. Copyright (c) 2007-2013 Einar Lielmanis and contributors.
  5. Permission is hereby granted, free of charge, to any person
  6. obtaining a copy of this software and associated documentation files
  7. (the "Software"), to deal in the Software without restriction,
  8. including without limitation the rights to use, copy, modify, merge,
  9. publish, distribute, sublicense, and/or sell copies of the Software,
  10. and to permit persons to whom the Software is furnished to do so,
  11. subject to the following conditions:
  12. The above copyright notice and this permission notice shall be
  13. included in all copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  15. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  17. NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  18. BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  19. ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  20. CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21. SOFTWARE.
  22. Style HTML
  23. ---------------
  24. Written by Nochum Sossonko, (nsossonko@hotmail.com)
  25. Based on code initially developed by: Einar Lielmanis, <einar@jsbeautifier.org>
  26. http://jsbeautifier.org/
  27. Usage:
  28. style_html(html_source);
  29. style_html(html_source, options);
  30. The options are:
  31. indent_inner_html (default false) — indent <head> and <body> sections,
  32. indent_size (default 4) — indentation size,
  33. indent_char (default space) — character to indent with,
  34. wrap_line_length (default 250) - maximum amount of characters per line (0 = disable)
  35. brace_style (default "collapse") - "collapse" | "expand" | "end-expand" | "none"
  36. put braces on the same line as control statements (default), or put braces on own line (Allman / ANSI style), or just put end braces on own line, or attempt to keep them where they are.
  37. unformatted (defaults to inline tags) - list of tags, that shouldn't be reformatted
  38. indent_scripts (default normal) - "keep"|"separate"|"normal"
  39. preserve_newlines (default true) - whether existing line breaks before elements should be preserved
  40. Only works before elements, not inside tags or for text.
  41. max_preserve_newlines (default unlimited) - maximum number of line breaks to be preserved in one chunk
  42. indent_handlebars (default false) - format and indent {{#foo}} and {{/foo}}
  43. end_with_newline (false) - end with a newline
  44. extra_liners (default [head,body,/html]) - list of tags that should have an extra newline before them.
  45. e.g.
  46. style_html(html_source, {
  47. 'indent_inner_html': false,
  48. 'indent_size': 2,
  49. 'indent_char': ' ',
  50. 'wrap_line_length': 78,
  51. 'brace_style': 'expand',
  52. 'preserve_newlines': true,
  53. 'max_preserve_newlines': 5,
  54. 'indent_handlebars': false,
  55. 'extra_liners': ['/html']
  56. });
  57. */
  58. (function() {
  59. // function trim(s) {
  60. // return s.replace(/^\s+|\s+$/g, '');
  61. // }
  /**
   * Remove the leading run of whitespace from a string.
   *
   * @param {string} s - text to trim on the left
   * @returns {string} `s` without its leading whitespace
   */
  function ltrim(s) {
      var leading_whitespace = /^\s+/;
      return s.replace(leading_whitespace, '');
  }
  /**
   * Remove the trailing run of whitespace from a string.
   *
   * @param {string} s - text to trim on the right
   * @returns {string} `s` without its trailing whitespace
   */
  function rtrim(s) {
      var trailing_whitespace = /\s+$/;
      return s.replace(trailing_whitespace, '');
  }
  68. function style_html(html_source, options, js_beautify, css_beautify) {
  69. //Wrapper function to invoke all the necessary constructors and deal with the output.
  70. var multi_parser,
  71. indent_inner_html,
  72. indent_size,
  73. indent_character,
  74. wrap_line_length,
  75. brace_style,
  76. unformatted,
  77. preserve_newlines,
  78. max_preserve_newlines,
  79. indent_handlebars,
  80. wrap_attributes,
  81. wrap_attributes_indent_size,
  82. end_with_newline,
  83. extra_liners,
  84. eol;
  // ---- Normalise the caller-supplied options into the local settings ----
  options = options || {};

  // Backwards compatibility to 1.3.4: when wrap_line_length is missing or
  // zero, fall back to the legacy max_char option (if it is set and non-zero).
  if ((options.wrap_line_length === undefined || parseInt(options.wrap_line_length, 10) === 0) &&
      (options.max_char !== undefined && parseInt(options.max_char, 10) !== 0)) {
      options.wrap_line_length = options.max_char;
  }

  indent_inner_html = (options.indent_inner_html === undefined) ? false : options.indent_inner_html;
  indent_size = (options.indent_size === undefined) ? 4 : parseInt(options.indent_size, 10);
  indent_character = (options.indent_char === undefined) ? ' ' : options.indent_char;
  brace_style = (options.brace_style === undefined) ? 'collapse' : options.brace_style;
  // An explicit 0 disables wrapping by substituting a very large limit;
  // a missing value falls back to 250.
  wrap_line_length = parseInt(options.wrap_line_length, 10) === 0 ? 32786 : parseInt(options.wrap_line_length || 250, 10);
  unformatted = options.unformatted || [
      // https://www.w3.org/TR/html5/dom.html#phrasing-content
      'a', 'abbr', 'area', 'audio', 'b', 'bdi', 'bdo', 'br', 'button', 'canvas', 'cite',
      'code', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img',
      'input', 'ins', 'kbd', 'keygen', 'label', 'map', 'mark', 'math', 'meter', 'noscript',
      'object', 'output', 'progress', 'q', 'ruby', 's', 'samp', /* 'script', */ 'select', 'small',
      'span', 'strong', 'sub', 'sup', 'svg', 'template', 'textarea', 'time', 'u', 'var',
      'video', 'wbr', 'text',
      // preexisting - not sure of full effect of removing, leaving in
      'acronym', 'address', 'big', 'dt', 'ins', 'small', 'strike', 'tt',
      'pre',
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6'
  ];
  preserve_newlines = (options.preserve_newlines === undefined) ? true : options.preserve_newlines;
  // With newline preservation off the cap is 0; otherwise an unparsable
  // value means "effectively unlimited".
  max_preserve_newlines = preserve_newlines ?
      (isNaN(parseInt(options.max_preserve_newlines, 10)) ? 32786 : parseInt(options.max_preserve_newlines, 10)) :
      0;
  indent_handlebars = (options.indent_handlebars === undefined) ? false : options.indent_handlebars;
  wrap_attributes = (options.wrap_attributes === undefined) ? 'auto' : options.wrap_attributes;
  wrap_attributes_indent_size = (isNaN(parseInt(options.wrap_attributes_indent_size, 10))) ? indent_size : parseInt(options.wrap_attributes_indent_size, 10);
  end_with_newline = (options.end_with_newline === undefined) ? false : options.end_with_newline;
  // extra_liners accepts an array (copied) or a comma-separated string.
  extra_liners = (typeof options.extra_liners === 'object') && options.extra_liners ?
      options.extra_liners.concat() : (typeof options.extra_liners === 'string') ?
      options.extra_liners.split(',') : 'head,body,/html'.split(',');
  eol = options.eol ? options.eol : '\n';

  // indent_with_tabs overrides both the indent character and the indent size.
  if (options.indent_with_tabs) {
      indent_character = '\t';
      indent_size = 1;
  }

  // Translate the two-character escapes "\r" and "\n" (backslash + letter)
  // into real control characters. The global flag makes sure every
  // occurrence is converted, not just the first one of each kind.
  eol = eol.replace(/\\r/g, '\r').replace(/\\n/g, '\n');
  126. function Parser() {
  // ---- Parser state ----
  this.pos = 0; // index of the next character to read from this.input
  this.token = '';
  this.current_mode = 'CONTENT'; // current Parser mode: 'TAG' or 'CONTENT'

  // Bookkeeping for recorded tags: per-name counters, the indent level stored
  // for each occurrence, and "<name><n>parent" links between occurrences.
  this.tags = {
      parent: 'parent1',
      parentcount: 1,
      parent1: ''
  };

  this.tag_type = '';
  this.token_text = '';
  this.last_token = '';
  this.last_text = '';
  this.token_type = '';
  this.newlines = 0;
  this.indent_content = indent_inner_html;

  // Utilities made available to the various Parser functions.
  this.Utils = {
      whitespace: ['\n', '\r', '\t', ' '],

      single_token: [
          // HTML void elements - aka self-closing tags - aka singletons
          // https://www.w3.org/html/wg/drafts/html/master/syntax.html#void-elements
          'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen',
          'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr',
          // NOTE: Optional tags - are not understood.
          // https://www.w3.org/TR/html5/syntax.html#optional-tags
          // The rules for optional tags are too complex for a simple list.
          // Also, the content of these tags should still be indented in many cases.
          // 'li' is a good example.

          // Doctype and xml elements
          '!doctype', '?xml',
          // ?php tag
          '?php',
          // other tags that were in this list, keeping just in case
          'basefont', 'isindex'
      ],

      // tags that need a line of whitespace before them
      extra_liners: extra_liners,

      // Strict-equality membership test: true when `what` is an element of `arr`.
      in_array: function(what, arr) {
          var idx = 0;
          while (idx < arr.length) {
              if (arr[idx] === what) {
                  return true;
              }
              idx += 1;
          }
          return false;
      }
  };
  // Report whether every character of `text` is one of the characters in
  // Utils.whitespace. An empty string yields true.
  this.is_whitespace = function(text) {
      var idx = 0;
      while (idx < text.length) {
          var ch = text.charAt(idx);
          if (!this.Utils.in_array(ch, this.Utils.whitespace)) {
              return false;
          }
          idx++;
      }
      return true;
  };
  // Skip over a run of whitespace starting at this.pos.
  // Returns false (and changes nothing) when the current character is not
  // whitespace. Otherwise resets this.newlines, advances this.pos past the
  // run, counts '\n' characters into this.newlines (only when
  // preserve_newlines is on and while the count is still
  // <= max_preserve_newlines), and returns true.
  this.traverse_whitespace = function() {
      var ch = this.input.charAt(this.pos);

      if (!this.Utils.in_array(ch, this.Utils.whitespace)) {
          return false;
      }

      this.newlines = 0;
      do {
          if (preserve_newlines && ch === '\n' && this.newlines <= max_preserve_newlines) {
              this.newlines += 1;
          }
          this.pos++;
          ch = this.input.charAt(this.pos);
      } while (this.Utils.in_array(ch, this.Utils.whitespace));

      return true;
  };
  // Separate the next piece of output from what is already in `content`
  // (an array of strings): once the current line has reached
  // wrap_line_length, start a new indented line; otherwise push one space.
  // Returns true when a line break was emitted, false when a space was.
  this.space_or_wrap = function(content) {
      var line_is_full = this.line_char_count >= this.wrap_line_length;

      if (!line_is_full) {
          this.line_char_count++;
          content.push(' ');
          return false;
      }

      this.print_newline(false, content);
      this.print_indentation(content);
      return true;
  };
  // Capture regular content between tags, starting at this.pos and stopping
  // in front of the next '<' (or, with indent_handlebars, in front of a
  // handlebars block tag).
  // Returns the collected text as a string; returns ['', 'TK_EOF'] when the
  // input ends before anything was collected; returns
  // [tag, 'TK_TAG_HANDLEBARS_COMMENT'] when a '{{!' comment is encountered.
  this.get_content = function() {
      var pieces = [];
      var ch = '';

      while (this.input.charAt(this.pos) !== '<') {
          if (this.pos >= this.input.length) {
              return pieces.length ? pieces.join('') : ['', 'TK_EOF'];
          }

          // A whitespace run collapses into a single space or a line wrap.
          if (this.traverse_whitespace()) {
              this.space_or_wrap(pieces);
              continue;
          }

          if (indent_handlebars) {
              // Handlebars parsing is complicated.
              // {{#foo}} and {{/foo}} are formatted tags.
              // {{something}} should get treated as content, except:
              // {{else}} specifically behaves like {{#if}} and {{/if}}
              var lookahead = this.input.substr(this.pos, 3);

              if (lookahead === '{{#' || lookahead === '{{/') {
                  // These are tags and not content.
                  break;
              }
              if (lookahead === '{{!') {
                  // NOTE(review): text already gathered in `pieces` during this
                  // call is not part of the value returned on this path.
                  return [this.get_tag(), 'TK_TAG_HANDLEBARS_COMMENT'];
              }
              if (this.input.substr(this.pos, 2) === '{{' && this.get_tag(true) === '{{else}}') {
                  break;
              }
          }

          ch = this.input.charAt(this.pos);
          this.pos++;
          this.line_char_count++;
          pieces.push(ch); // one character at a time
      }

      return pieces.length ? pieces.join('') : '';
  };
  // Return the raw text from this.pos up to (not including) the next
  // case-insensitive closing tag "</name>", e.g. the body of a script or
  // style element, and move this.pos to that closing tag. When no closing
  // tag is found the rest of the input is returned.
  // Returns ['', 'TK_EOF'] when this.pos is already at the end of the input.
  this.get_contents_to = function(name) {
      if (this.pos === this.input.length) {
          return ['', 'TK_EOF'];
      }

      var closing_tag = new RegExp('</' + name + '\\s*>', 'igm');
      closing_tag.lastIndex = this.pos; // start searching at the current position
      var hit = closing_tag.exec(this.input);
      var stop = hit ? hit.index : this.input.length; // absolute end of the block

      if (this.pos >= stop) {
          return '';
      }

      var body = this.input.substring(this.pos, stop);
      this.pos = stop;
      return body;
  };
  // Register an opening tag in this.tags:
  //   tags[<tag>count]        - number of currently recorded occurrences
  //   tags[<tag><n>]          - indent level at the time occurrence n was recorded
  //   tags[<tag><n>parent]    - key of the entry that was current before it
  // and make the new occurrence the current parent.
  this.record_tag = function(tag) {
      var counter_key = tag + 'count';
      var occurrence = this.tags[counter_key] ? this.tags[counter_key] + 1 : 1;
      this.tags[counter_key] = occurrence;

      var entry_key = tag + occurrence; // e.g. 'div1'
      this.tags[entry_key] = this.indent_level;
      this.tags[entry_key + 'parent'] = this.tags.parent; // e.g. tags.div1parent
      this.tags.parent = entry_key;
  };
  // Handle a closing tag: look up the most recent recorded occurrence of
  // `tag`. If that occurrence is on the current parent chain, restore its
  // indent level and make its parent the current parent. In every case the
  // occurrence's entries are removed and the counter is decremented.
  // A tag that was never recorded is ignored.
  this.retrieve_tag = function(tag) {
      var counter_key = tag + 'count';
      if (!this.tags[counter_key]) {
          return;
      }

      var entry_key = tag + this.tags[counter_key];

      // Climb the parent chain until the entry is found or the chain ends
      // ('' is the initial, top-most value).
      var ancestor = this.tags.parent;
      while (ancestor && ancestor !== entry_key) {
          ancestor = this.tags[ancestor + 'parent'];
      }

      if (ancestor) {
          this.indent_level = this.tags[entry_key];
          this.tags.parent = this.tags[ancestor + 'parent'];
      }

      // Forget the closed occurrence.
      delete this.tags[entry_key + 'parent'];
      delete this.tags[entry_key];
      if (this.tags[counter_key] === 1) {
          delete this.tags[counter_key];
      } else {
          this.tags[counter_key]--;
      }
  };
  // Set this.indent_level to the level stored for the most recent recorded
  // occurrence of `tag`, provided that occurrence is on the current parent
  // chain. Nothing is removed from this.tags.
  this.indent_to_tag = function(tag) {
      var occurrence = this.tags[tag + 'count'];
      if (!occurrence) {
          return;
      }

      var wanted = tag + occurrence;
      for (var cursor = this.tags.parent; cursor; cursor = this.tags[cursor + 'parent']) {
          if (cursor === wanted) {
              this.indent_level = this.tags[wanted];
              return;
          }
      }
  };
  // Read one complete tag starting at this.pos, normalise the spacing between
  // its attributes, and classify it.
  //
  // peek (optional, default false): when true, this.pos and
  // this.line_char_count are restored to their entry values before returning,
  // and the branches guarded by `!peek` do not touch the tag stack.
  //
  // Returns the tag text as a string, or the pair ['', 'TK_EOF'] when the
  // input ends before any character was collected.
  //
  // Side effects when not peeking: sets this.tag_type to one of 'SINGLE',
  // 'HANDLEBARS_ELSE', 'SCRIPT', 'STYLE', 'END' or 'START', may record or
  // retrieve the tag on the tag stack, may set this.indent_content, and may
  // write newlines to this.output for tags listed in extra_liners.
  //
  // NOTE(review): the "unformatted" branch assigns this.tag_type = 'SINGLE'
  // without checking `peek` - confirm callers that peek do not rely on
  // tag_type being left untouched.
  306. this.get_tag = function(peek) { //function to get a full tag and parse its type
  307. var input_char = '',
  308. content = [],
  309. comment = '',
  310. space = false,
  311. first_attr = true,
  312. tag_start, tag_end,
  313. tag_start_char,
  314. orig_pos = this.pos,
  315. orig_line_char_count = this.line_char_count;
  316. peek = peek !== undefined ? peek : false;
  // Collect the tag one character (or one quoted string / handlebars
  // expression) at a time until '>' or a closing '}}' of a handlebars tag.
  317. do {
  318. if (this.pos >= this.input.length) {
  319. if (peek) {
  320. this.pos = orig_pos;
  321. this.line_char_count = orig_line_char_count;
  322. }
  323. return content.length ? content.join('') : ['', 'TK_EOF'];
  324. }
  325. input_char = this.input.charAt(this.pos);
  326. this.pos++;
  327. if (this.Utils.in_array(input_char, this.Utils.whitespace)) { //don't want to insert unnecessary space
  328. space = true;
  329. continue;
  330. }
  // A quote starts an attribute value: take everything up to the matching
  // quote verbatim.
  331. if (input_char === "'" || input_char === '"') {
  332. input_char += this.get_unformatted(input_char);
  333. space = true;
  334. }
  335. if (input_char === '=') { //no space before =
  336. space = false;
  337. }
  338. if (content.length && content[content.length - 1] !== '=' && input_char !== '>' && space) {
  339. //no space after = or before >
  340. var wrapped = this.space_or_wrap(content);
  341. var indentAttrs = wrapped && input_char !== '/' && wrap_attributes !== 'force';
  342. space = false;
  // With wrap_attributes === 'force' every attribute after the first goes on
  // its own line.
  343. if (!first_attr && wrap_attributes === 'force' && input_char !== '/') {
  344. this.print_newline(false, content);
  345. this.print_indentation(content);
  346. indentAttrs = true;
  347. }
  348. if (indentAttrs) {
  349. //indent attributes an auto or forced line-wrap
  350. for (var count = 0; count < wrap_attributes_indent_size; count++) {
  351. content.push(indent_character);
  352. }
  353. }
  // Once a single-space separator is present in content, the first attribute
  // has been seen.
  354. for (var i = 0; i < content.length; i++) {
  355. if (content[i] === ' ') {
  356. first_attr = false;
  357. break;
  358. }
  359. }
  360. }
  361. if (indent_handlebars && tag_start_char === '<') {
  362. // When inside an angle-bracket tag, put spaces around
  363. // handlebars not inside of strings.
  364. if ((input_char + this.input.charAt(this.pos)) === '{{') {
  365. input_char += this.get_unformatted('}}');
  366. if (content.length && content[content.length - 1] !== ' ' && content[content.length - 1] !== '<') {
  367. input_char = ' ' + input_char;
  368. }
  369. space = true;
  370. }
  371. }
  // Remember where the tag started so get_comment can re-read it from there.
  372. if (input_char === '<' && !tag_start_char) {
  373. tag_start = this.pos - 1;
  374. tag_start_char = '<';
  375. }
  376. if (indent_handlebars && !tag_start_char) {
  377. if (content.length >= 2 && content[content.length - 1] === '{' && content[content.length - 2] === '{') {
  378. if (input_char === '#' || input_char === '/' || input_char === '!') {
  379. tag_start = this.pos - 3;
  380. } else {
  381. tag_start = this.pos - 2;
  382. }
  383. tag_start_char = '{';
  384. }
  385. }
  386. this.line_char_count++;
  387. content.push(input_char); //inserts character at-a-time (or string)
  388. if (content[1] && (content[1] === '!' || content[1] === '?' || content[1] === '%')) { //if we're in a comment, do something special
  389. // We treat all comments as literals, even more than preformatted tags
  390. // we just look for the appropriate close tag
  391. content = [this.get_comment(tag_start)];
  392. break;
  393. }
  394. if (indent_handlebars && content[1] && content[1] === '{' && content[2] && content[2] === '!') { //if we're in a comment, do something special
  395. // We treat all comments as literals, even more than preformatted tags
  396. // we just look for the appropriate close tag
  397. content = [this.get_comment(tag_start)];
  398. break;
  399. }
  400. if (indent_handlebars && tag_start_char === '{' && content.length > 2 && content[content.length - 2] === '}' && content[content.length - 1] === '}') {
  401. break;
  402. }
  403. } while (input_char !== '>');
  // Extract the lower-cased tag name (tag_check) from the collected text.
  404. var tag_complete = content.join('');
  405. var tag_index;
  406. var tag_offset;
  407. if (tag_complete.indexOf(' ') !== -1) { //if there's whitespace, thats where the tag name ends
  408. tag_index = tag_complete.indexOf(' ');
  409. } else if (tag_complete.charAt(0) === '{') {
  410. tag_index = tag_complete.indexOf('}');
  411. } else { //otherwise go with the tag ending
  412. tag_index = tag_complete.indexOf('>');
  413. }
  // Skip '<' (1 char), '{{' (2 chars) or '{{#' (3 chars) in front of the name.
  414. if (tag_complete.charAt(0) === '<' || !indent_handlebars) {
  415. tag_offset = 1;
  416. } else {
  417. tag_offset = tag_complete.charAt(2) === '#' ? 3 : 2;
  418. }
  419. var tag_check = tag_complete.substring(tag_offset, tag_index).toLowerCase();
  // Classify the tag. The order of the branches below matters.
  420. if (tag_complete.charAt(tag_complete.length - 2) === '/' ||
  421. this.Utils.in_array(tag_check, this.Utils.single_token)) { //if this tag name is a single tag type (either in the list or has a closing /)
  422. if (!peek) {
  423. this.tag_type = 'SINGLE';
  424. }
  425. } else if (indent_handlebars && tag_complete.charAt(0) === '{' && tag_check === 'else') {
  426. if (!peek) {
  427. this.indent_to_tag('if');
  428. this.tag_type = 'HANDLEBARS_ELSE';
  429. this.indent_content = true;
  430. this.traverse_whitespace();
  431. }
  432. } else if (this.is_unformatted(tag_check, unformatted)) { // do not reformat the "unformatted" tags
  433. comment = this.get_unformatted('</' + tag_check + '>', tag_complete); //...delegate to get_unformatted function
  434. content.push(comment);
  435. tag_end = this.pos - 1;
  436. this.tag_type = 'SINGLE';
  437. } else if (tag_check === 'script' &&
  438. (tag_complete.search('type') === -1 ||
  439. (tag_complete.search('type') > -1 &&
  440. tag_complete.search(/\b(text|application)\/(x-)?(javascript|ecmascript|jscript|livescript|(ld\+)?json)/) > -1))) {
  441. if (!peek) {
  442. this.record_tag(tag_check);
  443. this.tag_type = 'SCRIPT';
  444. }
  445. } else if (tag_check === 'style' &&
  446. (tag_complete.search('type') === -1 ||
  447. (tag_complete.search('type') > -1 && tag_complete.search('text/css') > -1))) {
  448. if (!peek) {
  449. this.record_tag(tag_check);
  450. this.tag_type = 'STYLE';
  451. }
  452. } else if (tag_check.charAt(0) === '!') { //peek for <! comment
  453. // for comments content is already correct.
  454. if (!peek) {
  455. this.tag_type = 'SINGLE';
  456. this.traverse_whitespace();
  457. }
  458. } else if (!peek) {
  459. if (tag_check.charAt(0) === '/') { //this tag is a double tag so check for tag-ending
  460. this.retrieve_tag(tag_check.substring(1)); //remove it and all ancestors
  461. this.tag_type = 'END';
  462. } else { //otherwise it's a start-tag
  463. this.record_tag(tag_check); //push it on the tag stack
  464. if (tag_check.toLowerCase() !== 'html') {
  465. this.indent_content = true;
  466. }
  467. this.tag_type = 'START';
  468. }
  469. // Allow preserving of newlines after a start or end tag
  470. if (this.traverse_whitespace()) {
  471. this.space_or_wrap(content);
  472. }
  473. if (this.Utils.in_array(tag_check, this.Utils.extra_liners)) { //check if this double needs an extra line
  474. this.print_newline(false, this.output);
  475. if (this.output.length && this.output[this.output.length - 2] !== '\n') {
  476. this.print_newline(true, this.output);
  477. }
  478. }
  479. }
  // A peek must not consume input: rewind to where we started.
  480. if (peek) {
  481. this.pos = orig_pos;
  482. this.line_char_count = orig_line_char_count;
  483. }
  484. return content.join(''); //returns fully formatted tag
  485. };
  // Return a comment-like construct in its entirety, verbatim.
  //
  // start_pos: index in this.input of the construct's first character.
  // this.pos is moved there and then advanced past the construct.
  //
  // The closing delimiter is picked from the opening characters:
  //   <![CDATA[ ...   ends at ]]>   (matched case-insensitively)
  //   <![ ...         ends at ]>    (any other "<![" construct, including
  //                                  "<![if ...]>" conditional comments)
  //   <!-- ...        ends at -->
  //   {{! ...         ends at }}
  //   <? ...          ends at ?>
  //   <% ...          ends at %>
  //   anything else   ends at >
  // If the delimiter never occurs, everything up to the end of the input is
  // returned.
  //
  // Because the text is accumulated one character at a time, the generic
  // "<![" prefix is seen before a longer "<![cdata[" prefix could be. The
  // CDATA case is therefore decided by looking ahead in the input; deciding
  // it from the accumulated text would cut a CDATA section short at the first
  // "]>" inside its payload.
  this.get_comment = function(start_pos) {
      // this will have very poor perf, but will work for now.
      var comment = '',
          delimiter = '>',
          matched = false;

      this.pos = start_pos;
      var input_char = this.input.charAt(this.pos);
      this.pos++;

      while (this.pos <= this.input.length) {
          comment += input_char;

          // only need to check for the delimiter if the last chars match
          if (comment.charAt(comment.length - 1) === delimiter.charAt(delimiter.length - 1) &&
              comment.indexOf(delimiter) !== -1) {
              break;
          }

          // only need to search for custom delimiter for the first few characters
          if (!matched && comment.length < 10) {
              if (comment.indexOf('<![') === 0) { // CDATA section or some other <![ construct
                  var opener = this.input.substr(start_pos, 9).toLowerCase();
                  delimiter = (opener === '<![cdata[') ? ']]>' : ']>';
                  matched = true;
              } else if (comment.indexOf('<!--') === 0) { // <!-- comment
                  delimiter = '-->';
                  matched = true;
              } else if (comment.indexOf('{{!') === 0) { // {{! handlebars comment
                  delimiter = '}}';
                  matched = true;
              } else if (comment.indexOf('<?') === 0) { // <? processing instruction / php block
                  delimiter = '?>';
                  matched = true;
              } else if (comment.indexOf('<%') === 0) { // <% template block
                  delimiter = '%>';
                  matched = true;
              }
          }

          input_char = this.input.charAt(this.pos);
          this.pos++;
      }

      return comment;
  };
  /**
   * Build a small matcher that remembers the most recent characters fed to
   * it (lower-cased, at most `delimiter.length` of them) and reports whether
   * that window contains `delimiter`.
   *
   * @param {string} delimiter - text to watch for; compared as given
   * @returns {{add: function(string): void, doesNotMatch: function(): boolean}}
   */
  function tokenMatcher(delimiter) {
      var window_text = '';

      return {
          // Append `str` (lower-cased) and keep only the trailing
          // `delimiter.length` characters.
          add: function(str) {
              var combined = window_text + str.toLowerCase();
              if (combined.length > delimiter.length) {
                  combined = combined.slice(combined.length - delimiter.length);
              }
              window_text = combined;
          },

          // True while the remembered characters do not contain the delimiter.
          doesNotMatch: function() {
              return window_text.indexOf(delimiter) < 0;
          }
      };
  }
  // Return unformatted content in its entirety: read from this.pos up to and
  // including the first occurrence of `delimiter` (matched against the
  // lower-cased input), or up to the end of the input when it never occurs.
  //
  // orig_tag (optional): the opening tag text; when it already contains the
  // delimiter, nothing is consumed and '' is returned.
  //
  // '\r' and '\n' are both emitted as '\n' and reset the line character
  // count; indentation inside the block is deliberately left untouched so
  // that e.g. <pre> content listed in the 'unformatted' array is not altered.
  this.get_unformatted = function(delimiter, orig_tag) {
      var already_closed = orig_tag && orig_tag.toLowerCase().indexOf(delimiter) !== -1;
      if (already_closed) {
          return '';
      }

      var collected = '';
      var ch = '';
      var matcher = tokenMatcher(delimiter);

      do {
          if (this.pos >= this.input.length) {
              return collected;
          }

          ch = this.input.charAt(this.pos);
          this.pos++;

          // Line breaks are normalised and never fed to the delimiter matcher.
          if (ch === '\n' || ch === '\r') {
              collected += '\n';
              this.line_char_count = 0;
              continue;
          }

          collected += ch;
          matcher.add(ch);
          this.line_char_count++;

          if (indent_handlebars && ch === '{' && collected.length && collected.charAt(collected.length - 2) === '{') {
              // Handlebars expressions in strings should also be unformatted.
              // Their text is appended without being considered when
              // stopping for the delimiter.
              collected += this.get_unformatted('}}');
          }
      } while (matcher.doesNotMatch());

      return collected;
  };
  // Initial handler for token retrieval. Returns a [text, type] pair:
  //   - after a script/style start tag: the raw block body, typed
  //     'TK_SCRIPT' / 'TK_STYLE'
  //   - in 'CONTENT' mode: text typed 'TK_CONTENT'
  //   - in 'TAG' mode: the tag typed 'TK_TAG_' + this.tag_type
  // When the underlying reader already returns a pair (e.g. ['', 'TK_EOF'])
  // that pair is passed through unchanged.
  this.get_token = function() {
      var payload;
      var previous = this.last_token;

      if (previous === 'TK_TAG_SCRIPT' || previous === 'TK_TAG_STYLE') {
          var block_kind = previous.substr(7); // 'SCRIPT' or 'STYLE'
          payload = this.get_contents_to(block_kind);
          return (typeof payload === 'string') ? [payload, 'TK_' + block_kind] : payload;
      }

      switch (this.current_mode) {
          case 'CONTENT':
              payload = this.get_content();
              return (typeof payload === 'string') ? [payload, 'TK_CONTENT'] : payload;
          case 'TAG':
              payload = this.get_tag();
              // tag_type is read after get_tag() has classified the tag
              return (typeof payload === 'string') ? [payload, 'TK_TAG_' + this.tag_type] : payload;
          default:
              return undefined;
      }
  };
  // Build the indentation string for the current indent level shifted by
  // `level`. Returns '' when the resulting depth is below 1 (or not a
  // usable number).
  this.get_full_indent = function(level) {
      var depth = (this.indent_level + level) || 0;
      if (depth < 1) {
          return '';
      }
      // joining depth + 1 empty slots yields `depth` copies of indent_string
      var slots = new Array(depth + 1);
      return slots.join(this.indent_string);
  };
  // Decide whether the tag named `tag_check` must be left unformatted.
  //
  // tag_check:   lower-cased tag name
  // unformatted: array of tag names that should not be reformatted
  //
  // Tags not in the list are formatted (false); listed tags other than 'a'
  // are unformatted (true). An 'a' tag may be an HTML5 block-level link, so
  // the tag that follows it is peeked at: the link is formatted only when
  // that next piece is a single isolated tag whose name is itself not in
  // `unformatted`.
  this.is_unformatted = function(tag_check, unformatted) {
      if (!this.Utils.in_array(tag_check, unformatted)) {
          return false;
      }

      if (tag_check.toLowerCase() !== 'a' || !this.Utils.in_array('a', unformatted)) {
          return true;
      }

      // At this point we have an 'a' tag; is its first child something we
      // want to remain unformatted?
      var next_tag = this.get_tag(true /* peek. */ );

      // get_tag returns the pair ['', 'TK_EOF'] instead of a string when the
      // input ends right after the 'a' tag. There is no child to inspect, so
      // respect the unformatted option (calling .match on the pair would throw).
      if (typeof next_tag !== 'string') {
          return true;
      }

      // test next_tag to see if it is just an html tag (no external content)
      var tag = next_tag.match(/^\s*<\s*\/?([a-z]*)\s*[^>]*>\s*$/);

      // If next_tag is not an isolated tag, treat the 'a' tag as having
      // content and respect the unformatted option. The same applies when the
      // isolated tag's name (capture group 1) is itself in `unformatted`.
      if (!tag || this.Utils.in_array(tag[1], unformatted)) {
          return true;
      }
      return false;
  };
  // Handles input/output and some other printing functions: stores the
  // source text and formatting settings on the Parser and installs the
  // output helpers (print_newline, print_indentation, print_token,
  // print_token_raw, indent, unindent).
  //
  // js_source:        text to parse (a falsy value is treated as '')
  // indent_character: character repeated to build one indent step
  // indent_size:      number of indent_character per indent step
  // wrap_line_length: line length at which space_or_wrap starts a new line
  // brace_style:      stored on the Parser as this.brace_style
  this.printer = function(js_source, indent_character, indent_size, wrap_line_length, brace_style) {
      // HACK: newline parsing inconsistent. This brute force normalizes the input.
      this.input = (js_source || '').replace(/\r\n|[\r\u2028\u2029]/g, '\n');
      this.output = [];

      this.indent_character = indent_character;
      this.indent_size = indent_size;
      this.brace_style = brace_style;
      this.wrap_line_length = wrap_line_length;
      this.indent_level = 0;
      this.line_char_count = 0; // characters on the current line, compared against wrap_line_length

      // One indent step = indent_size copies of indent_character.
      this.indent_string = '';
      var remaining = 0;
      while (remaining < this.indent_size) {
          this.indent_string += this.indent_character;
          remaining++;
      }

      // End the current line of `arr` (an array of output strings). Does
      // nothing for a missing or empty array. Unless `force` is set, no second
      // newline is added when `arr` already ends with one. Trailing whitespace
      // of the last entry is trimmed before the newline is appended.
      this.print_newline = function(force, arr) {
          this.line_char_count = 0;
          if (!arr || !arr.length) {
              return;
          }

          var last_index = arr.length - 1;
          var ends_with_newline = arr[last_index] === '\n';

          if (!force && ends_with_newline) {
              return;
          }
          if (!ends_with_newline) {
              arr[last_index] = rtrim(arr[last_index]);
          }
          arr.push('\n'); // we might want the extra line
      };

      // Append one indent_string per indent level to `arr`.
      this.print_indentation = function(arr) {
          var level = 0;
          while (level < this.indent_level) {
              arr.push(this.indent_string);
              this.line_char_count += this.indent_string.length;
              level++;
          }
      };

      // Print `text`, indenting first (and trimming the text's leading
      // whitespace) when the output currently ends with a newline.
      this.print_token = function(text) {
          // Avoid printing initial whitespace.
          if (this.is_whitespace(text) && !this.output.length) {
              return;
          }

          var at_line_start = this.output.length && this.output[this.output.length - 1] === '\n';
          if (text !== '' && at_line_start) {
              this.print_indentation(this.output);
              text = ltrim(text);
          }

          this.print_token_raw(text);
      };

      // Push `text` to the output as-is, then emit the pending newlines
      // counted in this.newlines and reset that counter.
      this.print_token_raw = function(text) {
          // If we are going to print newlines, truncate trailing
          // whitespace, as the newlines will represent the space.
          if (this.newlines > 0) {
              text = rtrim(text);
          }

          if (text) {
              var ends_in_newline = text.length > 1 && text.charAt(text.length - 1) === '\n';
              if (ends_in_newline) {
                  // unformatted tags can grab newlines as their last character
                  this.output.push(text.slice(0, -1));
                  this.print_newline(false, this.output);
              } else {
                  this.output.push(text);
              }
          }

          // The first pending newline only ends the line; further ones are
          // forced so that blank lines are kept.
          var emitted = 0;
          while (emitted < this.newlines) {
              this.print_newline(emitted > 0, this.output);
              emitted++;
          }
          this.newlines = 0;
      };

      // Increase the indent level by one.
      this.indent = function() {
          this.indent_level++;
      };

      // Decrease the indent level by one, never going below zero.
      this.unindent = function() {
          if (this.indent_level > 0) {
              this.indent_level--;
          }
      };
  };
  720. return this;
  721. }
  722. /*_____________________--------------------_____________________*/
  // Create the parser and initialize its printer with the source text and
  // the layout options.
  multi_parser = new Parser();
  multi_parser.printer(html_source, indent_character, indent_size, wrap_line_length, brace_style);

  // Main loop: fetch tokens until TK_EOF and print each one according to its
  // type. After every token the parser remembers it as last_token/last_text.
  for (;;) {
      var token_pair = multi_parser.get_token();
      multi_parser.token_text = token_pair[0];
      multi_parser.token_type = token_pair[1];

      var kind = multi_parser.token_type;
      if (kind === 'TK_EOF') {
          break;
      }

      if (kind === 'TK_TAG_START') {
          // Opening tag: goes on its own line; indent afterwards when the
          // parser flagged indent_content.
          multi_parser.print_newline(false, multi_parser.output);
          multi_parser.print_token(multi_parser.token_text);
          if (multi_parser.indent_content) {
              multi_parser.indent();
              multi_parser.indent_content = false;
          }
          multi_parser.current_mode = 'CONTENT';
      } else if (kind === 'TK_TAG_STYLE' || kind === 'TK_TAG_SCRIPT') {
          multi_parser.print_newline(false, multi_parser.output);
          multi_parser.print_token(multi_parser.token_text);
          multi_parser.current_mode = 'CONTENT';
      } else if (kind === 'TK_TAG_END') {
          // Print a new line only if the tag has no content and has a child.
          var follows_empty_content = multi_parser.last_token === 'TK_CONTENT' && multi_parser.last_text === '';
          if (follows_empty_content) {
              var closing_name = multi_parser.token_text.match(/\w+/)[0];

              // Tag (or handlebars block) name found in the last output chunk, if any.
              var previous_tag = null;
              if (multi_parser.output.length) {
                  previous_tag = multi_parser.output[multi_parser.output.length - 1].match(/(?:<|{{#)\s*(\w+)/);
              }

              // Break the line unless the last chunk opened this very tag
              // or opened an unformatted one.
              var needs_line_break = true;
              if (previous_tag !== null) {
                  needs_line_break = previous_tag[1] !== closing_name &&
                      !multi_parser.Utils.in_array(previous_tag[1], unformatted);
              }
              if (needs_line_break) {
                  multi_parser.print_newline(false, multi_parser.output);
              }
          }
          multi_parser.print_token(multi_parser.token_text);
          multi_parser.current_mode = 'CONTENT';
      } else if (kind === 'TK_TAG_SINGLE') {
          // Don't add a newline before elements that should remain unformatted.
          var single_tag = multi_parser.token_text.match(/^\s*<([a-z-]+)/i);
          var keep_inline = single_tag && multi_parser.Utils.in_array(single_tag[1], unformatted);
          if (!keep_inline) {
              multi_parser.print_newline(false, multi_parser.output);
          }
          multi_parser.print_token(multi_parser.token_text);
          multi_parser.current_mode = 'CONTENT';
      } else if (kind === 'TK_TAG_HANDLEBARS_ELSE') {
          // Don't add a newline if the opening {{#if}} tag is on the current
          // line: scan the output backwards up to the previous newline.
          var if_on_current_line = false;
          var scan_index = multi_parser.output.length - 1;
          while (scan_index >= 0 && multi_parser.output[scan_index] !== '\n') {
              if (multi_parser.output[scan_index].match(/{{#if/)) {
                  if_on_current_line = true;
                  break;
              }
              scan_index--;
          }
          if (!if_on_current_line) {
              multi_parser.print_newline(false, multi_parser.output);
          }
          multi_parser.print_token(multi_parser.token_text);
          if (multi_parser.indent_content) {
              multi_parser.indent();
              multi_parser.indent_content = false;
          }
          multi_parser.current_mode = 'CONTENT';
      } else if (kind === 'TK_TAG_HANDLEBARS_COMMENT' || kind === 'TK_CONTENT') {
          multi_parser.print_token(multi_parser.token_text);
          multi_parser.current_mode = 'TAG';
      } else if (kind === 'TK_STYLE' || kind === 'TK_SCRIPT') {
          if (multi_parser.token_text !== '') {
              multi_parser.print_newline(false, multi_parser.output);
              var embedded_source = multi_parser.token_text;

              // Nested beautifier for this content; false when the matching
              // beautifier is not available as a function.
              var nested_beautifier = kind === 'TK_SCRIPT' ?
                  (typeof js_beautify === 'function' && js_beautify) :
                  (typeof css_beautify === 'function' && css_beautify);

              // Indent level requested from get_full_indent: 1 by default,
              // 0 for "keep", minus the current indent level for "separate".
              var relative_indent_level = 1;
              if (options.indent_scripts === "keep") {
                  relative_indent_level = 0;
              } else if (options.indent_scripts === "separate") {
                  relative_indent_level = -multi_parser.indent_level;
              }
              var base_indent = multi_parser.get_full_indent(relative_indent_level);

              if (nested_beautifier) {
                  // Call the beautifier if available. It receives an object
                  // that inherits every option but forces eol to '\n'.
                  var Nested_options = function() {
                      this.eol = '\n';
                  };
                  Nested_options.prototype = options;
                  var nested_options = new Nested_options();
                  var indented_source = embedded_source.replace(/^\s*/, base_indent);
                  embedded_source = nested_beautifier(indented_source, nested_options);
              } else {
                  // Simply indent the string otherwise: measure how many
                  // indent strings lead the first line, then shift every
                  // following line by the difference.
                  var leading_whitespace = embedded_source.match(/^\s*/)[0];
                  var first_line_indent = leading_whitespace.match(/[^\n\r]*$/)[0];
                  var existing_level = first_line_indent.split(multi_parser.indent_string).length - 1;
                  var line_indent = multi_parser.get_full_indent(relative_indent_level - existing_level);
                  embedded_source = embedded_source.replace(/^\s*/, base_indent);
                  embedded_source = embedded_source.replace(/\r\n|\r|\n/g, '\n' + line_indent);
                  embedded_source = embedded_source.replace(/\s+$/, '');
              }

              if (embedded_source) {
                  multi_parser.print_token_raw(embedded_source);
                  multi_parser.print_newline(true, multi_parser.output);
              }
          }
          multi_parser.current_mode = 'TAG';
      } else if (multi_parser.token_text !== '') {
          // We should not be getting here, but we don't want to drop input
          // on the floor: just output the text and move on.
          multi_parser.print_token(multi_parser.token_text);
      }

      multi_parser.last_token = multi_parser.token_type;
      multi_parser.last_text = multi_parser.token_text;
  }
  857. var sweet_code = multi_parser.output.join('').replace(/[\r\n\t ]+$/, '');
  858. // establish end_with_newline
  859. if (end_with_newline) {
  860. sweet_code += '\n';
  861. }
  862. if (eol !== '\n') {
  863. sweet_code = sweet_code.replace(/[\n]/g, eol);
  864. }
  865. return sweet_code;
  866. }
  867. if (typeof define === "function" && define.amd) {
  868. // Add support for AMD ( https://github.com/amdjs/amdjs-api/wiki/AMD#defineamd-property- )
  869. define(["require", "./beautify", "./beautify-css"], function(requireamd) {
  870. var js_beautify = requireamd("./beautify");
  871. var css_beautify = requireamd("./beautify-css");
  872. return {
  873. html_beautify: function(html_source, options) {
  874. return style_html(html_source, options, js_beautify.js_beautify, css_beautify.css_beautify);
  875. }
  876. };
  877. });
  878. } else if (typeof exports !== "undefined") {
  879. // Add support for CommonJS. Just put this file somewhere on your require.paths
  880. // and you will be able to `var html_beautify = require("beautify").html_beautify`.
  881. var js_beautify = require('./beautify.js');
  882. var css_beautify = require('./beautify-css.js');
  883. exports.html_beautify = function(html_source, options) {
  884. return style_html(html_source, options, js_beautify.js_beautify, css_beautify.css_beautify);
  885. };
  886. } else if (typeof window !== "undefined") {
  887. // If we're running a web page and don't have either of the above, add our one global
  888. window.html_beautify = function(html_source, options) {
  889. return style_html(html_source, options, window.js_beautify, window.css_beautify);
  890. };
  891. } else if (typeof global !== "undefined") {
  892. // If we don't even have window, try global.
  893. global.html_beautify = function(html_source, options) {
  894. return style_html(html_source, options, global.js_beautify, global.css_beautify);
  895. };
  896. }
  897. }());