Drupal investigation

JS.php 50KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180
  1. <?php
  2. /**
  3. * Tokenizes JS code.
  4. *
  5. * PHP version 5
  6. *
  7. * @category PHP
  8. * @package PHP_CodeSniffer
  9. * @author Greg Sherwood <gsherwood@squiz.net>
  10. * @author Marc McIntyre <mmcintyre@squiz.net>
  11. * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
  12. * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
  13. * @link http://pear.php.net/package/PHP_CodeSniffer
  14. */
  15. /**
  16. * Tokenizes JS code.
  17. *
  18. * @category PHP
  19. * @package PHP_CodeSniffer
  20. * @author Greg Sherwood <gsherwood@squiz.net>
  21. * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
  22. * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
  23. * @version Release: @package_version@
  24. * @link http://pear.php.net/package/PHP_CodeSniffer
  25. */
  26. class PHP_CodeSniffer_Tokenizers_JS
  27. {
  28. /**
  29. * If TRUE, files that appear to be minified will not be processed.
  30. *
  31. * @var boolean
  32. */
  33. public $skipMinified = true;
  34. /**
  35. * A list of tokens that are allowed to open a scope.
  36. *
  37. * This array also contains information about what kind of token the scope
  38. * opener uses to open and close the scope, if the token strictly requires
  39. * an opener, if the token can share a scope closer, and who it can be shared
  40. * with. An example of a token that shares a scope closer is a CASE scope.
  41. *
  42. * @var array
  43. */
  44. public $scopeOpeners = array(
  45. T_IF => array(
  46. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  47. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  48. 'strict' => false,
  49. 'shared' => false,
  50. 'with' => array(),
  51. ),
  52. T_TRY => array(
  53. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  54. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  55. 'strict' => true,
  56. 'shared' => false,
  57. 'with' => array(),
  58. ),
  59. T_CATCH => array(
  60. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  61. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  62. 'strict' => true,
  63. 'shared' => false,
  64. 'with' => array(),
  65. ),
  66. T_ELSE => array(
  67. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  68. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  69. 'strict' => false,
  70. 'shared' => false,
  71. 'with' => array(),
  72. ),
  73. T_FOR => array(
  74. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  75. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  76. 'strict' => false,
  77. 'shared' => false,
  78. 'with' => array(),
  79. ),
  80. T_FUNCTION => array(
  81. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  82. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  83. 'strict' => false,
  84. 'shared' => false,
  85. 'with' => array(),
  86. ),
  87. T_WHILE => array(
  88. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  89. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  90. 'strict' => false,
  91. 'shared' => false,
  92. 'with' => array(),
  93. ),
  94. T_DO => array(
  95. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  96. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  97. 'strict' => true,
  98. 'shared' => false,
  99. 'with' => array(),
  100. ),
  101. T_SWITCH => array(
  102. 'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
  103. 'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
  104. 'strict' => true,
  105. 'shared' => false,
  106. 'with' => array(),
  107. ),
  108. T_CASE => array(
  109. 'start' => array(T_COLON => T_COLON),
  110. 'end' => array(
  111. T_BREAK => T_BREAK,
  112. T_RETURN => T_RETURN,
  113. T_CONTINUE => T_CONTINUE,
  114. T_THROW => T_THROW,
  115. ),
  116. 'strict' => true,
  117. 'shared' => true,
  118. 'with' => array(
  119. T_DEFAULT => T_DEFAULT,
  120. T_CASE => T_CASE,
  121. T_SWITCH => T_SWITCH,
  122. ),
  123. ),
  124. T_DEFAULT => array(
  125. 'start' => array(T_COLON => T_COLON),
  126. 'end' => array(
  127. T_BREAK => T_BREAK,
  128. T_RETURN => T_RETURN,
  129. T_CONTINUE => T_CONTINUE,
  130. T_THROW => T_THROW,
  131. ),
  132. 'strict' => true,
  133. 'shared' => true,
  134. 'with' => array(
  135. T_CASE => T_CASE,
  136. T_SWITCH => T_SWITCH,
  137. ),
  138. ),
  139. );
  140. /**
  141. * A list of tokens that end the scope.
  142. *
  143. * This array is just a unique collection of the end tokens
  144. * from the $scopeOpeners array. The data is duplicated here to
  145. * save time during parsing of the file.
  146. *
  147. * @var array
  148. */
  149. public $endScopeTokens = array(
  150. T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
  151. T_BREAK => T_BREAK,
  152. );
  153. /**
  154. * A list of special JS tokens and their types.
  155. *
  156. * @var array
  157. */
  158. protected $tokenValues = array(
  159. 'function' => 'T_FUNCTION',
  160. 'prototype' => 'T_PROTOTYPE',
  161. 'try' => 'T_TRY',
  162. 'catch' => 'T_CATCH',
  163. 'return' => 'T_RETURN',
  164. 'throw' => 'T_THROW',
  165. 'break' => 'T_BREAK',
  166. 'switch' => 'T_SWITCH',
  167. 'continue' => 'T_CONTINUE',
  168. 'if' => 'T_IF',
  169. 'else' => 'T_ELSE',
  170. 'do' => 'T_DO',
  171. 'while' => 'T_WHILE',
  172. 'for' => 'T_FOR',
  173. 'var' => 'T_VAR',
  174. 'case' => 'T_CASE',
  175. 'default' => 'T_DEFAULT',
  176. 'true' => 'T_TRUE',
  177. 'false' => 'T_FALSE',
  178. 'null' => 'T_NULL',
  179. 'this' => 'T_THIS',
  180. 'typeof' => 'T_TYPEOF',
  181. '(' => 'T_OPEN_PARENTHESIS',
  182. ')' => 'T_CLOSE_PARENTHESIS',
  183. '{' => 'T_OPEN_CURLY_BRACKET',
  184. '}' => 'T_CLOSE_CURLY_BRACKET',
  185. '[' => 'T_OPEN_SQUARE_BRACKET',
  186. ']' => 'T_CLOSE_SQUARE_BRACKET',
  187. '?' => 'T_INLINE_THEN',
  188. '.' => 'T_OBJECT_OPERATOR',
  189. '+' => 'T_PLUS',
  190. '-' => 'T_MINUS',
  191. '*' => 'T_MULTIPLY',
  192. '%' => 'T_MODULUS',
  193. '/' => 'T_DIVIDE',
  194. '^' => 'T_LOGICAL_XOR',
  195. ',' => 'T_COMMA',
  196. ';' => 'T_SEMICOLON',
  197. ':' => 'T_COLON',
  198. '<' => 'T_LESS_THAN',
  199. '>' => 'T_GREATER_THAN',
  200. '<<' => 'T_SL',
  201. '>>' => 'T_SR',
  202. '>>>' => 'T_ZSR',
  203. '<<=' => 'T_SL_EQUAL',
  204. '>>=' => 'T_SR_EQUAL',
  205. '>>>=' => 'T_ZSR_EQUAL',
  206. '<=' => 'T_IS_SMALLER_OR_EQUAL',
  207. '>=' => 'T_IS_GREATER_OR_EQUAL',
  208. '=>' => 'T_DOUBLE_ARROW',
  209. '!' => 'T_BOOLEAN_NOT',
  210. '||' => 'T_BOOLEAN_OR',
  211. '&&' => 'T_BOOLEAN_AND',
  212. '|' => 'T_BITWISE_OR',
  213. '&' => 'T_BITWISE_AND',
  214. '!=' => 'T_IS_NOT_EQUAL',
  215. '!==' => 'T_IS_NOT_IDENTICAL',
  216. '=' => 'T_EQUAL',
  217. '==' => 'T_IS_EQUAL',
  218. '===' => 'T_IS_IDENTICAL',
  219. '-=' => 'T_MINUS_EQUAL',
  220. '+=' => 'T_PLUS_EQUAL',
  221. '*=' => 'T_MUL_EQUAL',
  222. '/=' => 'T_DIV_EQUAL',
  223. '%=' => 'T_MOD_EQUAL',
  224. '++' => 'T_INC',
  225. '--' => 'T_DEC',
  226. '//' => 'T_COMMENT',
  227. '/*' => 'T_COMMENT',
  228. '/**' => 'T_DOC_COMMENT',
  229. '*/' => 'T_COMMENT',
  230. );
  231. /**
  232. * A list of string delimiters.
  233. *
  234. * @var array
  235. */
  236. protected $stringTokens = array(
  237. '\'' => '\'',
  238. '"' => '"',
  239. );
  240. /**
  241. * A list of tokens that start and end comments.
  242. *
  243. * @var array
  244. */
  245. protected $commentTokens = array(
  246. '//' => null,
  247. '/*' => '*/',
  248. '/**' => '*/',
  249. );
  /**
   * Creates an array of tokens when given some JS code.
   *
   * The string is scanned one character at a time to build a raw token
   * list (whitespace, strings, regular expressions, known keywords and
   * operators, and generic T_STRING content). A second pass then joins
   * comment tokens, splits multi-line tokens into one token per line,
   * merges numeric fragments into T_LNUMBER/T_DNUMBER tokens and converts
   * the token following an object operator into a T_STRING.
   *
   * @param string $string  The string to tokenize.
   * @param string $eolChar The EOL character to use for splitting strings.
   *                        NOTE(review): the default is the single-quoted
   *                        literal '\n' (a backslash followed by "n"), not
   *                        a newline character; it is kept for interface
   *                        compatibility, so callers should always pass the
   *                        real EOL sequence explicitly.
   *
   * @return array
   */
  public function tokenizeString($string, $eolChar='\n')
  {
      if (PHP_CODESNIFFER_VERBOSITY > 1) {
          echo "\t*** START JS TOKENIZING ***".PHP_EOL;
      }

      // The longest known token determines how far we need to look ahead.
      $maxTokenLength = 0;
      foreach ($this->tokenValues as $token => $values) {
          if (strlen($token) > $maxTokenLength) {
              $maxTokenLength = strlen($token);
          }
      }

      $tokens          = array();
      $inString        = '';
      $stringChar      = null;
      $inComment       = '';
      $buffer          = '';
      $preStringBuffer = '';
      $cleanBuffer     = false;

      $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();

      $tokens[] = array(
          'code'    => T_OPEN_TAG,
          'type'    => 'T_OPEN_TAG',
          'content' => '',
      );

      // Convert newlines to single characters for ease of
      // processing. We will change them back later.
      $string = str_replace($eolChar, "\n", $string);

      $chars    = str_split($string);
      $numChars = count($chars);
      for ($i = 0; $i < $numChars; $i++) {
          $char = $chars[$i];

          if (PHP_CODESNIFFER_VERBOSITY > 1) {
              $content       = PHP_CodeSniffer::prepareForOutput($char);
              $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);

              if ($inString !== '') {
                  echo "\t";
              }

              if ($inComment !== '') {
                  echo "\t";
              }

              echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
          }//end if

          if ($inString === '' && $inComment === '' && $buffer !== '') {
              // If the buffer only has whitespace and we are about to
              // add a character, store the whitespace first.
              if (trim($char) !== '' && trim($buffer) === '') {
                  $tokens[] = array(
                      'code'    => T_WHITESPACE,
                      'type'    => 'T_WHITESPACE',
                      'content' => str_replace("\n", $eolChar, $buffer),
                  );

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      $content = PHP_CodeSniffer::prepareForOutput($buffer);
                      echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                  }

                  $buffer = '';
              }

              // If the buffer is not whitespace and we are about to
              // add a whitespace character, store the content first.
              if ($inString === ''
                  && $inComment === ''
                  && trim($char) === ''
                  && trim($buffer) !== ''
              ) {
                  $tokens[] = array(
                      'code'    => T_STRING,
                      'type'    => 'T_STRING',
                      'content' => str_replace("\n", $eolChar, $buffer),
                  );

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      $content = PHP_CodeSniffer::prepareForOutput($buffer);
                      echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                  }

                  $buffer = '';
              }
          }//end if

          // Process strings.
          if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
              if ($inString === $char) {
                  // This could be the end of the string, but make sure it
                  // is not escaped first.
                  $escapes = 0;
                  for ($x = ($i - 1); $x >= 0; $x--) {
                      if ($chars[$x] !== '\\') {
                          break;
                      }

                      $escapes++;
                  }

                  if ($escapes === 0 || ($escapes % 2) === 0) {
                      // There is an even number escape chars,
                      // so this is not escaped, it is the end of the string.
                      $tokens[] = array(
                          'code'    => T_CONSTANT_ENCAPSED_STRING,
                          'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                          'content' => str_replace("\n", $eolChar, $buffer).$char,
                      );

                      if (PHP_CODESNIFFER_VERBOSITY > 1) {
                          echo "\t\t* found end of string *".PHP_EOL;
                          $content = PHP_CodeSniffer::prepareForOutput($buffer.$char);
                          echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                      }

                      $buffer          = '';
                      $preStringBuffer = '';
                      $inString        = '';
                      $stringChar      = null;
                      continue;
                  }//end if
              } else if ($inString === '') {
                  // Remember where the string started and what was buffered
                  // before it, so we can rewind if it turns out not to be
                  // a string after all.
                  $inString        = $char;
                  $stringChar      = $i;
                  $preStringBuffer = $buffer;

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      echo "\t\t* looking for string closer *".PHP_EOL;
                  }
              }//end if
          }//end if

          if ($inString !== '' && $char === "\n") {
              // Unless this newline character is escaped, the string did not
              // end before the end of the line, which means it probably
              // wasn't a string at all (maybe a regex).
              if ($chars[($i - 1)] !== '\\') {
                  $i      = $stringChar;
                  $buffer = $preStringBuffer;
                  $preStringBuffer = '';
                  $inString        = '';
                  $stringChar      = null;
                  $char            = $chars[$i];

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                  }
              }
          }

          $buffer .= $char;

          // We don't look for special tokens inside strings,
          // so if we are in a string, we can continue here now
          // that the current char is in the buffer.
          if ($inString !== '') {
              continue;
          }

          // Special case for T_DIVIDE which can actually be
          // the start of a regular expression. The next character is only
          // inspected when it exists, so a trailing "/" at the very end of
          // the input no longer reads past the end of the array.
          if ($buffer === $char
              && $char === '/'
              && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
          ) {
              $regex = $this->getRegexToken(
                  $i,
                  $string,
                  $chars,
                  $tokens,
                  $eolChar
              );

              if ($regex !== null) {
                  $tokens[] = array(
                      'code'    => T_REGULAR_EXPRESSION,
                      'type'    => 'T_REGULAR_EXPRESSION',
                      'content' => $regex['content'],
                  );

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
                      echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                  }

                  $i           = $regex['end'];
                  $buffer      = '';
                  $cleanBuffer = false;
                  continue;
              }//end if
          }//end if

          // Check for known tokens, but ignore tokens found that are not at
          // the end of a string, like FOR and this.FORmat.
          // Note: the character class is [a-zA-Z0-9_]; the previous "A-z"
          // range also matched "[", "\", "]", "^" and "`", which caused
          // keywords followed by those characters (e.g. this[0]) to be
          // missed.
          if (isset($this->tokenValues[strtolower($buffer)]) === true
              && (preg_match('|[a-zA-Z0-9_]|', $char) === 0
              || isset($chars[($i + 1)]) === false
              || preg_match('|[a-zA-Z0-9_]|', $chars[($i + 1)]) === 0)
          ) {
              $matchedToken    = false;
              $lookAheadLength = ($maxTokenLength - strlen($buffer));

              if ($lookAheadLength > 0) {
                  // The buffer contains a token type, but we need
                  // to look ahead at the next chars to see if this is
                  // actually part of a larger token. For example,
                  // FOR and FOREACH.
                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                  }

                  $charBuffer = $buffer;
                  for ($x = 1; $x <= $lookAheadLength; $x++) {
                      if (isset($chars[($i + $x)]) === false) {
                          break;
                      }

                      $charBuffer .= $chars[($i + $x)];

                      if (PHP_CODESNIFFER_VERBOSITY > 1) {
                          $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                          echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                      }

                      if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                          // We've found something larger that matches
                          // so we can ignore this char. Except for 1 very specific
                          // case where a comment like /**/ needs to tokenize as
                          // T_COMMENT and not T_DOC_COMMENT.
                          $oldType = $this->tokenValues[strtolower($buffer)];
                          $newType = $this->tokenValues[strtolower($charBuffer)];
                          if ($oldType === 'T_COMMENT'
                              && $newType === 'T_DOC_COMMENT'
                              && isset($chars[($i + $x + 1)]) === true
                              && $chars[($i + $x + 1)] === '/'
                          ) {
                              if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                  echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                              }
                          } else {
                              if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                  echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                              }

                              $matchedToken = true;
                              break;
                          }
                      }//end if
                  }//end for
              }//end if

              if ($matchedToken === false) {
                  if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                      echo "\t\t* look ahead found nothing *".PHP_EOL;
                  }

                  $value = $this->tokenValues[strtolower($buffer)];

                  if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                      // The function keyword needs to be all lowercase or else
                      // it is just a function called "Function".
                      $value = 'T_STRING';
                  }

                  $tokens[] = array(
                      'code'    => constant($value),
                      'type'    => $value,
                      'content' => $buffer,
                  );

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      $content = PHP_CodeSniffer::prepareForOutput($buffer);
                      echo "\t=> Added token $value ($content)".PHP_EOL;
                  }

                  $cleanBuffer = true;
              }//end if
          } else if (isset($this->tokenValues[strtolower($char)]) === true) {
              // No matter what token we end up using, we don't
              // need the content in the buffer any more because we have
              // found a valid token.
              $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
              if ($newContent !== '') {
                  $tokens[] = array(
                      'code'    => T_STRING,
                      'type'    => 'T_STRING',
                      'content' => $newContent,
                  );

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
                      echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                  }
              }

              if (PHP_CODESNIFFER_VERBOSITY > 1) {
                  echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
              }

              // The char is a token type, but we need to look ahead at the
              // next chars to see if this is actually part of a larger token.
              // For example, = and ===.
              $charBuffer   = $char;
              $matchedToken = false;
              for ($x = 1; $x <= $maxTokenLength; $x++) {
                  if (isset($chars[($i + $x)]) === false) {
                      break;
                  }

                  $charBuffer .= $chars[($i + $x)];

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                      echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                  }

                  if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                      // We've found something larger that matches
                      // so we can ignore this char.
                      if (PHP_CODESNIFFER_VERBOSITY > 1) {
                          $type = $this->tokenValues[strtolower($charBuffer)];
                          echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                      }

                      $matchedToken = true;
                      break;
                  }
              }//end for

              if ($matchedToken === false) {
                  $value    = $this->tokenValues[strtolower($char)];
                  $tokens[] = array(
                      'code'    => constant($value),
                      'type'    => $value,
                      'content' => $char,
                  );

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      echo "\t\t* look ahead found nothing *".PHP_EOL;
                      $content = PHP_CodeSniffer::prepareForOutput($char);
                      echo "\t=> Added token $value ($content)".PHP_EOL;
                  }

                  $cleanBuffer = true;
              } else {
                  $buffer = $char;
              }//end if
          }//end if

          // Keep track of content inside comments.
          if ($inComment === ''
              && array_key_exists($buffer, $this->commentTokens) === true
          ) {
              // This is not really a comment if the content
              // looks like \// (i.e., it is escaped).
              if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                  $lastToken   = array_pop($tokens);
                  $lastContent = $lastToken['content'];

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      $value   = $this->tokenValues[strtolower($lastContent)];
                      $content = PHP_CodeSniffer::prepareForOutput($lastContent);
                      echo "\t=> Removed token $value ($content)".PHP_EOL;
                  }

                  // Re-add the removed token one character at a time.
                  $lastChars    = str_split($lastContent);
                  $lastNumChars = count($lastChars);
                  for ($x = 0; $x < $lastNumChars; $x++) {
                      $lastChar = $lastChars[$x];
                      $value    = $this->tokenValues[strtolower($lastChar)];
                      $tokens[] = array(
                          'code'    => constant($value),
                          'type'    => $value,
                          'content' => $lastChar,
                      );

                      if (PHP_CODESNIFFER_VERBOSITY > 1) {
                          $content = PHP_CodeSniffer::prepareForOutput($lastChar);
                          echo "\t=> Added token $value ($content)".PHP_EOL;
                      }
                  }
              } else {
                  // We have started a comment.
                  $inComment = $buffer;

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      echo "\t\t* looking for end of comment *".PHP_EOL;
                  }
              }//end if
          } else if ($inComment !== '') {
              if ($this->commentTokens[$inComment] === null) {
                  // Comment ends at the next newline.
                  if (strpos($buffer, "\n") !== false) {
                      $inComment = '';
                  }
              } else {
                  if ($this->commentTokens[$inComment] === $buffer) {
                      $inComment = '';
                  }
              }

              if (PHP_CODESNIFFER_VERBOSITY > 1) {
                  if ($inComment === '') {
                      echo "\t\t* found end of comment *".PHP_EOL;
                  }
              }

              if ($inComment === '' && $cleanBuffer === false) {
                  $tokens[] = array(
                      'code'    => T_STRING,
                      'type'    => 'T_STRING',
                      'content' => str_replace("\n", $eolChar, $buffer),
                  );

                  if (PHP_CODESNIFFER_VERBOSITY > 1) {
                      $content = PHP_CodeSniffer::prepareForOutput($buffer);
                      echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                  }

                  $buffer = '';
              }
          }//end if

          if ($cleanBuffer === true) {
              $buffer      = '';
              $cleanBuffer = false;
          }
      }//end for

      // Flush whatever is left in the buffer. A strict comparison is used
      // because empty() treats the string "0" as empty, which would drop a
      // trailing "0" from the input. Leftover content that is not pure
      // whitespace (e.g. an identifier or an unterminated string at the end
      // of the file) is stored as T_STRING rather than T_WHITESPACE.
      if ($buffer !== '') {
          if (trim($buffer) === '') {
              $leftoverType = 'T_WHITESPACE';
          } else {
              $leftoverType = 'T_STRING';
          }

          $tokens[] = array(
              'code'    => constant($leftoverType),
              'type'    => $leftoverType,
              'content' => str_replace("\n", $eolChar, $buffer),
          );

          if (PHP_CODESNIFFER_VERBOSITY > 1) {
              $content = PHP_CodeSniffer::prepareForOutput($buffer);
              echo "\t=> Added token $leftoverType ($content)".PHP_EOL;
          }
      }

      $tokens[] = array(
          'code'    => T_CLOSE_TAG,
          'type'    => 'T_CLOSE_TAG',
          'content' => '',
      );

      /*
          Now that we have done some basic tokenizing, we need to
          modify the tokens to join some together and split some apart
          so they match what the PHP tokenizer does.
      */

      $finalTokens = array();
      $newStackPtr = 0;
      $numTokens   = count($tokens);
      for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
          $token = $tokens[$stackPtr];

          /*
              Look for comments and join the tokens together.
          */

          if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
              $newContent   = '';
              $tokenContent = $token['content'];

              $endContent = null;
              if (isset($this->commentTokens[$tokenContent]) === true) {
                  $endContent = $this->commentTokens[$tokenContent];
              }

              while ($tokenContent !== $endContent) {
                  if ($endContent === null
                      && strpos($tokenContent, $eolChar) !== false
                  ) {
                      // A null end token means the comment ends at the end of
                      // the line so we look for newlines and split the token.
                      $tokens[$stackPtr]['content'] = substr(
                          $tokenContent,
                          (strpos($tokenContent, $eolChar) + strlen($eolChar))
                      );

                      $tokenContent = substr(
                          $tokenContent,
                          0,
                          (strpos($tokenContent, $eolChar) + strlen($eolChar))
                      );

                      // If the substr failed, skip the token as the content
                      // will now be blank.
                      if ($tokens[$stackPtr]['content'] !== false
                          && $tokens[$stackPtr]['content'] !== ''
                      ) {
                          $stackPtr--;
                      }

                      break;
                  }//end if

                  $stackPtr++;
                  $newContent .= $tokenContent;
                  if (isset($tokens[$stackPtr]) === false) {
                      break;
                  }

                  $tokenContent = $tokens[$stackPtr]['content'];
              }//end while

              if ($token['code'] === T_DOC_COMMENT) {
                  $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $eolChar, $newStackPtr);
                  foreach ($commentTokens as $commentToken) {
                      $finalTokens[$newStackPtr] = $commentToken;
                      $newStackPtr++;
                  }

                  continue;
              } else {
                  // Save the new content in the current token so
                  // the code below can chop it up on newlines.
                  $token['content'] = $newContent.$tokenContent;
              }
          }//end if

          /*
              If this token has newlines in its content, split each line up
              and create a new token for each line. We do this so it's easier
              to ascertain where errors occur on a line.
          */

          if (strpos($token['content'], $eolChar) !== false) {
              $tokenLines = explode($eolChar, $token['content']);
              $numLines   = count($tokenLines);

              for ($i = 0; $i < $numLines; $i++) {
                  $newToken            = array();
                  $newToken['content'] = $tokenLines[$i];
                  if ($i === ($numLines - 1)) {
                      // Nothing follows the final EOL, so there is no
                      // extra token to add.
                      if ($tokenLines[$i] === '') {
                          break;
                      }
                  } else {
                      $newToken['content'] .= $eolChar;
                  }

                  $newToken['type']          = $token['type'];
                  $newToken['code']          = $token['code'];
                  $finalTokens[$newStackPtr] = $newToken;
                  $newStackPtr++;
              }
          } else {
              $finalTokens[$newStackPtr] = $token;
              $newStackPtr++;
          }//end if

          // Convert numbers, including decimals.
          if ($token['code'] === T_STRING
              || $token['code'] === T_OBJECT_OPERATOR
          ) {
              $newContent  = '';
              $oldStackPtr = $stackPtr;
              while (isset($tokens[$stackPtr]) === true
                  && preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0
              ) {
                  $newContent .= $tokens[$stackPtr]['content'];
                  $stackPtr++;
              }

              if ($newContent !== '' && $newContent !== '.') {
                  $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                  if (ctype_digit($newContent) === true) {
                      $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                      $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                  } else {
                      $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                      $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                  }

                  // Step back one so the loop increment lands on the first
                  // token that was not part of the number.
                  $stackPtr--;
                  continue;
              } else {
                  $stackPtr = $oldStackPtr;
              }
          }//end if

          // Convert the token after an object operator into a string, in most cases.
          if ($token['code'] === T_OBJECT_OPERATOR) {
              for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                  if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                      continue;
                  }

                  if ($tokens[$i]['code'] !== T_PROTOTYPE
                      && $tokens[$i]['code'] !== T_LNUMBER
                      && $tokens[$i]['code'] !== T_DNUMBER
                  ) {
                      $tokens[$i]['code'] = T_STRING;
                      $tokens[$i]['type'] = 'T_STRING';
                  }

                  break;
              }
          }
      }//end for

      if (PHP_CODESNIFFER_VERBOSITY > 1) {
          echo "\t*** END TOKENIZING ***".PHP_EOL;
      }

      return $finalTokens;

  }//end tokenizeString()
  781. /**
  782. * Tokenizes a regular expression if one is found.
  783. *
  784. * If a regular expression is not found, NULL is returned.
  785. *
  786. * @param string $char The index of the possible regex start character.
  787. * @param string $string The complete content of the string being tokenized.
  788. * @param string $chars An array of characters being tokenized.
  789. * @param string $tokens The current array of tokens found in the string.
  790. * @param string $eolChar The EOL character to use for splitting strings.
  791. *
  792. * @return array|null
  793. */
  794. public function getRegexToken($char, $string, $chars, $tokens, $eolChar)
  795. {
  796. $beforeTokens = array(
  797. T_EQUAL => true,
  798. T_IS_NOT_EQUAL => true,
  799. T_IS_IDENTICAL => true,
  800. T_IS_NOT_IDENTICAL => true,
  801. T_OPEN_PARENTHESIS => true,
  802. T_OPEN_SQUARE_BRACKET => true,
  803. T_RETURN => true,
  804. T_BOOLEAN_OR => true,
  805. T_BOOLEAN_AND => true,
  806. T_BITWISE_OR => true,
  807. T_BITWISE_AND => true,
  808. T_COMMA => true,
  809. T_COLON => true,
  810. T_TYPEOF => true,
  811. T_INLINE_THEN => true,
  812. T_INLINE_ELSE => true,
  813. );
  814. $afterTokens = array(
  815. ',' => true,
  816. ')' => true,
  817. ']' => true,
  818. ';' => true,
  819. ' ' => true,
  820. '.' => true,
  821. ':' => true,
  822. $eolChar => true,
  823. );
  824. // Find the last non-whitespace token that was added
  825. // to the tokens array.
  826. $numTokens = count($tokens);
  827. for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
  828. if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
  829. break;
  830. }
  831. }
  832. if (isset($beforeTokens[$tokens[$prev]['code']]) === false) {
  833. return null;
  834. }
  835. // This is probably a regular expression, so look for the end of it.
  836. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  837. echo "\t* token possibly starts a regular expression *".PHP_EOL;
  838. }
  839. $numChars = count($chars);
  840. for ($next = ($char + 1); $next < $numChars; $next++) {
  841. if ($chars[$next] === '/') {
  842. // Just make sure this is not escaped first.
  843. if ($chars[($next - 1)] !== '\\') {
  844. // In the simple form: /.../ so we found the end.
  845. break;
  846. } else if ($chars[($next - 2)] === '\\') {
  847. // In the form: /...\\/ so we found the end.
  848. break;
  849. }
  850. } else {
  851. $possibleEolChar = substr($string, $next, strlen($eolChar));
  852. if ($possibleEolChar === $eolChar) {
  853. // This is the last token on the line and regular
  854. // expressions need to be defined on a single line,
  855. // so this is not a regular expression.
  856. break;
  857. }
  858. }
  859. }
  860. if ($chars[$next] !== '/') {
  861. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  862. echo "\t* could not find end of regular expression *".PHP_EOL;
  863. }
  864. return null;
  865. }
  866. while (preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0) {
  867. // The token directly after the end of the regex can
  868. // be modifiers like global and case insensitive
  869. // (.e.g, /pattern/gi).
  870. $next++;
  871. }
  872. $regexEnd = $next;
  873. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  874. echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
  875. }
  876. for ($next = ($next + 1); $next < $numChars; $next++) {
  877. if ($chars[$next] !== ' ') {
  878. break;
  879. } else {
  880. $possibleEolChar = substr($string, $next, strlen($eolChar));
  881. if ($possibleEolChar === $eolChar) {
  882. // This is the last token on the line.
  883. break;
  884. }
  885. }
  886. }
  887. if (isset($afterTokens[$chars[$next]]) === false) {
  888. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  889. echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
  890. }
  891. return null;
  892. }
  893. // This is a regular expression, so join all the tokens together.
  894. $content = '';
  895. for ($x = $char; $x <= $regexEnd; $x++) {
  896. $content .= $chars[$x];
  897. }
  898. $token = array(
  899. 'start' => $char,
  900. 'end' => $regexEnd,
  901. 'content' => $content,
  902. );
  903. return $token;
  904. }//end getRegexToken()
  905. /**
  906. * Performs additional processing after main tokenizing.
  907. *
  908. * This additional processing looks for properties, closures, labels and objects.
  909. *
  910. * @param array $tokens The array of tokens to process.
  911. * @param string $eolChar The EOL character to use for splitting strings.
  912. *
  913. * @return void
  914. */
  915. public function processAdditional(&$tokens, $eolChar)
  916. {
  917. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  918. echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
  919. }
  920. $numTokens = count($tokens);
  921. $classStack = array();
  922. for ($i = 0; $i < $numTokens; $i++) {
  923. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  924. $type = $tokens[$i]['type'];
  925. $content = PHP_CodeSniffer::prepareForOutput($tokens[$i]['content']);
  926. echo str_repeat("\t", count($classStack));
  927. echo "\tProcess token $i: $type => $content".PHP_EOL;
  928. }
  929. // Looking for functions that are actually closures.
  930. if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
  931. for ($x = ($i + 1); $x < $numTokens; $x++) {
  932. if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
  933. break;
  934. }
  935. }
  936. if ($tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
  937. $tokens[$i]['code'] = T_CLOSURE;
  938. $tokens[$i]['type'] = 'T_CLOSURE';
  939. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  940. $line = $tokens[$i]['line'];
  941. echo str_repeat("\t", count($classStack));
  942. echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
  943. }
  944. for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
  945. if (isset($tokens[$x]['conditions'][$i]) === false) {
  946. continue;
  947. }
  948. $tokens[$x]['conditions'][$i] = T_CLOSURE;
  949. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  950. $type = $tokens[$x]['type'];
  951. echo str_repeat("\t", count($classStack));
  952. echo "\t\t* cleaned $x ($type) *".PHP_EOL;
  953. }
  954. }
  955. }//end if
  956. continue;
  957. } else if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
  958. && isset($tokens[$i]['scope_condition']) === false
  959. && isset($tokens[$i]['bracket_closer']) === true
  960. ) {
  961. $classStack[] = $i;
  962. $closer = $tokens[$i]['bracket_closer'];
  963. $tokens[$i]['code'] = T_OBJECT;
  964. $tokens[$i]['type'] = 'T_OBJECT';
  965. $tokens[$closer]['code'] = T_CLOSE_OBJECT;
  966. $tokens[$closer]['type'] = 'T_CLOSE_OBJECT';
  967. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  968. echo str_repeat("\t", count($classStack));
  969. echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
  970. echo str_repeat("\t", count($classStack));
  971. echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
  972. }
  973. for ($x = ($i + 1); $x < $closer; $x++) {
  974. $tokens[$x]['conditions'][$i] = T_OBJECT;
  975. ksort($tokens[$x]['conditions'], SORT_NUMERIC);
  976. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  977. $type = $tokens[$x]['type'];
  978. echo str_repeat("\t", count($classStack));
  979. echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
  980. }
  981. }
  982. } else if ($tokens[$i]['code'] === T_CLOSE_OBJECT) {
  983. $opener = array_pop($classStack);
  984. } else if ($tokens[$i]['code'] === T_COLON) {
  985. // If it is a scope opener, it belongs to a
  986. // DEFAULT or CASE statement.
  987. if (isset($tokens[$i]['scope_condition']) === true) {
  988. continue;
  989. }
  990. // Make sure this is not part of an inline IF statement.
  991. for ($x = ($i - 1); $x >= 0; $x--) {
  992. if ($tokens[$x]['code'] === T_INLINE_THEN) {
  993. $tokens[$i]['code'] = T_INLINE_ELSE;
  994. $tokens[$i]['type'] = 'T_INLINE_ELSE';
  995. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  996. echo str_repeat("\t", count($classStack));
  997. echo "\t* token $i converted from T_COLON to T_INLINE_THEN *".PHP_EOL;
  998. }
  999. continue(2);
  1000. } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
  1001. break;
  1002. }
  1003. }
  1004. // The string to the left of the colon is either a property or label.
  1005. for ($label = ($i - 1); $label >= 0; $label--) {
  1006. if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$label]['code']]) === false) {
  1007. break;
  1008. }
  1009. }
  1010. if ($tokens[$label]['code'] !== T_STRING
  1011. && $tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING
  1012. ) {
  1013. continue;
  1014. }
  1015. if (empty($classStack) === false) {
  1016. $tokens[$label]['code'] = T_PROPERTY;
  1017. $tokens[$label]['type'] = 'T_PROPERTY';
  1018. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  1019. echo str_repeat("\t", count($classStack));
  1020. echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
  1021. }
  1022. } else {
  1023. $tokens[$label]['code'] = T_LABEL;
  1024. $tokens[$label]['type'] = 'T_LABEL';
  1025. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  1026. echo str_repeat("\t", count($classStack));
  1027. echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
  1028. }
  1029. }//end if
  1030. }//end if
  1031. }//end for
  1032. if (PHP_CODESNIFFER_VERBOSITY > 1) {
  1033. echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
  1034. }
  1035. }//end processAdditional()
  1036. }//end class