12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180 |
- <?php
- /**
- * Tokenizes JS code.
- *
- * PHP version 5
- *
- * @category PHP
- * @package PHP_CodeSniffer
- * @author Greg Sherwood <gsherwood@squiz.net>
- * @author Marc McIntyre <mmcintyre@squiz.net>
- * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
- * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
- * @link http://pear.php.net/package/PHP_CodeSniffer
- */
- /**
- * Tokenizes JS code.
- *
- * @category PHP
- * @package PHP_CodeSniffer
- * @author Greg Sherwood <gsherwood@squiz.net>
- * @copyright 2006-2014 Squiz Pty Ltd (ABN 77 084 670 600)
- * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
- * @version Release: @package_version@
- * @link http://pear.php.net/package/PHP_CodeSniffer
- */
class PHP_CodeSniffer_Tokenizers_JS
{

    /**
     * If TRUE, files that appear to be minified will not be processed.
     *
     * @var boolean
     */
    public $skipMinified = true;

    /**
     * A list of tokens that are allowed to open a scope.
     *
     * This array also contains information about what kind of token the scope
     * opener uses to open and close the scope, if the token strictly requires
     * an opener, if the token can share a scope closer, and who it can be shared
     * with. An example of a token that shares a scope closer is a CASE scope.
     *
     * @var array
     */
    public $scopeOpeners = array(
        T_IF => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_TRY => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_CATCH => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_ELSE => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_FOR => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_FUNCTION => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_WHILE => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => false,
            'shared' => false,
            'with' => array(),
        ),
        T_DO => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_SWITCH => array(
            'start' => array(T_OPEN_CURLY_BRACKET => T_OPEN_CURLY_BRACKET),
            'end' => array(T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET),
            'strict' => true,
            'shared' => false,
            'with' => array(),
        ),
        T_CASE => array(
            'start' => array(T_COLON => T_COLON),
            'end' => array(
                T_BREAK => T_BREAK,
                T_RETURN => T_RETURN,
                T_CONTINUE => T_CONTINUE,
                T_THROW => T_THROW,
            ),
            'strict' => true,
            'shared' => true,
            'with' => array(
                T_DEFAULT => T_DEFAULT,
                T_CASE => T_CASE,
                T_SWITCH => T_SWITCH,
            ),
        ),
        T_DEFAULT => array(
            'start' => array(T_COLON => T_COLON),
            'end' => array(
                T_BREAK => T_BREAK,
                T_RETURN => T_RETURN,
                T_CONTINUE => T_CONTINUE,
                T_THROW => T_THROW,
            ),
            'strict' => true,
            'shared' => true,
            'with' => array(
                T_CASE => T_CASE,
                T_SWITCH => T_SWITCH,
            ),
        ),
    );

    /**
     * A list of tokens that end the scope.
     *
     * Kept as a separate lookup, keyed by token code, so it does not have
     * to be derived from the $scopeOpeners array while parsing.
     *
     * NOTE(review): only T_CLOSE_CURLY_BRACKET and T_BREAK are listed here,
     * although $scopeOpeners also names T_RETURN, T_CONTINUE and T_THROW as
     * CASE/DEFAULT closers - confirm this difference is intended.
     *
     * @var array
     */
    public $endScopeTokens = array(
        T_CLOSE_CURLY_BRACKET => T_CLOSE_CURLY_BRACKET,
        T_BREAK => T_BREAK,
    );

    /**
     * A list of special JS tokens and their types.
     *
     * Keys are the (lowercase) source text and values are the names of the
     * token constants, which are resolved with constant() when a token is
     * created.
     *
     * @var array
     */
    protected $tokenValues = array(
        'function' => 'T_FUNCTION',
        'prototype' => 'T_PROTOTYPE',
        'try' => 'T_TRY',
        'catch' => 'T_CATCH',
        'return' => 'T_RETURN',
        'throw' => 'T_THROW',
        'break' => 'T_BREAK',
        'switch' => 'T_SWITCH',
        'continue' => 'T_CONTINUE',
        'if' => 'T_IF',
        'else' => 'T_ELSE',
        'do' => 'T_DO',
        'while' => 'T_WHILE',
        'for' => 'T_FOR',
        'var' => 'T_VAR',
        'case' => 'T_CASE',
        'default' => 'T_DEFAULT',
        'true' => 'T_TRUE',
        'false' => 'T_FALSE',
        'null' => 'T_NULL',
        'this' => 'T_THIS',
        'typeof' => 'T_TYPEOF',
        '(' => 'T_OPEN_PARENTHESIS',
        ')' => 'T_CLOSE_PARENTHESIS',
        '{' => 'T_OPEN_CURLY_BRACKET',
        '}' => 'T_CLOSE_CURLY_BRACKET',
        '[' => 'T_OPEN_SQUARE_BRACKET',
        ']' => 'T_CLOSE_SQUARE_BRACKET',
        '?' => 'T_INLINE_THEN',
        '.' => 'T_OBJECT_OPERATOR',
        '+' => 'T_PLUS',
        '-' => 'T_MINUS',
        '*' => 'T_MULTIPLY',
        '%' => 'T_MODULUS',
        '/' => 'T_DIVIDE',
        '^' => 'T_LOGICAL_XOR',
        ',' => 'T_COMMA',
        ';' => 'T_SEMICOLON',
        ':' => 'T_COLON',
        '<' => 'T_LESS_THAN',
        '>' => 'T_GREATER_THAN',
        '<<' => 'T_SL',
        '>>' => 'T_SR',
        '>>>' => 'T_ZSR',
        '<<=' => 'T_SL_EQUAL',
        '>>=' => 'T_SR_EQUAL',
        '>>>=' => 'T_ZSR_EQUAL',
        '<=' => 'T_IS_SMALLER_OR_EQUAL',
        '>=' => 'T_IS_GREATER_OR_EQUAL',
        '=>' => 'T_DOUBLE_ARROW',
        '!' => 'T_BOOLEAN_NOT',
        '||' => 'T_BOOLEAN_OR',
        '&&' => 'T_BOOLEAN_AND',
        '|' => 'T_BITWISE_OR',
        '&' => 'T_BITWISE_AND',
        '!=' => 'T_IS_NOT_EQUAL',
        '!==' => 'T_IS_NOT_IDENTICAL',
        '=' => 'T_EQUAL',
        '==' => 'T_IS_EQUAL',
        '===' => 'T_IS_IDENTICAL',
        '-=' => 'T_MINUS_EQUAL',
        '+=' => 'T_PLUS_EQUAL',
        '*=' => 'T_MUL_EQUAL',
        '/=' => 'T_DIV_EQUAL',
        '%=' => 'T_MOD_EQUAL',
        '++' => 'T_INC',
        '--' => 'T_DEC',
        '//' => 'T_COMMENT',
        '/*' => 'T_COMMENT',
        '/**' => 'T_DOC_COMMENT',
        '*/' => 'T_COMMENT',
    );

    /**
     * A list of string delimiters.
     *
     * @var array
     */
    protected $stringTokens = array(
        '\'' => '\'',
        '"' => '"',
    );

    /**
     * A list of tokens that start and end comments.
     *
     * Keys are the comment openers and values are the matching closers.
     * A NULL closer means the comment ends at the end of the line.
     *
     * @var array
     */
    protected $commentTokens = array(
        '//' => null,
        '/*' => '*/',
        '/**' => '*/',
    );


    /**
     * Creates an array of tokens when given some JS code.
     *
     * The work is done in two passes. The first pass walks the content one
     * character at a time and produces raw tokens. The second pass joins
     * comment tokens together, hands doc comments to the comment tokenizer,
     * splits multi-line tokens into one token per line and converts numbers.
     *
     * @param string $string  The string to tokenize.
     * @param string $eolChar The EOL character to use for splitting strings.
     *                        Defaults to a real newline character.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar="\n")
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START JS TOKENIZING ***".PHP_EOL;
        }

        // The longest known token decides how far we ever need to look ahead.
        $maxTokenLength = 0;
        foreach (array_keys($this->tokenValues) as $token) {
            if (strlen($token) > $maxTokenLength) {
                $maxTokenLength = strlen($token);
            }
        }

        $tokens          = array();
        $inString        = '';
        $stringChar      = null;
        $inComment       = '';
        $buffer          = '';
        $preStringBuffer = '';
        $cleanBuffer     = false;

        $commentTokenizer = new PHP_CodeSniffer_Tokenizers_Comment();

        $tokens[] = array(
            'code'    => T_OPEN_TAG,
            'type'    => 'T_OPEN_TAG',
            'content' => '',
        );

        // Convert newlines to single characters for ease of
        // processing. We will change them back later.
        $string = str_replace($eolChar, "\n", $string);

        $chars    = str_split($string);
        $numChars = count($chars);
        for ($i = 0; $i < $numChars; $i++) {
            $char = $chars[$i];

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content       = PHP_CodeSniffer::prepareForOutput($char);
                $bufferContent = PHP_CodeSniffer::prepareForOutput($buffer);

                if ($inString !== '') {
                    echo "\t";
                }

                if ($inComment !== '') {
                    echo "\t";
                }

                echo "\tProcess char $i => $content (buffer: $bufferContent)".PHP_EOL;
            }//end if

            if ($inString === '' && $inComment === '' && $buffer !== '') {
                // If the buffer only has whitespace and we are about to
                // add a character, store the whitespace first.
                if (trim($char) !== '' && trim($buffer) === '') {
                    $tokens[] = array(
                        'code'    => T_WHITESPACE,
                        'type'    => 'T_WHITESPACE',
                        'content' => str_replace("\n", $eolChar, $buffer),
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_WHITESPACE ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }

                // If the buffer is not whitespace and we are about to
                // add a whitespace character, store the content first.
                if (trim($char) === '' && trim($buffer) !== '') {
                    $tokens[] = array(
                        'code'    => T_STRING,
                        'type'    => 'T_STRING',
                        'content' => str_replace("\n", $eolChar, $buffer),
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }
            }//end if

            // Process strings.
            if ($inComment === '' && isset($this->stringTokens[$char]) === true) {
                if ($inString === $char) {
                    // This could be the end of the string, but make sure it
                    // is not escaped first.
                    $escapes = 0;
                    for ($x = ($i - 1); $x >= 0; $x--) {
                        if ($chars[$x] !== '\\') {
                            break;
                        }

                        $escapes++;
                    }

                    if (($escapes % 2) === 0) {
                        // There is an even number escape chars,
                        // so this is not escaped, it is the end of the string.
                        $tokens[] = array(
                            'code'    => T_CONSTANT_ENCAPSED_STRING,
                            'type'    => 'T_CONSTANT_ENCAPSED_STRING',
                            'content' => str_replace("\n", $eolChar, $buffer).$char,
                        );

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo "\t\t* found end of string *".PHP_EOL;
                            $content = PHP_CodeSniffer::prepareForOutput($buffer.$char);
                            echo "\t=> Added token T_CONSTANT_ENCAPSED_STRING ($content)".PHP_EOL;
                        }

                        $buffer          = '';
                        $preStringBuffer = '';
                        $inString        = '';
                        $stringChar      = null;
                        continue;
                    }//end if
                } else if ($inString === '') {
                    // Remember where the string started and what was in the
                    // buffer, so we can rewind if this is not really a string.
                    $inString        = $char;
                    $stringChar      = $i;
                    $preStringBuffer = $buffer;

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* looking for string closer *".PHP_EOL;
                    }
                }//end if
            }//end if

            if ($inString !== '' && $char === "\n") {
                // Unless this newline character is escaped, the string did not
                // end before the end of the line, which means it probably
                // wasn't a string at all (maybe a regex).
                if ($chars[($i - 1)] !== '\\') {
                    // Rewind to the quote and treat it as a normal character.
                    $i               = $stringChar;
                    $buffer          = $preStringBuffer;
                    $preStringBuffer = '';
                    $inString        = '';
                    $stringChar      = null;
                    $char            = $chars[$i];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* found newline before end of string, bailing *".PHP_EOL;
                    }
                }
            }

            $buffer .= $char;

            // We don't look for special tokens inside strings,
            // so if we are in a string, we can continue here now
            // that the current char is in the buffer.
            if ($inString !== '') {
                continue;
            }

            // Special case for T_DIVIDE which can actually be
            // the start of a regular expression.
            if ($buffer === $char
                && $char === '/'
                && (isset($chars[($i + 1)]) === false || $chars[($i + 1)] !== '*')
            ) {
                // The content was normalised above, so the EOL character
                // inside $string and $chars is always "\n" at this point.
                $regex = $this->getRegexToken(
                    $i,
                    $string,
                    $chars,
                    $tokens,
                    "\n"
                );

                if ($regex !== null) {
                    $tokens[] = array(
                        'code'    => T_REGULAR_EXPRESSION,
                        'type'    => 'T_REGULAR_EXPRESSION',
                        'content' => $regex['content'],
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($regex['content']);
                        echo "\t=> Added token T_REGULAR_EXPRESSION ($content)".PHP_EOL;
                    }

                    $i           = $regex['end'];
                    $buffer      = '';
                    $cleanBuffer = false;
                    continue;
                }//end if
            }//end if

            // Check for known tokens, but ignore tokens found that are not at
            // the end of a string, like FOR and this.FORmat.
            if (isset($this->tokenValues[strtolower($buffer)]) === true
                && (preg_match('|[a-zA-Z0-9_]|', $char) === 0
                || isset($chars[($i + 1)]) === false
                || preg_match('|[a-zA-Z0-9_]|', $chars[($i + 1)]) === 0)
            ) {
                $matchedToken    = false;
                $lookAheadLength = ($maxTokenLength - strlen($buffer));

                if ($lookAheadLength > 0) {
                    // The buffer contains a token type, but we need
                    // to look ahead at the next chars to see if this is
                    // actually part of a larger token. For example,
                    // FOR and FOREACH.
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* buffer possibly contains token, looking ahead $lookAheadLength chars *".PHP_EOL;
                    }

                    $charBuffer = $buffer;
                    for ($x = 1; $x <= $lookAheadLength; $x++) {
                        if (isset($chars[($i + $x)]) === false) {
                            break;
                        }

                        $charBuffer .= $chars[($i + $x)];

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                            echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                        }

                        if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                            // We've found something larger that matches
                            // so we can ignore this char. Except for 1 very specific
                            // case where a comment like /**/ needs to tokenize as
                            // T_COMMENT and not T_DOC_COMMENT.
                            $oldType = $this->tokenValues[strtolower($buffer)];
                            $newType = $this->tokenValues[strtolower($charBuffer)];
                            if ($oldType === 'T_COMMENT'
                                && $newType === 'T_DOC_COMMENT'
                                && isset($chars[($i + $x + 1)]) === true
                                && $chars[($i + $x + 1)] === '/'
                            ) {
                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                    echo "\t\t* look ahead ignored T_DOC_COMMENT, continuing *".PHP_EOL;
                                }
                            } else {
                                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                    echo "\t\t* look ahead found more specific token ($newType), ignoring $i *".PHP_EOL;
                                }

                                $matchedToken = true;
                                break;
                            }
                        }//end if
                    }//end for
                }//end if

                if ($matchedToken === false) {
                    if (PHP_CODESNIFFER_VERBOSITY > 1 && $lookAheadLength > 0) {
                        echo "\t\t* look ahead found nothing *".PHP_EOL;
                    }

                    $value = $this->tokenValues[strtolower($buffer)];

                    if ($value === 'T_FUNCTION' && $buffer !== 'function') {
                        // The function keyword needs to be all lowercase or else
                        // it is just a function called "Function".
                        $value = 'T_STRING';
                    }

                    $tokens[] = array(
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $buffer,
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }

                    $cleanBuffer = true;
                }//end if
            } else if (isset($this->tokenValues[strtolower($char)]) === true) {
                // No matter what token we end up using, we don't
                // need the content in the buffer any more because we have
                // found a valid token.
                $newContent = substr(str_replace("\n", $eolChar, $buffer), 0, -1);
                if ($newContent !== '') {
                    $tokens[] = array(
                        'code'    => T_STRING,
                        'type'    => 'T_STRING',
                        'content' => $newContent,
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput(substr($buffer, 0, -1));
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }
                }

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* char is token, looking ahead ".($maxTokenLength - 1).' chars *'.PHP_EOL;
                }

                // The char is a token type, but we need to look ahead at the
                // next chars to see if this is actually part of a larger token.
                // For example, = and ===.
                $charBuffer   = $char;
                $matchedToken = false;
                for ($x = 1; $x <= $maxTokenLength; $x++) {
                    if (isset($chars[($i + $x)]) === false) {
                        break;
                    }

                    $charBuffer .= $chars[($i + $x)];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($charBuffer);
                        echo "\t\t=> Looking ahead $x chars => $content".PHP_EOL;
                    }

                    if (isset($this->tokenValues[strtolower($charBuffer)]) === true) {
                        // We've found something larger that matches
                        // so we can ignore this char.
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $this->tokenValues[strtolower($charBuffer)];
                            echo "\t\t* look ahead found more specific token ($type), ignoring $i *".PHP_EOL;
                        }

                        $matchedToken = true;
                        break;
                    }
                }//end for

                if ($matchedToken === false) {
                    $value    = $this->tokenValues[strtolower($char)];
                    $tokens[] = array(
                        'code'    => constant($value),
                        'type'    => $value,
                        'content' => $char,
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* look ahead found nothing *".PHP_EOL;
                        $content = PHP_CodeSniffer::prepareForOutput($char);
                        echo "\t=> Added token $value ($content)".PHP_EOL;
                    }

                    $cleanBuffer = true;
                } else {
                    // Start building the larger token from this char.
                    $buffer = $char;
                }//end if
            }//end if

            // Keep track of content inside comments.
            if ($inComment === ''
                && array_key_exists($buffer, $this->commentTokens) === true
            ) {
                // This is not really a comment if the content
                // looks like \// (i.e., it is escaped).
                if (isset($chars[($i - 2)]) === true && $chars[($i - 2)] === '\\') {
                    // Replace the comment token that was just added with
                    // one token for each of its characters.
                    $lastToken   = array_pop($tokens);
                    $lastContent = $lastToken['content'];

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $value   = $this->tokenValues[strtolower($lastContent)];
                        $content = PHP_CodeSniffer::prepareForOutput($lastContent);
                        echo "\t=> Removed token $value ($content)".PHP_EOL;
                    }

                    $lastChars    = str_split($lastContent);
                    $lastNumChars = count($lastChars);
                    for ($x = 0; $x < $lastNumChars; $x++) {
                        $lastChar = $lastChars[$x];
                        $value    = $this->tokenValues[strtolower($lastChar)];
                        $tokens[] = array(
                            'code'    => constant($value),
                            'type'    => $value,
                            'content' => $lastChar,
                        );

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = PHP_CodeSniffer::prepareForOutput($lastChar);
                            echo "\t=> Added token $value ($content)".PHP_EOL;
                        }
                    }
                } else {
                    // We have started a comment.
                    $inComment = $buffer;

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* looking for end of comment *".PHP_EOL;
                    }
                }//end if
            } else if ($inComment !== '') {
                if ($this->commentTokens[$inComment] === null) {
                    // Comment ends at the next newline.
                    if (strpos($buffer, "\n") !== false) {
                        $inComment = '';
                    }
                } else {
                    if ($this->commentTokens[$inComment] === $buffer) {
                        $inComment = '';
                    }
                }

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    if ($inComment === '') {
                        echo "\t\t* found end of comment *".PHP_EOL;
                    }
                }

                if ($inComment === '' && $cleanBuffer === false) {
                    $tokens[] = array(
                        'code'    => T_STRING,
                        'type'    => 'T_STRING',
                        'content' => str_replace("\n", $eolChar, $buffer),
                    );

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $content = PHP_CodeSniffer::prepareForOutput($buffer);
                        echo "\t=> Added token T_STRING ($content)".PHP_EOL;
                    }

                    $buffer = '';
                }
            }//end if

            if ($cleanBuffer === true) {
                $buffer      = '';
                $cleanBuffer = false;
            }
        }//end for

        // Compare against the empty string rather than using empty(),
        // which would silently drop a buffer containing just "0".
        if ($buffer !== '') {
            if (trim($buffer) === '') {
                // Buffer contains whitespace from the end of the file.
                $type = 'T_WHITESPACE';
            } else {
                // Buffer contains content that was never terminated,
                // such as the last word in a file with no trailing newline.
                $type = 'T_STRING';
            }

            $tokens[] = array(
                'code'    => constant($type),
                'type'    => $type,
                'content' => str_replace("\n", $eolChar, $buffer),
            );

            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $content = PHP_CodeSniffer::prepareForOutput($buffer);
                echo "\t=> Added token $type ($content)".PHP_EOL;
            }
        }

        $tokens[] = array(
            'code'    => T_CLOSE_TAG,
            'type'    => 'T_CLOSE_TAG',
            'content' => '',
        );

        /*
            Now that we have done some basic tokenizing, we need to
            modify the tokens to join some together and split some apart
            so they match what the PHP tokenizer does.
        */

        $finalTokens = array();
        $newStackPtr = 0;
        $numTokens   = count($tokens);
        for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
            $token = $tokens[$stackPtr];

            /*
                Look for comments and join the tokens together.
            */

            if ($token['code'] === T_COMMENT || $token['code'] === T_DOC_COMMENT) {
                $newContent   = '';
                $tokenContent = $token['content'];

                $endContent = null;
                if (isset($this->commentTokens[$tokenContent]) === true) {
                    $endContent = $this->commentTokens[$tokenContent];
                }

                while ($tokenContent !== $endContent) {
                    if ($endContent === null
                        && strpos($tokenContent, $eolChar) !== false
                    ) {
                        // A null end token means the comment ends at the end of
                        // the line so we look for newlines and split the token.
                        $tokens[$stackPtr]['content'] = substr(
                            $tokenContent,
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
                        );

                        $tokenContent = substr(
                            $tokenContent,
                            0,
                            (strpos($tokenContent, $eolChar) + strlen($eolChar))
                        );

                        // If the substr failed, skip the token as the content
                        // will now be blank.
                        if ($tokens[$stackPtr]['content'] !== false
                            && $tokens[$stackPtr]['content'] !== ''
                        ) {
                            $stackPtr--;
                        }

                        break;
                    }//end if

                    $stackPtr++;
                    $newContent .= $tokenContent;
                    if (isset($tokens[$stackPtr]) === false) {
                        break;
                    }

                    $tokenContent = $tokens[$stackPtr]['content'];
                }//end while

                if ($token['code'] === T_DOC_COMMENT) {
                    // Doc comments are broken up by the comment tokenizer.
                    $commentTokens = $commentTokenizer->tokenizeString($newContent.$tokenContent, $eolChar, $newStackPtr);
                    foreach ($commentTokens as $commentToken) {
                        $finalTokens[$newStackPtr] = $commentToken;
                        $newStackPtr++;
                    }

                    continue;
                } else {
                    // Save the new content in the current token so
                    // the code below can chop it up on newlines.
                    $token['content'] = $newContent.$tokenContent;
                }
            }//end if

            /*
                If this token has newlines in its content, split each line up
                and create a new token for each line. We do this so it's easier
                to ascertain where errors occur on a line.
            */

            if (strpos($token['content'], $eolChar) !== false) {
                $tokenLines = explode($eolChar, $token['content']);
                $numLines   = count($tokenLines);

                for ($i = 0; $i < $numLines; $i++) {
                    $newToken = array('content' => $tokenLines[$i]);
                    if ($i === ($numLines - 1)) {
                        // Nothing follows the final newline, so there
                        // is no extra token to create.
                        if ($tokenLines[$i] === '') {
                            break;
                        }
                    } else {
                        $newToken['content'] .= $eolChar;
                    }

                    $newToken['type']          = $token['type'];
                    $newToken['code']          = $token['code'];
                    $finalTokens[$newStackPtr] = $newToken;
                    $newStackPtr++;
                }
            } else {
                $finalTokens[$newStackPtr] = $token;
                $newStackPtr++;
            }//end if

            // Convert numbers, including decimals.
            if ($token['code'] === T_STRING
                || $token['code'] === T_OBJECT_OPERATOR
            ) {
                $newContent  = '';
                $oldStackPtr = $stackPtr;
                while (isset($tokens[$stackPtr]) === true
                    && preg_match('|^[0-9\.]+$|', $tokens[$stackPtr]['content']) !== 0
                ) {
                    $newContent .= $tokens[$stackPtr]['content'];
                    $stackPtr++;
                }

                if ($newContent !== '' && $newContent !== '.') {
                    // Replace the token that was just added with the number.
                    $finalTokens[($newStackPtr - 1)]['content'] = $newContent;
                    if (ctype_digit($newContent) === true) {
                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_LNUMBER');
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_LNUMBER';
                    } else {
                        $finalTokens[($newStackPtr - 1)]['code'] = constant('T_DNUMBER');
                        $finalTokens[($newStackPtr - 1)]['type'] = 'T_DNUMBER';
                    }

                    $stackPtr--;
                    continue;
                } else {
                    $stackPtr = $oldStackPtr;
                }
            }//end if

            // Convert the token after an object operator into a string, in most cases.
            if ($token['code'] === T_OBJECT_OPERATOR) {
                for ($i = ($stackPtr + 1); $i < $numTokens; $i++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
                        continue;
                    }

                    if ($tokens[$i]['code'] !== T_PROTOTYPE
                        && $tokens[$i]['code'] !== T_LNUMBER
                        && $tokens[$i]['code'] !== T_DNUMBER
                    ) {
                        $tokens[$i]['code'] = T_STRING;
                        $tokens[$i]['type'] = 'T_STRING';
                    }

                    break;
                }
            }
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END TOKENIZING ***".PHP_EOL;
        }

        return $finalTokens;

    }//end tokenizeString()


    /**
     * Tokenizes a regular expression if one is found.
     *
     * A slash is only treated as the start of a regular expression when the
     * previous non-empty token is one that can precede an expression, the
     * closing slash is found on the same line, and the content following the
     * regular expression (after any modifiers and spaces) is either one of a
     * small set of characters or the end of the content.
     *
     * If a regular expression is not found, NULL is returned.
     *
     * @param int    $char    The index of the possible regex start character.
     * @param string $string  The complete content of the string being tokenized.
     * @param array  $chars   An array of characters being tokenized.
     * @param array  $tokens  The current array of tokens found in the string.
     * @param string $eolChar The EOL character used inside $string and $chars.
     *
     * @return array|null An array with the indexes "start", "end" and
     *                    "content", or NULL if this is not a regular expression.
     */
    public function getRegexToken($char, $string, $chars, $tokens, $eolChar)
    {
        $beforeTokens = array(
            T_EQUAL               => true,
            T_IS_NOT_EQUAL        => true,
            T_IS_IDENTICAL        => true,
            T_IS_NOT_IDENTICAL    => true,
            T_OPEN_PARENTHESIS    => true,
            T_OPEN_SQUARE_BRACKET => true,
            T_RETURN              => true,
            T_BOOLEAN_OR          => true,
            T_BOOLEAN_AND         => true,
            T_BITWISE_OR          => true,
            T_BITWISE_AND         => true,
            T_COMMA               => true,
            T_COLON               => true,
            T_TYPEOF              => true,
            T_INLINE_THEN         => true,
            T_INLINE_ELSE         => true,
        );

        $afterTokens = array(
            ','      => true,
            ')'      => true,
            ']'      => true,
            ';'      => true,
            ' '      => true,
            '.'      => true,
            ':'      => true,
            $eolChar => true,
        );

        // Find the last non-whitespace token that was added
        // to the tokens array.
        $numTokens = count($tokens);
        for ($prev = ($numTokens - 1); $prev >= 0; $prev--) {
            if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$prev]['code']]) === false) {
                break;
            }
        }

        if ($prev < 0 || isset($beforeTokens[$tokens[$prev]['code']]) === false) {
            return null;
        }

        // This is probably a regular expression, so look for the end of it.
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* token possibly starts a regular expression *".PHP_EOL;
        }

        $numChars  = count($chars);
        $eolLength = strlen($eolChar);
        for ($next = ($char + 1); $next < $numChars; $next++) {
            if ($chars[$next] === '/') {
                // Just make sure this is not escaped first. An even number
                // of preceding backslashes means the slash itself is not
                // escaped (e.g., /...\\/), so we found the end.
                $escapes = 0;
                for ($x = ($next - 1); $x > $char; $x--) {
                    if ($chars[$x] !== '\\') {
                        break;
                    }

                    $escapes++;
                }

                if (($escapes % 2) === 0) {
                    break;
                }
            } else if (substr($string, $next, $eolLength) === $eolChar) {
                // This is the last token on the line and regular
                // expressions need to be defined on a single line,
                // so this is not a regular expression.
                break;
            }
        }

        if ($next >= $numChars || $chars[$next] !== '/') {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t* could not find end of regular expression *".PHP_EOL;
            }

            return null;
        }

        while (isset($chars[($next + 1)]) === true
            && preg_match('|[a-zA-Z]|', $chars[($next + 1)]) !== 0
        ) {
            // The token directly after the end of the regex can
            // be modifiers like global and case insensitive
            // (e.g., /pattern/gi).
            $next++;
        }

        $regexEnd = $next;
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t* found end of regular expression at token $regexEnd *".PHP_EOL;
        }

        // Skip any spaces to find the first character after the regex.
        $next++;
        while ($next < $numChars && $chars[$next] === ' ') {
            $next++;
        }

        // Reaching the end of the content is treated like reaching
        // the end of the line.
        if ($next < $numChars && isset($afterTokens[$chars[$next]]) === false) {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t* tokens after regular expression do not look correct *".PHP_EOL;
            }

            return null;
        }

        // This is a regular expression, so join all the tokens together.
        $content = implode('', array_slice($chars, $char, ($regexEnd - $char + 1)));

        $token = array(
            'start'   => $char,
            'end'     => $regexEnd,
            'content' => $content,
        );

        return $token;

    }//end getRegexToken()


    /**
     * Performs additional processing after main tokenizing.
     *
     * This additional processing looks for properties, closures, labels and objects.
     * The token array is modified in place.
     *
     * @param array  $tokens  The array of tokens to process.
     * @param string $eolChar The EOL character to use for splitting strings.
     *
     * @return void
     */
    public function processAdditional(&$tokens, $eolChar)
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

        $numTokens  = count($tokens);
        $classStack = array();

        for ($i = 0; $i < $numTokens; $i++) {
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                $type    = $tokens[$i]['type'];
                $content = PHP_CodeSniffer::prepareForOutput($tokens[$i]['content']);

                echo str_repeat("\t", count($classStack));
                echo "\tProcess token $i: $type => $content".PHP_EOL;
            }

            // Looking for functions that are actually closures.
            if ($tokens[$i]['code'] === T_FUNCTION && isset($tokens[$i]['scope_opener']) === true) {
                for ($x = ($i + 1); $x < $numTokens; $x++) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$x]['code']]) === false) {
                        break;
                    }
                }

                // A function keyword followed directly by an opening
                // parenthesis has no name, so it is a closure.
                if ($x < $numTokens && $tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
                    $tokens[$i]['code'] = T_CLOSURE;
                    $tokens[$i]['type'] = 'T_CLOSURE';
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $line = $tokens[$i]['line'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $i on line $line changed from T_FUNCTION to T_CLOSURE".PHP_EOL;
                    }

                    for ($x = ($tokens[$i]['scope_opener'] + 1); $x < $tokens[$i]['scope_closer']; $x++) {
                        if (isset($tokens[$x]['conditions'][$i]) === false) {
                            continue;
                        }

                        $tokens[$x]['conditions'][$i] = T_CLOSURE;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $type = $tokens[$x]['type'];
                            echo str_repeat("\t", count($classStack));
                            echo "\t\t* cleaned $x ($type) *".PHP_EOL;
                        }
                    }
                }//end if

                continue;
            } else if ($tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
                && isset($tokens[$i]['scope_condition']) === false
                && isset($tokens[$i]['bracket_closer']) === true
            ) {
                // A curly bracket that does not open a scope is the
                // start of an object.
                $classStack[] = $i;

                $closer = $tokens[$i]['bracket_closer'];
                $tokens[$i]['code']      = T_OBJECT;
                $tokens[$i]['type']      = 'T_OBJECT';
                $tokens[$closer]['code'] = T_CLOSE_OBJECT;
                $tokens[$closer]['type'] = 'T_CLOSE_OBJECT';

                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $i converted from T_OPEN_CURLY_BRACKET to T_OBJECT *".PHP_EOL;
                    echo str_repeat("\t", count($classStack));
                    echo "\t* token $closer converted from T_CLOSE_CURLY_BRACKET to T_CLOSE_OBJECT *".PHP_EOL;
                }

                for ($x = ($i + 1); $x < $closer; $x++) {
                    $tokens[$x]['conditions'][$i] = T_OBJECT;
                    ksort($tokens[$x]['conditions'], SORT_NUMERIC);
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        $type = $tokens[$x]['type'];
                        echo str_repeat("\t", count($classStack));
                        echo "\t\t* added T_OBJECT condition to $x ($type) *".PHP_EOL;
                    }
                }
            } else if ($tokens[$i]['code'] === T_CLOSE_OBJECT) {
                // We have left the innermost object.
                array_pop($classStack);
            } else if ($tokens[$i]['code'] === T_COLON) {
                // If it is a scope opener, it belongs to a
                // DEFAULT or CASE statement.
                if (isset($tokens[$i]['scope_condition']) === true) {
                    continue;
                }

                // Make sure this is not part of an inline IF statement.
                for ($x = ($i - 1); $x >= 0; $x--) {
                    if ($tokens[$x]['code'] === T_INLINE_THEN) {
                        $tokens[$i]['code'] = T_INLINE_ELSE;
                        $tokens[$i]['type'] = 'T_INLINE_ELSE';

                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo str_repeat("\t", count($classStack));
                            echo "\t* token $i converted from T_COLON to T_INLINE_ELSE *".PHP_EOL;
                        }

                        continue(2);
                    } else if ($tokens[$x]['line'] < $tokens[$i]['line']) {
                        // Only the line the colon is on is checked.
                        break;
                    }
                }

                // The string to the left of the colon is either a property or label.
                for ($label = ($i - 1); $label >= 0; $label--) {
                    if (isset(PHP_CodeSniffer_Tokens::$emptyTokens[$tokens[$label]['code']]) === false) {
                        break;
                    }
                }

                if ($label < 0
                    || ($tokens[$label]['code'] !== T_STRING
                    && $tokens[$label]['code'] !== T_CONSTANT_ENCAPSED_STRING)
                ) {
                    continue;
                }

                if (empty($classStack) === false) {
                    // Inside an object, so this is a property name.
                    $tokens[$label]['code'] = T_PROPERTY;
                    $tokens[$label]['type'] = 'T_PROPERTY';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_PROPERTY *".PHP_EOL;
                    }
                } else {
                    $tokens[$label]['code'] = T_LABEL;
                    $tokens[$label]['type'] = 'T_LABEL';

                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo str_repeat("\t", count($classStack));
                        echo "\t* token $label converted from T_STRING to T_LABEL *".PHP_EOL;
                    }
                }//end if
            }//end if
        }//end for

        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** END ADDITIONAL JS PROCESSING ***".PHP_EOL;
        }

    }//end processAdditional()


}//end class
|