CONTRIBUTING.md 0000644 00000000745 13663045604 0007012 0 ustar 00 # CONTRIBUTING
Make sure you read our [contributing guide][contributing guide on the website].
[contributing guide on the website]:https://www.doctrine-project.org/contribute
## Installing dependencies
```shell
composer install
composer bin all install
```
## Running checks locally
Here is a script that runs all the checks; you can also use it as a git hook:
```shell
#!/bin/bash -eu
vendor/bin/phpunit --testdox
vendor/bin/psalm
echo '' | vendor/bin/phpcs
vendor/bin/phpstan analyze
```
LICENSE.txt 0000644 00000002116 13663045604 0006376 0 ustar 00 The MIT License (MIT)
Copyright (c) 2013 Jeremy Dorn
php bin/sql-formatter \"SELECT * FROM MyTable WHERE (id>5 AND \\`name\\` LIKE \\"testing\\");\"
";
echo "echo \"SELECT * FROM MyTable WHERE (id>5 AND \\`name\\` LIKE \\"testing\\");\" | php bin/sql-formatter
";
exit;
}
// Take the SQL from the first command-line argument when one is given;
// otherwise consume everything available on standard input.
$sql = $argv[1] ?? stream_get_contents(fopen('php://stdin', 'r'));
require_once(__DIR__.'/../lib/SqlFormatter.php');
/**
 * Tells whether colored output can be written to STDOUT.
 *
 * The answer is decided per platform:
 *
 * - When the directory separator is a backslash (Windows), color is reported
 *   only if the ANSICON environment variable is set or ConEmuANSI equals "ON".
 * - Otherwise, color is reported only if posix_isatty() exists and says that
 *   STDOUT is a terminal.
 *
 * @return bool true if the stream supports colorization, false otherwise
 * @link https://github.com/symfony/symfony/blob/v2.4.6/src/Symfony/Component/Console/Output/StreamOutput.php#L97
 */
function hasColorSupport() {
    if ('\\' !== DIRECTORY_SEPARATOR) {
        // Without the posix extension there is no way to probe the stream here
        if (! function_exists('posix_isatty')) {
            return false;
        }

        return (bool) @posix_isatty(STDOUT);
    }

    if (getenv('ANSICON') !== false) {
        return true;
    }

    return getenv('ConEmuANSI') === 'ON';
}
// Print the formatted query, passing the result of the color-support probe
// as the second argument of SqlFormatter::format().
echo SqlFormatter::format($sql, hasColorSupport());
composer.json 0000644 00000001733 13663045604 0007301 0 ustar 00 {
"name": "doctrine/sql-formatter",
"description": "a PHP SQL highlighting library",
"homepage": "https://github.com/doctrine/sql-formatter/",
"keywords": ["sql", "highlight"],
"license": "MIT",
"type": "library",
"require": {
"php": "^7.1"
},
"require-dev": {
"bamarni/composer-bin-plugin": "^1.4"
},
"authors": [
{
"name": "Jeremy Dorn",
"email": "jeremy@jeremydorn.com",
"homepage": "http://jeremydorn.com/"
}
],
"autoload": {
"psr-4": {
"Doctrine\\SqlFormatter\\": "src"
}
},
"autoload-dev": {
"psr-4": {
"Doctrine\\SqlFormatter\\Tests\\": "tests"
}
},
"config": {
"sort-packages": true,
"platform": {
"php": "7.1.0"
}
},
"extra": {
"branch-alias": {
"dev-master": "1.x-dev"
}
},
"bin": ["bin/sql-formatter"]
}
src/CliHighlighter.php 0000644 00000004041 13663045604 0010740 0 ustar 00 */
private $escapeSequences;
/**
* @param arrayhtmlAttributes[self::HIGHLIGHT_PRE] . '>' . $string . ''; } } src/NullHighlighter.php 0000644 00000001007 13663045604 0011142 0 ustar 00 tokenizer = new Tokenizer(); $this->highlighter = $highlighter ?? (PHP_SAPI === 'cli' ? new CliHighlighter() : new HtmlHighlighter()); } /** * Format the whitespace in a SQL string to make it easier to read. * * @param string $string The SQL string * * @return string The SQL string with HTML styles and formatting wrapped in a
tag */ public function format(string $string, string $indentString = ' ') : string { // This variable will be populated with formatted html $return = ''; // Use an actual tab while formatting and then switch out with $indentString at the end $tab = "\t"; $indentLevel = 0; $newline = false; $inlineParentheses = false; $increaseSpecialIndent = false; $increaseBlockIndent = false; $indentTypes = []; $addedNewline = false; $inlineCount = 0; $inlineIndented = false; $clauseLimit = false; // Tokenize String $cursor = $this->tokenizer->tokenize($string); // Format token by token while ($token = $cursor->next(Token::TOKEN_TYPE_WHITESPACE)) { $highlighted = $this->highlighter->highlightToken( $token->type(), $token->value() ); // If we are increasing the special indent level now if ($increaseSpecialIndent) { $indentLevel++; $increaseSpecialIndent = false; array_unshift($indentTypes, 'special'); } // If we are increasing the block indent level now if ($increaseBlockIndent) { $indentLevel++; $increaseBlockIndent = false; array_unshift($indentTypes, 'block'); } // If we need a new line before the token if ($newline) { $return = rtrim($return, ' '); $return .= "\n" . str_repeat($tab, $indentLevel); $newline = false; $addedNewline = true; } else { $addedNewline = false; } // Display comments directly where they appear in the source if ($token->isOfType(Token::TOKEN_TYPE_COMMENT, Token::TOKEN_TYPE_BLOCK_COMMENT)) { if ($token->isOfType(Token::TOKEN_TYPE_BLOCK_COMMENT)) { $indent = str_repeat($tab, $indentLevel); $return = rtrim($return, " \t"); $return .= "\n" . $indent; $highlighted = str_replace("\n", "\n" . $indent, $highlighted); } $return .= $highlighted; $newline = true; continue; } if ($inlineParentheses) { // End of inline parentheses if ($token->value() === ')') { $return = rtrim($return, ' '); if ($inlineIndented) { array_shift($indentTypes); $indentLevel--; $return = rtrim($return, ' '); $return .= "\n" . 
str_repeat($tab, $indentLevel); } $inlineParentheses = false; $return .= $highlighted . ' '; continue; } if ($token->value() === ',') { if ($inlineCount >= 30) { $inlineCount = 0; $newline = true; } } $inlineCount += strlen($token->value()); } // Opening parentheses increase the block indent level and start a new line if ($token->value() === '(') { // First check if this should be an inline parentheses block // Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2) // Allow up to 3 non-whitespace tokens inside inline parentheses $length = 0; $subCursor = $cursor->subCursor(); for ($j=1; $j<=250; $j++) { // Reached end of string $next = $subCursor->next(Token::TOKEN_TYPE_WHITESPACE); if (! $next) { break; } // Reached closing parentheses, able to inline it if ($next->value() === ')') { $inlineParentheses = true; $inlineCount = 0; $inlineIndented = false; break; } // Reached an invalid token for inline parentheses if ($next->value()===';' || $next->value()==='(') { break; } // Reached an invalid token type for inline parentheses if ($next->isOfType( Token::TOKEN_TYPE_RESERVED_TOPLEVEL, Token::TOKEN_TYPE_RESERVED_NEWLINE, Token::TOKEN_TYPE_COMMENT, Token::TOKEN_TYPE_BLOCK_COMMENT )) { break; } $length += strlen($next->value()); } if ($inlineParentheses && $length > 30) { $increaseBlockIndent = true; $inlineIndented = true; $newline = true; } // Take out the preceding space unless there was whitespace there in the original query $prevToken = $cursor->subCursor()->previous(); if ($prevToken && ! $prevToken->isOfType(Token::TOKEN_TYPE_WHITESPACE)) { $return = rtrim($return, ' '); } if (! 
$inlineParentheses) { $increaseBlockIndent = true; // Add a newline after the parentheses $newline = true; } } elseif ($token->value() === ')') { // Closing parentheses decrease the block indent level // Remove whitespace before the closing parentheses $return = rtrim($return, ' '); $indentLevel--; // Reset indent level while ($j=array_shift($indentTypes)) { if ($j!=='special') { break; } $indentLevel--; } if ($indentLevel < 0) { // This is an error $indentLevel = 0; $return .= $this->highlighter->highlightError($token->value()); continue; } // Add a newline before the closing parentheses (if not already added) if (! $addedNewline) { $return .= "\n" . str_repeat($tab, $indentLevel); } } elseif ($token->isOfType(Token::TOKEN_TYPE_RESERVED_TOPLEVEL)) { // Top level reserved words start a new line and increase the special indent level $increaseSpecialIndent = true; // If the last indent type was 'special', decrease the special indent for this round reset($indentTypes); if (current($indentTypes)==='special') { $indentLevel--; array_shift($indentTypes); } // Add a newline after the top level reserved word $newline = true; // Add a newline before the top level reserved word (if not already added) if (! $addedNewline) { $return = rtrim($return, ' '); $return .= "\n" . str_repeat($tab, $indentLevel); } else { // If we already added a newline, redo the indentation since it may be different now $return = rtrim($return, $tab) . str_repeat($tab, $indentLevel); } if ($token->hasExtraWhitespace()) { $highlighted = preg_replace('/\s+/', ' ', $highlighted); } //if SQL 'LIMIT' clause, start variable to reset newline if ($token->value() === 'LIMIT' && ! $inlineParentheses) { $clauseLimit = true; } } elseif ($clauseLimit && $token->value() !== ',' && ! $token->isOfType(Token::TOKEN_TYPE_NUMBER, Token::TOKEN_TYPE_WHITESPACE)) { // Checks if we are out of the limit clause $clauseLimit = false; } elseif ($token->value() === ',' && ! 
$inlineParentheses) { // Commas start a new line (unless within inline parentheses or SQL 'LIMIT' clause) //If the previous TOKEN_VALUE is 'LIMIT', resets new line if ($clauseLimit === true) { $newline = false; $clauseLimit = false; } else { // All other cases of commas $newline = true; } } elseif ($token->isOfType(Token::TOKEN_TYPE_RESERVED_NEWLINE)) { // Newline reserved words start a new line // Add a newline before the reserved word (if not already added) if (! $addedNewline) { $return = rtrim($return, ' '); $return .= "\n" . str_repeat($tab, $indentLevel); } if ($token->hasExtraWhitespace()) { $highlighted = preg_replace('/\s+/', ' ', $highlighted); } } elseif ($token->isOfType(Token::TOKEN_TYPE_BOUNDARY)) { // Multiple boundary characters in a row should not have spaces between them (not including parentheses) $prevNotWhitespaceToken = $cursor->subCursor()->previous(Token::TOKEN_TYPE_WHITESPACE); if ($prevNotWhitespaceToken && $prevNotWhitespaceToken->isOfType(Token::TOKEN_TYPE_BOUNDARY)) { $prevToken = $cursor->subCursor()->previous(); if ($prevToken && ! $prevToken->isOfType(Token::TOKEN_TYPE_WHITESPACE)) { $return = rtrim($return, ' '); } } } // If the token shouldn't have a space before it if ($token->value() === '.' || $token->value() === ',' || $token->value() === ';') { $return = rtrim($return, ' '); } $return .= $highlighted . ' '; // If the token shouldn't have a space after it if ($token->value() === '(' || $token->value() === '.') { $return = rtrim($return, ' '); } // If this is the "-" of a negative number, it shouldn't have a space after it if ($token->value() !== '-') { continue; } $nextNotWhitespace = $cursor->subCursor()->next(Token::TOKEN_TYPE_WHITESPACE); if (! $nextNotWhitespace || ! $nextNotWhitespace->isOfType(Token::TOKEN_TYPE_NUMBER)) { continue; } $prev = $cursor->subCursor()->previous(Token::TOKEN_TYPE_WHITESPACE); if (! 
$prev) { continue; } if ($prev->isOfType( Token::TOKEN_TYPE_QUOTE, Token::TOKEN_TYPE_BACKTICK_QUOTE, Token::TOKEN_TYPE_WORD, Token::TOKEN_TYPE_NUMBER )) { continue; } $return = rtrim($return, ' '); } // If there are unmatched parentheses if (array_search('block', $indentTypes) !== false) { $return = rtrim($return, ' '); $return .= $this->highlighter->highlightErrorMessage( 'WARNING: unclosed parentheses or section' ); } // Replace tab characters with the configuration tab character $return = trim(str_replace("\t", $indentString, $return)); return $this->highlighter->output($return); } /** * Add syntax highlighting to a SQL string * * @param string $string The SQL string * * @return string The SQL string with HTML styles applied */ public function highlight(string $string) : string { $cursor = $this->tokenizer->tokenize($string); $return = ''; while ($token = $cursor->next()) { $return .= $this->highlighter->highlightToken( $token->type(), $token->value() ); } return $this->highlighter->output($return); } /** * Compress a query by collapsing white space and removing comments * * @param string $string The SQL string * * @return string The SQL string without comments */ public function compress(string $string) : string { $result = ''; $cursor = $this->tokenizer->tokenize($string); $whitespace = true; while ($token = $cursor->next()) { // Skip comment tokens if ($token->isOfType(Token::TOKEN_TYPE_COMMENT, Token::TOKEN_TYPE_BLOCK_COMMENT)) { continue; } // Remove extra whitespace in reserved words (e.g "OUTER JOIN" becomes "OUTER JOIN") if ($token->isOfType( Token::TOKEN_TYPE_RESERVED, Token::TOKEN_TYPE_RESERVED_NEWLINE, Token::TOKEN_TYPE_RESERVED_TOPLEVEL )) { $newValue = preg_replace('/\s+/', ' ', $token->value()); assert($newValue !== null); $token = $token->withValue($newValue); } if ($token->isOfType(Token::TOKEN_TYPE_WHITESPACE)) { // If the last token was whitespace, don't add another one if ($whitespace) { continue; } $whitespace = true; // Convert all 
whitespace to a single space $token = $token->withValue(' '); } else { $whitespace = false; } $result .= $token->value(); } return rtrim($result); } } src/Token.php 0000644 00000003357 13663045604 0007143 0 ustar 00 type = $type; $this->value = $value; } public function value() : string { return $this->value; } public function type() : int { return $this->type; } public function isOfType(int ...$types) : bool { return in_array($this->type, $types, true); } public function hasExtraWhitespace() : bool { return strpos($this->value(), ' ')!== false || strpos($this->value(), "\n") !== false || strpos($this->value(), "\t") !== false; } public function withValue(string $value) : self { return new self($this->type(), $value); } } src/Tokenizer.php 0000644 00000053661 13663045604 0010040 0 ustar 00 ', '+', '-', '*', '/', '!', '^', '%', '|', '&', '#', ]; /** * Stuff that only needs to be done once. Builds regular expressions and * sorts the reserved words. */ public function __construct() { // Sort reserved word list from longest word to shortest, 3x faster than usort $reservedMap = array_combine($this->reserved, array_map('strlen', $this->reserved)); assert($reservedMap !== false); arsort($reservedMap); $this->reserved = array_keys($reservedMap); // Set up regular expressions $this->regexBoundaries = '(' . implode( '|', $this->quoteRegex($this->boundaries) ) . ')'; $this->regexReserved = '(' . implode( '|', $this->quoteRegex($this->reserved) ) . ')'; $this->regexReservedToplevel = str_replace(' ', '\\s+', '(' . implode( '|', $this->quoteRegex($this->reservedToplevel) ) . ')'); $this->regexReservedNewline = str_replace(' ', '\\s+', '(' . implode( '|', $this->quoteRegex($this->reservedNewline) ) . ')'); $this->regexFunction = '(' . implode('|', $this->quoteRegex($this->functions)) . ')'; } /** * Takes a SQL string and breaks it into tokens. * Each token is an associative array with type and value. 
*
     * @param string $string The SQL string
     *
     * @return Cursor A cursor built from the Token objects, in the order they were found
     */
    public function tokenize(string $string) : Cursor
    {
        $tokens    = [];
        $previous  = null;
        $remaining = strlen($string);

        // Length seen on the previous pass. It starts one above the input length
        // so that the very first "did we shrink?" check succeeds.
        $lastRemaining = $remaining + 1;

        while ($remaining !== 0) {
            // Every pass has to consume at least one byte. If the last one did not,
            // hand back whatever is left as a single error token and stop.
            if ($remaining >= $lastRemaining) {
                $tokens[] = new Token(Token::TOKEN_TYPE_ERROR, $string);
                break;
            }

            $lastRemaining = $remaining;

            // The previous token is passed along because createNextToken() takes it
            // into account (e.g. for words that follow a '.').
            $previous = $this->createNextToken($string, $previous);
            $tokens[] = $previous;

            // Drop the consumed prefix from the working string
            $consumed   = strlen($previous->value());
            $string     = substr($string, $consumed);
            $remaining -= $consumed;
        }

        return new Cursor($tokens);
    }

    /**
     * Return the token found at the very start of a SQL string.
     * Quoted strings, comments, reserved words, whitespace, and punctuation
     * are all their own tokens.
     *
     * @param string     $string   The SQL string
     * @param Token|null $previous The result of the previous createNextToken() call
     *
     * @return Token The token (type and value) that $string starts with
*/
    private function createNextToken(string $string, ?Token $previous = null) : Token
    {
        $matches = [];

        // Whitespace
        if (preg_match('/^\s+/', $string, $matches)) {
            return new Token(Token::TOKEN_TYPE_WHITESPACE, $matches[0]);
        }

        // Comment: "#" or "--" run to the end of the line, "/*" runs to the matching "*/"
        $isLineComment  = $string[0] === '#' || strncmp($string, '--', 2) === 0;
        $isBlockComment = strncmp($string, '/*', 2) === 0;

        if ($isLineComment || $isBlockComment) {
            if ($isLineComment) {
                $last = strpos($string, "\n");
                $type = Token::TOKEN_TYPE_COMMENT;
            } else {
                // Start searching after the opening "/*" so that "/*/" is not seen as closed
                $pos  = strpos($string, '*/', 2);
                $last = $pos === false ? false : $pos + 2;
                $type = Token::TOKEN_TYPE_BLOCK_COMMENT;
            }

            // An unterminated comment (no newline / no closing "*/") extends to the end
            // of the input instead of failing an assertion or yielding a 2-byte token.
            if ($last === false) {
                $last = strlen($string);
            }

            return new Token($type, substr($string, 0, $last));
        }

        // Quoted String
        if ($string[0] === '"' || $string[0] === '\'' || $string[0] === '`' || $string[0] === '[') {
            return new Token(
                ($string[0] === '`' || $string[0] === '['
                    ? Token::TOKEN_TYPE_BACKTICK_QUOTE
                    : Token::TOKEN_TYPE_QUOTE),
                $this->getQuotedString($string)
            );
        }

        // User-defined Variable
        if (($string[0] === '@' || $string[0] === ':') && isset($string[1])) {
            $value = null;
            $type  = Token::TOKEN_TYPE_VARIABLE;

            if ($string[1] === '"' || $string[1] === '\'' || $string[1] === '`') {
                // If the variable name is quoted
                $value = $string[0] . $this->getQuotedString(substr($string, 1));
            } else {
                // Non-quoted variable name
                preg_match('/^(' . $string[0] . '[a-zA-Z0-9\._\$]+)/', $string, $matches);
                if ($matches) {
                    $value = $matches[1];
                }
            }

            // A lone "@" or ":" falls through to the rules below
            if ($value !== null) {
                return new Token($type, $value);
            }
        }

        // Number (decimal, binary, or hex).
        // The number must be followed by end of input, whitespace, a quote character or a
        // boundary character. The quote alternative is a character class: previously it was
        // the literal three-character sequence "'` and therefore practically never matched.
        if (preg_match(
            '/^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|["\'`]|' . $this->regexBoundaries . ')/',
            $string,
            $matches
        )) {
            return new Token(Token::TOKEN_TYPE_NUMBER, $matches[1]);
        }

        // Boundary Character (punctuation and symbols)
        if (preg_match('/^(' . $this->regexBoundaries . ')/', $string, $matches)) {
            return new Token(Token::TOKEN_TYPE_BOUNDARY, $matches[1]);
        }

        // Reserved words and functions are matched against an upper-cased copy of the
        // input; the token value itself is always cut from the original string so the
        // caller's casing is preserved.
        $upper = strtoupper($string);

        // A reserved word cannot be preceded by a '.'
        // this makes it so in "mytable.from", "from" is not considered a reserved word
        if (! $previous || $previous->value() !== '.') {
            // Top Level Reserved Word
            if (preg_match(
                '/^(' . $this->regexReservedToplevel . ')($|\s|' . $this->regexBoundaries . ')/',
                $upper,
                $matches
            )) {
                return new Token(
                    Token::TOKEN_TYPE_RESERVED_TOPLEVEL,
                    substr($string, 0, strlen($matches[1]))
                );
            }

            // Newline Reserved Word
            if (preg_match(
                '/^(' . $this->regexReservedNewline . ')($|\s|' . $this->regexBoundaries . ')/',
                $upper,
                $matches
            )) {
                return new Token(
                    Token::TOKEN_TYPE_RESERVED_NEWLINE,
                    substr($string, 0, strlen($matches[1]))
                );
            }

            // Other Reserved Word
            if (preg_match(
                '/^(' . $this->regexReserved . ')($|\s|' . $this->regexBoundaries . ')/',
                $upper,
                $matches
            )) {
                return new Token(
                    Token::TOKEN_TYPE_RESERVED,
                    substr($string, 0, strlen($matches[1]))
                );
            }
        }

        // A function must be succeeded by '('
        // this makes it so "count(" is considered a function, but "count" alone is not.
        // The match includes the '(' itself, hence the "- 1" when cutting the token value.
        if (preg_match('/^(' . $this->regexFunction . '[(]|\s|[)])/', $upper, $matches)) {
            return new Token(
                Token::TOKEN_TYPE_RESERVED,
                substr($string, 0, strlen($matches[1])-1)
            );
        }

        // Non reserved word: everything up to the next whitespace, quote or boundary character
        preg_match('/^(.*?)($|\s|["\'`]|' . $this->regexBoundaries . ')/', $string, $matches);

        return new Token(Token::TOKEN_TYPE_WORD, $matches[1]);
    }

    /**
     * Helper function for building regular expressions for reserved words and boundary characters
     *
     * Each string is escaped with preg_quote() using '/' as the delimiter.
     *
     * @param string[] $strings The strings to be quoted
     *
     * @return string[] The quoted strings
     */
    private function quoteRegex(array $strings) : array
    {
        return array_map(static function (string $string) : string {
            return preg_quote($string, '/');
        }, $strings);
    }

    /**
     * Extract the quoted string that $string starts with, including its quote characters.
     *
     * A string whose closing quote is missing is matched up to the end of the input.
     *
     * @param string $string Input expected to start with `, [, " or '
     *
     * @return string The quoted string, or '' when the input does not start with one
     */
    private function getQuotedString(string $string) : string
    {
        // This checks for the following patterns:
        // 1. backtick quoted string using `` to escape
        // 2. square bracket quoted string (SQL Server) using ]] to escape
        // 3. double quoted string using "" or \" to escape
        // 4. single quoted string using '' or \' to escape
        if (preg_match(
            '/^(((`[^`]*($|`))+)| ((\[[^\]]*($|\]))(\][^\]]*($|\]))*)| (("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)| ((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/sx',
            $string,
            $matches
        )) {
            return $matches[1];
        }

        return '';
    }
}