Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
98.94% |
186 / 188 |
|
88.89% |
16 / 18 |
CRAP | |
0.00% |
0 / 1 |
| QueryLexer | |
98.94% |
186 / 188 |
|
88.89% |
16 / 18 |
93 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| tokens | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
| scan | |
100.00% |
79 / 79 |
|
100.00% |
1 / 1 |
36 | |||
| addOperator | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| addBooleanOperator | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| isIdentifier | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| consumeIdentifier | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
14 | |||
| consumeNumber | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
7 | |||
| consumeString | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
7.01 | |||
| advance | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| peek | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| peekNext | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| matchNext | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
| atEnd | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| addToken | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| getLexeme | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| getIdentifierType | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
11 | |||
| error | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Cosray\Finder; |
| 6 | |
| 7 | use Cosray\Exception\ParserException; |
| 8 | use Cosray\Finder\Input\Token; |
| 9 | use Cosray\Finder\Input\TokenGroup; |
| 10 | use Cosray\Finder\Input\TokenType; |
| 11 | |
/**
 * Turns a finder query string into a flat list of Token objects.
 *
 * The query is split into multibyte-safe characters once and then scanned
 * left to right. All positions stored in tokens and reported in error
 * messages are character offsets into the query, not byte offsets.
 *
 * An instance can be reused: every call to tokens() starts from a clean
 * state and returns only the tokens of the query passed to that call.
 */
final class QueryLexer
{
    /** Character offset at which the lexeme currently being scanned begins. */
    private int $start = 0;

    /** Character offset of the next character to be consumed. */
    private int $current = 0;

    /** @var list<Token> Tokens collected for the query currently being scanned. */
    private array $tokens = [];

    /** The query exactly as passed to tokens(); only read when building error messages. */
    private string $query = '';

    /** @var list<string> The query split into single characters by mb_str_split(). */
    private array $source = [];

    /** Number of characters in $source. */
    private int $length = 0;

    /**
     * @param list<string> $builtins Identifiers that are classified as
     *     TokenType::Builtin. They are compared strictly against the scanned
     *     lexeme, so only string entries can ever match.
     */
    public function __construct(
        private readonly array $builtins = [],
    ) {}

    /**
     * Scans the query and returns its tokens in source order.
     *
     * @return list<Token>
     *
     * @throws ParserException if the query contains a syntax error
     */
    public function tokens(string $query): array
    {
        // Reset all per-query state so that the same lexer can be used for
        // several queries without leaking tokens or positions between calls.
        $this->query = $query;
        $this->source = mb_str_split($query);
        $this->length = count($this->source);
        $this->start = 0;
        $this->current = 0;
        $this->tokens = [];

        while (!$this->atEnd()) {
            $this->start = $this->current;
            $this->scan();
        }

        return $this->tokens;
    }

    /**
     * Consumes one lexeme starting at the current position and, unless it is
     * ignorable (whitespace or a comma), appends the matching token.
     *
     * @throws ParserException on an unknown character or a malformed operator
     */
    private function scan(): void
    {
        $char = $this->advance();

        switch ($char) {
            case ' ':
            case ',': // commas are skipped so that they can be used as separators inside lists
            case "\t":
            case "\n":
            case "\r":
                break;
            case '(':
                $this->addToken(TokenGroup::LeftParen, TokenType::LeftParen);
                break;
            case ')':
                $this->addToken(TokenGroup::RightParen, TokenType::RightParen);
                break;
            case '[':
                $this->addToken(TokenGroup::LeftBracket, TokenType::LeftBracket);
                break;
            case ']':
                $this->addToken(TokenGroup::RightBracket, TokenType::RightBracket);
                break;
            case '&':
                $this->addBooleanOperator(TokenType::And);
                break;
            case '|':
                $this->addBooleanOperator(TokenType::Or);
                break;
            case '=':
                $this->addOperator(TokenType::Equal);
                break;
            case '@':
                $this->addOperator(TokenType::In);
                break;
            case '~':
                // ~~  ~~*  ~  ~*
                $this->addPatternOperator(
                    TokenType::Like,
                    TokenType::ILike,
                    TokenType::Regex,
                    TokenType::IRegex,
                );
                break;
            case '!':
                if ($this->matchNext('=')) {
                    // !=
                    $this->addOperator(TokenType::Unequal);
                } elseif ($this->matchNext('@')) {
                    // !@
                    $this->addOperator(TokenType::NotIn);
                } elseif ($this->matchNext('~')) {
                    // !~~  !~~*  !~  !~*
                    $this->addPatternOperator(
                        TokenType::Unlike,
                        TokenType::IUnlike,
                        TokenType::NotRegex,
                        TokenType::INotRegex,
                    );
                } else {
                    // NOTE(review): the message does not mention the '!@'
                    // operator accepted above. It is kept unchanged because
                    // callers or tests may match on the text.
                    $this->error(
                        "Invalid operator '!'. "
                        . "It can only be used in combination with '=' "
                        . "and '~', i. e. '!=' and '!~'",
                    );
                }
                break;
            case '>':
                $this->addOperator(
                    $this->matchNext('=') ? TokenType::GreaterEqual : TokenType::Greater,
                );
                break;
            case '<':
                $this->addOperator(
                    $this->matchNext('=') ? TokenType::LessEqual : TokenType::Less,
                );
                break;
            case '"':
            case "'":
            case '/':
                // The opening character doubles as the expected closing delimiter.
                $this->consumeString($char);
                break;
            default:
                if ($this->isDigit($char) || $char === '-') {
                    $this->consumeNumber($char);
                } elseif ($this->isIdentifier($char)) {
                    $this->consumeIdentifier();
                } else {
                    $this->error("Syntax error, unknown character '{$char}'");
                }
        }
    }

    /**
     * Finishes an operator of the '~' family once its first '~' has been
     * consumed and appends the matching operator token.
     *
     * Remaining input | Token type
     * '~*'            | $ilike
     * '~'             | $like
     * '*'             | $iregex
     * anything else   | $regex
     */
    private function addPatternOperator(
        TokenType $like,
        TokenType $ilike,
        TokenType $regex,
        TokenType $iregex,
    ): void {
        if ($this->matchNext('~')) {
            $type = $this->matchNext('*') ? $ilike : $like;
        } elseif ($this->matchNext('*')) {
            $type = $iregex;
        } else {
            $type = $regex;
        }

        $this->addOperator($type);
    }

    /** Appends the current lexeme as a token of group TokenGroup::Operator. */
    private function addOperator(TokenType $type): void
    {
        $this->addToken(TokenGroup::Operator, $type);
    }

    /** Appends the current lexeme as a token of group TokenGroup::BooleanOperator. */
    private function addBooleanOperator(TokenType $type): void
    {
        $this->addToken(TokenGroup::BooleanOperator, $type);
    }

    /** Tells whether $char may start an identifier (a letter according to ctype_alpha()). */
    private function isIdentifier(string $char): bool
    {
        return ctype_alpha($char);
    }

    /**
     * Tells whether $char is a decimal digit.
     *
     * Also false for the empty string that peek() returns at the end of input.
     */
    private function isDigit(string $char): bool
    {
        return ctype_digit($char);
    }

    /**
     * Consumes the rest of an identifier whose first letter has already been
     * consumed and appends it as an operand token.
     *
     * Letters, digits, '_', '-' and '.' may follow the first letter. The
     * special characters '*' and '?' are only accepted directly after a dot
     * and must themselves be followed by a dot or by the end of the
     * identifier.
     *
     * @throws ParserException on a misplaced special character or dot
     */
    private function consumeIdentifier(): void
    {
        $wasDot = false;
        $wasSpecial = false;

        while (true) {
            // peek() yields '' at the end of input, which fails every check
            // below and therefore terminates the identifier.
            $char = $this->peek();
            $isDot = $char === '.';
            $isSpecial = $char === '*' || $char === '?';

            $valid =
                ctype_alpha($char)
                || ctype_digit($char)
                || $char === '_'
                || $char === '-'
                || $isDot
                || ($wasDot && $isSpecial);

            if ($valid && $wasSpecial && !$isDot) {
                $this->error('Invalid use of special character (like ? or *) in identifier.');
            }

            if (!$valid) {
                break;
            }

            $wasDot = $isDot;
            $wasSpecial = $isSpecial;
            $this->advance();
        }

        $lexeme = $this->getLexeme();
        $type = $this->getIdentifierType($lexeme);
        $this->tokens[] = new Token(TokenGroup::Operand, $type, $this->start, $lexeme);
    }

    /**
     * Consumes the rest of a number and appends it as a TokenType::Number
     * operand.
     *
     * Accepted forms are an optional leading '-', one or more digits and an
     * optional fraction consisting of '.' followed by at least one digit.
     *
     * @param string $char The already consumed first character: a digit or '-'
     *
     * @throws ParserException if '-' is not followed by a digit or if the
     *     fraction has no digits
     */
    private function consumeNumber(string $char): void
    {
        if ($char === '-' && !$this->isDigit($this->peek())) {
            $this->error("Syntax error, unknown character '-'");
        }

        while ($this->isDigit($this->peek())) {
            $this->advance();
        }

        if ($this->peek() === '.') {
            $this->advance();

            if (!$this->isDigit($this->peek())) {
                $this->error('Invalid number.');
            }

            while ($this->isDigit($this->peek())) {
                $this->advance();
            }
        }

        $this->addToken(TokenGroup::Operand, TokenType::Number);
    }

    /**
     * Consumes a delimited string whose opening delimiter has already been
     * consumed and appends it as a TokenType::String operand.
     *
     * The token's lexeme is the text between the delimiters with every
     * backslash-escaped delimiter replaced by the bare delimiter.
     *
     * @param string $char The delimiter that opened the string and must close it
     *
     * @throws ParserException if the input ends before the closing delimiter
     */
    private function consumeString(string $char): void
    {
        while ($this->peek() !== $char && !$this->atEnd()) {
            // Skip the backslash of an escaped delimiter so that the
            // delimiter itself is consumed below instead of ending the string.
            if ($this->peek() === '\\' && $this->peekNext() === $char) {
                $this->advance();
            }

            $this->advance();
        }

        if ($this->atEnd()) {
            $this->error('Unterminated string.');
        }

        // Consume the closing delimiter.
        $this->advance();

        // Both delimiters have been consumed at this point, so the content
        // spans everything between them (possibly nothing).
        $content = array_slice(
            $this->source,
            $this->start + 1,
            $this->current - $this->start - 2,
        );

        $this->tokens[] = new Token(
            TokenGroup::Operand,
            TokenType::String,
            $this->start,
            str_replace('\\' . $char, $char, implode('', $content)),
        );
    }

    /** Consumes and returns the current character. Must not be called at the end of input. */
    private function advance(): string
    {
        return $this->source[$this->current++];
    }

    /** Returns the current character without consuming it, or '' at the end of input. */
    private function peek(): string
    {
        return $this->source[$this->current] ?? '';
    }

    /** Returns the character after the current one without consuming anything, or '' if there is none. */
    private function peekNext(): string
    {
        return $this->source[$this->current + 1] ?? '';
    }

    /** Consumes the current character if it equals $expected and reports whether it did. */
    private function matchNext(string $expected): bool
    {
        if ($this->atEnd() || $this->source[$this->current] !== $expected) {
            return false;
        }

        $this->current++;

        return true;
    }

    /** Tells whether every character of the query has been consumed. */
    private function atEnd(): bool
    {
        return $this->current >= $this->length;
    }

    /** Appends a token for the lexeme spanning from $start up to (excluding) $current. */
    private function addToken(TokenGroup $group, TokenType $type): void
    {
        $this->tokens[] = new Token($group, $type, $this->start, $this->getLexeme());
    }

    /** Returns the text consumed since $start. */
    private function getLexeme(): string
    {
        return implode(
            '',
            array_slice($this->source, $this->start, $this->current - $this->start),
        );
    }

    /**
     * Classifies an identifier lexeme.
     *
     * 'true'/'false' are booleans, 'null' is the null literal and 'now' is a
     * keyword. 'path' and anything starting with 'path.' is a path, names
     * listed in $builtins are builtins and everything else is a field.
     *
     * @throws ParserException if the lexeme ends with a dot or contains two
     *     consecutive dots
     */
    private function getIdentifierType(string $lexeme): TokenType
    {
        if (str_ends_with($lexeme, '.') || str_contains($lexeme, '..')) {
            $this->error('Invalid use of dot (.) in indentifier');
        }

        return match ($lexeme) {
            'true', 'false' => TokenType::Boolean,
            'null' => TokenType::Null,
            'now' => TokenType::Keyword,
            default => match (true) {
                $lexeme === 'path' || str_starts_with($lexeme, 'path.') => TokenType::Path,
                in_array($lexeme, $this->builtins, true) => TokenType::Builtin,
                default => TokenType::Field,
            },
        };
    }

    /**
     * Aborts lexing with a message that shows the query and marks the
     * offending lexeme (from $start up to $current) with carets.
     *
     * @throws ParserException always
     */
    private function error(string $msg): never
    {
        throw new ParserException(
            "Parse error at position {$this->start}. {$msg}\n\n"
            . "Query: `{$this->query}`\n"
            // 8 is the width of the "Query: `" prefix on the line above.
            . str_repeat(' ', $this->start + 8)
            . str_repeat('^', $this->current - $this->start)
            . "\n\n",
        );
    }
}