Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.94% covered (success)
98.94%
186 / 188
88.89% covered (warning)
88.89%
16 / 18
CRAP
0.00% covered (danger)
0.00%
0 / 1
QueryLexer
98.94% covered (success)
98.94%
186 / 188
88.89% covered (warning)
88.89%
16 / 18
93
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 tokens
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
2
 scan
100.00% covered (success)
100.00%
79 / 79
100.00% covered (success)
100.00%
1 / 1
36
 addOperator
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 addBooleanOperator
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 isIdentifier
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 consumeIdentifier
100.00% covered (success)
100.00%
23 / 23
100.00% covered (success)
100.00%
1 / 1
14
 consumeNumber
100.00% covered (success)
100.00%
14 / 14
100.00% covered (success)
100.00%
1 / 1
7
 consumeString
94.74% covered (success)
94.74%
18 / 19
0.00% covered (danger)
0.00%
0 / 1
7.01
 advance
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 peek
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 peekNext
66.67% covered (warning)
66.67%
2 / 3
0.00% covered (danger)
0.00%
0 / 1
2.15
 matchNext
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
3
 atEnd
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 addToken
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getLexeme
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 getIdentifierType
100.00% covered (success)
100.00%
14 / 14
100.00% covered (success)
100.00%
1 / 1
11
 error
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3declare(strict_types=1);
4
5namespace Cosray\Finder;
6
7use Cosray\Exception\ParserException;
8use Cosray\Finder\Input\Token;
9use Cosray\Finder\Input\TokenGroup;
10use Cosray\Finder\Input\TokenType;
11
12final class QueryLexer
13{
14    private int $start = 0;
15    private int $current = 0;
16
17    /** @var list<Token> */
18    private array $tokens = [];
19
20    private readonly string $query;
21    private readonly array $source;
22    private readonly int $length;
23
24    public function __construct(
25        private readonly array $builtins = [],
26    ) {}
27
28    public function tokens(string $query): array
29    {
30        $this->query = $query;
31        $this->source = mb_str_split($query);
32        $this->length = count($this->source);
33
34        while (!$this->atEnd()) {
35            $this->start = $this->current;
36            $this->scan();
37        }
38
39        return $this->tokens;
40    }
41
42    private function scan(): void
43    {
44        $char = $this->advance();
45
46        switch ($char) {
47            case ' ':
48            case ',': // comma is ignored that it can be used inside of lists
49            case "\t":
50            case "\n":
51            case "\r":
52                break;
53            case '(':
54                $this->addToken(TokenGroup::LeftParen, TokenType::LeftParen);
55                break;
56            case ')':
57                $this->addToken(TokenGroup::RightParen, TokenType::RightParen);
58                break;
59            case '[':
60                $this->addToken(TokenGroup::LeftBracket, TokenType::LeftBracket);
61                break;
62            case ']':
63                $this->addToken(TokenGroup::RightBracket, TokenType::RightBracket);
64                break;
65            case '&':
66                $this->addBooleanOperator(TokenType::And);
67                break;
68            case '|':
69                $this->addBooleanOperator(TokenType::Or);
70                break;
71            case '=':
72                $this->addOperator(TokenType::Equal);
73                break;
74            case '@':
75                $this->addOperator(TokenType::In);
76                break;
77            case '~':
78                if ($this->matchNext('~')) {
79                    if ($this->matchNext('*')) {
80                        // ~~*
81                        $this->addOperator(TokenType::ILike);
82                    } else {
83                        // ~~
84                        $this->addOperator(TokenType::Like);
85                    }
86                } elseif ($this->matchNext('*')) {
87                    // ~*
88                    $this->addOperator(TokenType::IRegex);
89                } else {
90                    // ~
91                    $this->addOperator(TokenType::Regex);
92                }
93                break;
94            case '!':
95                if ($this->matchNext('=')) {
96                    // !=
97                    $this->addOperator(TokenType::Unequal);
98                } elseif ($this->matchNext('@')) {
99                    $this->addOperator(TokenType::NotIn);
100                } elseif ($this->matchNext('~')) {
101                    if ($this->matchNext('~')) {
102                        if ($this->matchNext('*')) {
103                            // !~~*
104                            $this->addOperator(TokenType::IUnlike);
105                        } else {
106                            // !~~
107                            $this->addOperator(TokenType::Unlike);
108                        }
109                    } elseif ($this->matchNext('*')) {
110                        // !~*
111                        $this->addOperator(TokenType::INotRegex);
112                    } else {
113                        // !~
114                        $this->addOperator(TokenType::NotRegex);
115                    }
116                } else {
117                    $this->error(
118                        "Invalid operator '!'. "
119                        . "It can only be used in combination with '=' "
120                        . "and '~', i. e. '!=' and '!~'",
121                    );
122                }
123                break;
124            case '>':
125                if ($this->matchNext('=')) {
126                    $this->addOperator(TokenType::GreaterEqual);
127                } else {
128                    $this->addOperator(TokenType::Greater);
129                }
130                break;
131            case '<':
132                if ($this->matchNext('=')) {
133                    $this->addOperator(TokenType::LessEqual);
134                } else {
135                    $this->addOperator(TokenType::Less);
136                }
137                break;
138            case '"':
139            case "'":
140            case '/':
141                $this->consumeString($char);
142                break;
143            default:
144                if (is_numeric($char) || $char === '-') {
145                    $this->consumeNumber($char);
146                } elseif ($this->isIdentifier($char)) {
147                    $this->consumeIdentifier();
148                } else {
149                    $this->error("Syntax error, unknown character '{$char}'");
150                }
151        }
152    }
153
154    private function addOperator(TokenType $type): void
155    {
156        $this->addToken(TokenGroup::Operator, $type);
157    }
158
159    private function addBooleanOperator(TokenType $type): void
160    {
161        $this->addToken(TokenGroup::BooleanOperator, $type);
162    }
163
164    private function isIdentifier(string $char): bool
165    {
166        return ctype_alpha($char);
167    }
168
169    private function consumeIdentifier(): void
170    {
171        $wasDot = false;
172        $wasSpecial = false;
173
174        while (true) {
175            $char = $this->peek();
176            $isDot = $char === '.';
177            $isSpecial = $char === '*' || $char === '?';
178
179            $valid =
180                ctype_alpha($char)
181                || ctype_digit($char)
182                || $char === '_'
183                || $char === '-'
184                || $isDot
185                || $wasDot && $isSpecial;
186
187            if ($valid && $wasSpecial && !$isDot) {
188                $this->error('Invalid use of special character (like ? or *) in identifier.');
189            }
190
191            $wasDot = $isDot;
192            $wasSpecial = $isSpecial;
193
194            if ($valid && !$this->atEnd()) {
195                $this->advance();
196            } else {
197                $lexeme = $this->getLexeme();
198                $type = $this->getIdentifierType($lexeme);
199                $this->tokens[] = new Token(TokenGroup::Operand, $type, $this->start, $lexeme);
200
201                break;
202            }
203        }
204    }
205
206    private function consumeNumber(string $char): void
207    {
208        if ($char === '-') {
209            if (!is_numeric($this->peek())) {
210                $this->error("Syntax error, unknown character '-'");
211            }
212        }
213
214        while (is_numeric($this->peek())) {
215            $this->advance();
216        }
217
218        if ($this->peek() === '.') {
219            $this->advance();
220            $hasFraction = false;
221
222            while (is_numeric($this->peek())) {
223                $hasFraction = true;
224                $this->advance();
225            }
226
227            if (!$hasFraction) {
228                $this->error('Invalid number.');
229            }
230        }
231
232        $this->addToken(TokenGroup::Operand, TokenType::Number);
233    }
234
235    private function consumeString(string $char): void
236    {
237        while ($this->peek() !== $char && !$this->atEnd()) {
238            if ($this->peek() === '\\' && $this->peekNext() === $char) {
239                $this->advance();
240            }
241
242            $this->advance();
243        }
244
245        if ($this->atEnd()) {
246            $this->error('Unterminated string.');
247        }
248
249        // Hop to the closing "
250        $this->advance();
251
252        if ($this->start === $this->current) {
253            $lexeme = '';
254        } else {
255            $start = $this->start + 1;
256            $length = $this->current - $this->start - 2;
257            $slice = array_slice($this->source, $start, $length);
258            $lexeme = implode('', $slice);
259        }
260
261        // $this->afterValue = true;
262        $this->tokens[] = new Token(
263            TokenGroup::Operand,
264            TokenType::String,
265            $this->start,
266            str_replace('\\' . $char, $char, $lexeme),
267        );
268    }
269
270    private function advance(): string
271    {
272        $result = $this->source[$this->current];
273        $this->current++;
274
275        return $result;
276    }
277
278    private function peek(): string
279    {
280        if ($this->atEnd()) {
281            return '';
282        }
283
284        return $this->source[$this->current];
285    }
286
287    private function peekNext(): string
288    {
289        if (($this->current + 1) > ($this->length - 1)) {
290            return '';
291        }
292
293        return $this->source[$this->current + 1];
294    }
295
296    private function matchNext(string $expected): bool
297    {
298        if ($this->atEnd()) {
299            return false;
300        }
301
302        if ($this->source[$this->current] === $expected) {
303            $this->current++;
304
305            return true;
306        }
307
308        return false;
309    }
310
311    private function atEnd(): bool
312    {
313        return $this->current > ($this->length - 1);
314    }
315
316    private function addToken(TokenGroup $group, TokenType $type): void
317    {
318        $lexeme = $this->getLexeme();
319        $this->tokens[] = new Token($group, $type, $this->start, $lexeme);
320    }
321
322    private function getLexeme(): string
323    {
324        $length = $this->current - $this->start;
325        $slice = array_slice($this->source, $this->start, $length);
326
327        return implode('', $slice);
328    }
329
330    private function getIdentifierType(string $lexeme): TokenType
331    {
332        if (str_ends_with($lexeme, '.') || str_contains($lexeme, '..')) {
333            $this->error('Invalid use of dot (.) in indentifier');
334        }
335
336        switch ($lexeme) {
337            case 'true':
338            case 'false':
339                return TokenType::Boolean;
340            case 'null':
341                return TokenType::Null;
342            case 'now':
343                return TokenType::Keyword;
344            default:
345                if ($lexeme === 'path' || str_starts_with($lexeme, 'path.')) {
346                    return TokenType::Path;
347                }
348
349                if (in_array($lexeme, $this->builtins, true)) {
350                    return TokenType::Builtin;
351                }
352
353                return TokenType::Field;
354        }
355    }
356
357    /**
358     * @throws ParserException
359     */
360    private function error(string $msg): never
361    {
362        throw new ParserException(
363            "Parse error at position {$this->start}{$msg}\n\n"
364            . "Query: `{$this->query}`\n"
365            . str_repeat(' ', $this->start + 8)
366            . str_repeat('^', $this->current - $this->start)
367            . "\n\n",
368        );
369    }
370}