Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
78.79% |
26 / 33 |
|
33.33% |
1 / 3 |
CRAP | |
0.00% |
0 / 1 |
| Html | |
78.79% |
26 / 33 |
|
33.33% |
1 / 3 |
9.77 | |
0.00% |
0 / 1 |
| excerpt | |
50.00% |
3 / 6 |
|
0.00% |
0 / 1 |
2.50 | |||
| balanceTags | |
75.00% |
12 / 16 |
|
0.00% |
0 / 1 |
4.25 | |||
| sanitize | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
| 1 | <?php |
| 2 | |
| 3 | declare(strict_types=1); |
| 4 | |
| 5 | namespace Cosray\Util; |
| 6 | |
| 7 | use Symfony\Component\HtmlSanitizer\HtmlSanitizer; |
| 8 | use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; |
| 9 | |
/**
 * Static helpers for excerpting, balancing and sanitizing HTML fragments.
 */
class Html
{
    /**
     * Elements that never take a closing tag, so they must not be "balanced".
     */
    private const VOID_ELEMENTS = [
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
        'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
    ];

    /**
     * Matches one opening or closing tag.
     *
     * Group 1 is "/" for a closing tag, group 2 the tag name and group 3 the raw
     * attribute text. Quoted attribute values may contain ">"; the attribute group
     * is possessive so long inputs cannot trigger heavy backtracking.
     */
    private const TAG_PATTERN = '~<(/?)([a-z][\w-]*)((?:"[^"]*"|\'[^\']*\'|[^\'">])*+)>~i';

    /**
     * Build a word-limited excerpt of an HTML fragment.
     *
     * Tags not listed in $allowedtags are stripped. When the remaining text holds
     * more than $limit words, it is cut at the start of the first surplus word and
     * $ellipsis is appended; tags left open by the cut are closed again.
     *
     * Only text outside of tags is counted, so tag and attribute names are not
     * mistaken for words and the cut never lands inside a tag.
     *
     * @param string            $html        Fragment to shorten.
     * @param int               $limit       Maximum number of words to keep; negative values count as 0.
     * @param array|string|null $allowedtags Tags to keep, passed through to strip_tags().
     * @param string            $ellipsis    Marker appended when the text was shortened.
     */
    public static function excerpt(
        string $html,
        $limit = 30,
        $allowedtags = '',
        $ellipsis = '…',
    ): string {
        $limit = max(0, (int) $limit);
        $result = strip_tags($html, $allowedtags);
        $offsets = self::wordOffsets($result);

        if (count($offsets) > $limit) {
            // $offsets[$limit] is the byte offset of the first word beyond the limit
            $result = substr($result, 0, $offsets[$limit]) . $ellipsis;
        }

        return self::balanceTags($result);
    }

    /**
     * Append closing tags for every element that is opened but never closed.
     *
     * Tags are scanned in document order. A closing tag cancels the nearest still
     * open element of the same name (compared case-insensitively). Void elements
     * such as <br> and self-closing tags (<x/>) are never treated as open. Missing
     * closers are appended innermost first, using the spelling of the opening tag.
     *
     * Idea from this answer: https://stackoverflow.com/a/1725345.
     */
    public static function balanceTags(string $text): string
    {
        $matches = [];

        // 0 (no tags) and false (regex failure) both leave the text untouched
        if (!preg_match_all(self::TAG_PATTERN, $text, $matches, PREG_SET_ORDER)) {
            return $text;
        }

        $open = [];

        foreach ($matches as [, $slash, $name, $attributes]) {
            $key = strtolower($name);

            if ($slash === '/') {
                // Cancel the nearest open element with this name, if there is one
                for ($i = count($open) - 1; $i >= 0; --$i) {
                    if ($open[$i][0] === $key) {
                        array_splice($open, $i, 1);

                        break;
                    }
                }

                continue;
            }

            if (in_array($key, self::VOID_ELEMENTS, true) || str_ends_with($attributes, '/')) {
                continue;
            }

            $open[] = [$key, $name];
        }

        foreach (array_reverse($open) as [, $name]) {
            $text .= '</' . $name . '>';
        }

        return $text;
    }

    /**
     * Sanitize an HTML fragment with Symfony's HtmlSanitizer.
     *
     * @param string                   $html             Markup to sanitize.
     * @param HtmlSanitizerConfig|null $config           Custom configuration; when null a default is built
     *                                                   that allows static elements, http/https/mailto
     *                                                   link schemes and relative links.
     * @param bool                     $removeEmptyLines Whether to collapse empty lines in the sanitized output.
     */
    public static function sanitize(
        string $html,
        ?HtmlSanitizerConfig $config = null,
        bool $removeEmptyLines = true,
    ): string {
        // Allow "safe" elements and attributes. All scripts will be removed
        // as well as other dangerous behaviors like CSS injection.
        // The parentheses around `new` keep this valid on PHP versions before 8.4.
        $config ??= (new HtmlSanitizerConfig())
            ->allowStaticElements()
            ->allowLinkSchemes(['http', 'https', 'mailto'])
            ->allowRelativeLinks();

        $result = (new HtmlSanitizer($config))->sanitize($html);

        if (!$removeEmptyLines) {
            return $result;
        }

        // preg_replace() yields null on a PCRE error; fall back to the sanitized text
        return preg_replace("/(^[\r\n]*|[\r\n]+)[\\s\t]*[\r\n]+/", PHP_EOL, $result) ?? $result;
    }

    /**
     * Byte offsets of every word that lies outside of a tag.
     *
     * A word is a run of letters, combining marks, apostrophes and hyphens that
     * starts with a letter.
     *
     * @return list<int>
     */
    private static function wordOffsets(string $html): array
    {
        // Blank out tags with the same number of bytes so offsets still line up
        $masked = preg_replace_callback(
            self::TAG_PATTERN,
            static fn (array $m): string => str_repeat(' ', strlen($m[0])),
            $html,
        ) ?? $html;

        $matches = [];
        $found = preg_match_all('/\p{L}[\p{L}\p{M}\'-]*/u', $masked, $matches, PREG_OFFSET_CAPTURE);

        if ($found === false) {
            // Input is not valid UTF-8: fall back to the byte-wise ASCII word splitter
            return array_keys(str_word_count($masked, 2));
        }

        return array_column($matches[0], 1);
    }
}