Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
78.79% covered (warning)
78.79%
26 / 33
33.33% covered (danger)
33.33%
1 / 3
CRAP
0.00% covered (danger)
0.00%
0 / 1
Html
78.79% covered (warning)
78.79%
26 / 33
33.33% covered (danger)
33.33%
1 / 3
9.77
0.00% covered (danger)
0.00%
0 / 1
 excerpt
50.00% covered (danger)
50.00%
3 / 6
0.00% covered (danger)
0.00%
0 / 1
2.50
 balanceTags
75.00% covered (warning)
75.00%
12 / 16
0.00% covered (danger)
0.00%
0 / 1
4.25
 sanitize
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
3
1<?php
2
3declare(strict_types=1);
4
5namespace Cosray\Util;
6
7use Symfony\Component\HtmlSanitizer\HtmlSanitizer;
8use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
9
/**
 * Static helpers for shortening, balancing and sanitizing HTML fragments.
 */
class Html
{
    /**
     * Elements that never take a closing tag and therefore must not be
     * balanced.
     */
    private const VOID_ELEMENTS = [
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ];

    /**
     * Shortens an HTML fragment to at most $limit words.
     *
     * Tags not listed in $allowedtags are stripped first. Words are counted
     * Unicode-aware and only in the text between tags, so the cut never lands
     * inside a multi-byte word or inside the markup of an allowed tag. When
     * the text is cut, $ellipsis is appended and any tags left open by the
     * cut are closed again.
     *
     * @param string                    $html        the HTML fragment to shorten
     * @param int                       $limit       maximum number of words to keep; negative values are treated as 0
     * @param string|list<string>|null  $allowedtags tags to keep, passed through to strip_tags()
     * @param string                    $ellipsis    text appended when the fragment was shortened
     *
     * @return string the (possibly shortened) fragment with balanced tags
     */
    public static function excerpt(
        string $html,
        $limit = 30,
        $allowedtags = '',
        $ellipsis = '…',
    ): string {
        $result = strip_tags($html, $allowedtags);
        $limit = max(0, (int) $limit);
        $offsets = self::wordOffsets($result);

        if (count($offsets) > $limit) {
            // Keep everything up to the start of the first word over the limit.
            $result = substr($result, 0, $offsets[$limit]) . $ellipsis;
        }

        return self::balanceTags($result);
    }

    /**
     * Appends closing tags for every element that is opened in $text but
     * never closed.
     *
     * The text is scanned once; opening tags are pushed on a stack and each
     * closing tag removes the most recent open element of the same name
     * (compared case-insensitively). Void elements (<br>, <img>, ...) and
     * self-closing tags (<x />) are ignored. Whatever is still open at the end
     * is closed innermost-first.
     *
     * @param string $text the HTML fragment to balance
     *
     * @return string the fragment followed by the missing closing tags
     */
    public static function balanceTags(string $text): string
    {
        // Group 1: "/" for a closing tag, group 2: tag name, group 3: the raw
        // attribute part. Quoted attribute values may contain any character,
        // including ">".
        $found = preg_match_all(
            '~<(/?)([a-z][a-z0-9:_-]*)((?:"[^"]*"|\'[^\']*\'|[^\'">])*)>~i',
            $text,
            $matches,
            PREG_SET_ORDER,
        );

        if (!$found) {
            // No tags at all (0) or a PCRE failure (false): nothing to balance.
            return $text;
        }

        $open = [];

        foreach ($matches as [, $slash, $name, $attributes]) {
            $key = strtolower($name);

            if ($slash !== '') {
                // A closing tag closes the most recently opened element of
                // that name; stray closing tags are ignored.
                for ($i = count($open) - 1; $i >= 0; --$i) {
                    if (strtolower($open[$i]) === $key) {
                        array_splice($open, $i, 1);

                        break;
                    }
                }

                continue;
            }

            $selfClosing = str_ends_with(rtrim($attributes), '/');

            if ($selfClosing || in_array($key, self::VOID_ELEMENTS, true)) {
                continue;
            }

            $open[] = $name;
        }

        foreach (array_reverse($open) as $name) {
            $text .= '</' . $name . '>';
        }

        return $text;
    }

    /**
     * Sanitizes an HTML fragment with Symfony's HtmlSanitizer.
     *
     * @param string                   $html             the untrusted HTML
     * @param HtmlSanitizerConfig|null $config           sanitizer configuration; when null a default is built that
     *                                                   allows static elements, http/https/mailto links and
     *                                                   relative links
     * @param bool                     $removeEmptyLines whether runs of line breaks that contain nothing but
     *                                                   whitespace are collapsed into a single PHP_EOL
     *
     * @return string the sanitized HTML
     */
    public static function sanitize(
        string $html,
        ?HtmlSanitizerConfig $config = null,
        bool $removeEmptyLines = true,
    ): string {
        // Parenthesised `new` keeps this parseable on PHP versions before 8.4.
        $config ??= (new HtmlSanitizerConfig())
            // Allow "safe" elements and attributes. All scripts will be removed
            // as well as other dangerous behaviors like CSS injection
            ->allowStaticElements()
            ->allowLinkSchemes(['http', 'https', 'mailto'])
            ->allowRelativeLinks();

        $result = (new HtmlSanitizer($config))->sanitize($html);

        if (!$removeEmptyLines) {
            return $result;
        }

        // preg_replace() returns null on a PCRE error; fall back to the
        // sanitized string rather than violating the return type.
        return preg_replace("/(^[\r\n]*|[\r\n]+)[\\s\t]*[\r\n]+/", PHP_EOL, $result) ?? $result;
    }

    /**
     * Returns the byte offset of the start of every word in $text, skipping
     * anything inside tag markup.
     *
     * A word starts with a Unicode letter and may continue with letters,
     * combining marks, apostrophes and hyphens.
     *
     * @return list<int>
     */
    private static function wordOffsets(string $text): array
    {
        // The first alternative consumes a whole tag and then forces the match
        // to fail, so tag and attribute names are never reported as words.
        $found = preg_match_all(
            '~</?[a-z](?:"[^"]*"|\'[^\']*\'|[^\'">])*>(*SKIP)(*FAIL)|\p{L}[\p{L}\p{M}\'-]*~iu',
            $text,
            $matches,
            PREG_OFFSET_CAPTURE,
        );

        if ($found === false) {
            // Typically invalid UTF-8: fall back to byte-wise word detection.
            return array_keys(str_word_count($text, 2));
        }

        return array_column($matches[0], 1);
    }
}