Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
36 / 36
100.00% covered (success)
100.00%
1 / 1
CRAP
100.00% covered (success)
100.00%
1 / 1
Strings
100.00% covered (success)
100.00%
36 / 36
100.00% covered (success)
100.00%
1 / 1
12
100.00% covered (success)
100.00%
1 / 1
 entropy
100.00% covered (success)
100.00%
36 / 36
100.00% covered (success)
100.00%
1 / 1
12
1<?php
2
3declare(strict_types=1);
4
5namespace Cosray\Util;
6
/**
 * String helper utilities.
 */
class Strings
{
    /**
     * Characters that entropy() counts as "special".
     *
     * The non-ASCII entries (pound sign, not sign, broken bar) are written as
     * \u{...} escapes so the set does not depend on the encoding of this file.
     */
    private const SPECIAL_CHARS = " ,.?!\"\u{00A3}\$%^&*()-_=+[]{};:'@#~<>/\\|`\u{00AC}\u{00A6}";

    /**
     * Character classes probed by entropy(): lower case letters, upper case
     * letters and numbers. The "u" modifier makes PCRE match whole UTF-8
     * characters instead of single bytes.
     */
    private const CLASS_PATTERNS = [
        '/\p{Ll}/u',
        '/\p{Lu}/u',
        '/\p{N}/u',
    ];

    /**
     * Calculate an entropy score for a string.
     *
     * The score is log2(size) * length, where "size" is the number of distinct
     * characters found per character class (lower case, upper case, numbers,
     * special characters) plus one bonus point for every class beyond the
     * first, and "length" is the number of characters of the trimmed string,
     * reduced by up to two when the string looks like a simple run of
     * neighbouring characters.
     *
     * All work is done per Unicode character, not per byte. Input that is not
     * valid UTF-8 is interpreted as ISO-8859-1 (one character per byte).
     *
     * @param string $str The string to evaluate; surrounding whitespace is ignored.
     *
     * @return float The entropy score; 0.0 for empty or trivially simple input.
     */
    public static function entropy(string $str): float
    {
        $str = trim($str);

        // The /u patterns fail on malformed UTF-8, so fall back to reading the
        // bytes as ISO-8859-1, i.e. every byte is its own character.
        if (!mb_check_encoding($str, 'UTF-8')) {
            $str = (string) mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
        }

        $chars = mb_str_split($str, 1, 'UTF-8');
        $len = count($chars);
        $size = 0.0;
        $classCount = 0;

        foreach (self::CLASS_PATTERNS as $pattern) {
            $matches = [];

            if (preg_match_all($pattern, $str, $matches)) {
                $size += count(array_unique($matches[0]));
                $classCount++;
            }
        }

        // Special characters: count each distinct one that occurs in the string.
        $foundSpecialChars = count(array_filter(
            array_unique(mb_str_split(self::SPECIAL_CHARS, 1, 'UTF-8')),
            static fn (string $char): bool => str_contains($str, $char),
        ));

        if ($foundSpecialChars > 0) {
            $classCount++;
            $size += $foundSpecialChars;
        }

        // Detect simple runs of neighbouring characters such as "abcdef" or
        // "1234" by summing the code point distance between adjacent
        // characters. This is only an approximation and must not carry too
        // much weight, so it merely shortens the effective length when the sum
        // stays below the thresholds.
        $sumDiff = 1;

        for ($i = 1; $i < $len; $i++) {
            $sumDiff += abs((int) mb_ord($chars[$i - 1], 'UTF-8') - (int) mb_ord($chars[$i], 'UTF-8'));
        }

        // Probably something like "acegik...".
        if ($sumDiff <= 12) {
            $len--;
        }

        // Probably something like "12345" or "aaabbb".
        if ($sumDiff <= 5) {
            $len--;
        }

        // Reward mixing classes: one extra point per class beyond the first.
        if ($classCount > 0) {
            $size += $classCount - 1;
        }

        if ($size === 0.0 || $len <= 0) {
            return 0.0;
        }

        return log($size, 2) * $len;
    }
}