1: <?php declare(strict_types = 1);
2:
3: namespace PHPStan\PhpDocParser\Parser;
4:
5: use PHPStan\ShouldNotHappenException;
6: use function chr;
7: use function hexdec;
8: use function octdec;
9: use function preg_replace_callback;
10: use function str_replace;
11: use function substr;
12:
/**
 * Converts quoted string literals (including their surrounding quotes) into
 * the raw string value they denote, resolving PHP-style escape sequences.
 */
class StringUnescaper
{

	/**
	 * Single-character escapes recognised in double-quoted strings, keyed by
	 * the character that follows the backslash.
	 */
	private const REPLACEMENTS = [
		'\\' => '\\',
		'n' => "\n",
		'r' => "\r",
		't' => "\t",
		'f' => "\f",
		'v' => "\v",
		'e' => "\x1B",
	];

	/** Highest code point that can be encoded as valid UTF-8. */
	private const MAX_CODE_POINT = 0x10FFFF;

	/** UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER. */
	private const REPLACEMENT_CHARACTER = "\xef\xbf\xbd";

	/**
	 * Strips the surrounding quotes from a string literal and resolves its escapes.
	 *
	 * A literal starting with a single quote only has `\\` and `\'` resolved;
	 * anything else is treated as a double-quoted literal and goes through the
	 * full escape-sequence parser.
	 *
	 * @param string $string The literal including its opening and closing quote.
	 * @return string The unescaped contents; an empty input yields an empty string.
	 */
	public static function unescapeString(string $string): string
	{
		// Guard the offset access below: there is no quote character to inspect.
		if ($string === '') {
			return '';
		}

		$quote = $string[0];

		if ($quote === '\'') {
			return str_replace(
				['\\\\', '\\\''],
				['\\', '\''],
				substr($string, 1, -1),
			);
		}

		return self::parseEscapeSequences(substr($string, 1, -1), '"');
	}

	/**
	 * Resolves the escape sequences of a double-quoted style string body.
	 *
	 * Handles the escaped quote, the single-character escapes from REPLACEMENTS,
	 * hexadecimal (`\xHH`), octal (`\NNN`) and Unicode (`\u{H...}`) escapes.
	 *
	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
	 *
	 * @param string $str The string body without its surrounding quotes.
	 * @param string $quote The quote character whose escaped form should be unescaped.
	 */
	private static function parseEscapeSequences(string $str, string $quote): string
	{
		$str = str_replace('\\' . $quote, $quote, $str);

		$result = preg_replace_callback(
			'~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
			static function (array $matches): string {
				$sequence = $matches[1];

				if (isset(self::REPLACEMENTS[$sequence])) {
					return self::REPLACEMENTS[$sequence];
				}
				if ($sequence[0] === 'x' || $sequence[0] === 'X') {
					// At most two hex digits, so the value always fits in one byte.
					return chr((int) hexdec(substr($sequence, 1)));
				}
				if ($sequence[0] === 'u') {
					if (!isset($matches[2])) {
						throw new ShouldNotHappenException();
					}

					// hexdec() returns a float once the value no longer fits in an int.
					// Check the range before casting so an overflowing value cannot be
					// mistaken for a small, valid code point.
					$codePoint = hexdec($matches[2]);
					if ($codePoint > self::MAX_CODE_POINT) {
						return self::REPLACEMENT_CHARACTER;
					}

					return self::codePointToUtf8((int) $codePoint);
				}

				// Three octal digits can reach 0777 (511); keep only the low byte
				// instead of relying on chr() wrapping out-of-range values.
				return chr(((int) octdec($sequence)) & 0xFF);
			},
			$str,
		);

		if ($result === null) {
			// preg_replace_callback() signals a PCRE failure with null.
			throw new ShouldNotHappenException();
		}

		return $result;
	}

	/**
	 * Encodes a Unicode code point as a UTF-8 byte sequence.
	 *
	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
	 *
	 * @param int $num The code point to encode.
	 * @return string One to four bytes of UTF-8, or the encoding of U+FFFD when
	 *                $num is negative or greater than U+10FFFF.
	 */
	private static function codePointToUtf8(int $num): string
	{
		if ($num < 0 || $num > self::MAX_CODE_POINT) {
			// Invalid UTF-8 codepoint escape sequence: Codepoint out of range
			return self::REPLACEMENT_CHARACTER;
		}
		if ($num <= 0x7F) {
			return chr($num);
		}
		if ($num <= 0x7FF) {
			return chr(($num >> 6) + 0xC0)
				. chr(($num & 0x3F) + 0x80);
		}
		if ($num <= 0xFFFF) {
			return chr(($num >> 12) + 0xE0)
				. chr((($num >> 6) & 0x3F) + 0x80)
				. chr(($num & 0x3F) + 0x80);
		}

		// Remaining range is 0x10000..0x10FFFF: four-byte sequence.
		return chr(($num >> 18) + 0xF0)
			. chr((($num >> 12) & 0x3F) + 0x80)
			. chr((($num >> 6) & 0x3F) + 0x80)
			. chr(($num & 0x3F) + 0x80);
	}

}
101: