| 1: | <?php declare(strict_types = 1); |
| 2: | |
| 3: | namespace PHPStan\PhpDocParser\Parser; |
| 4: | |
| 5: | use PHPStan\ShouldNotHappenException; |
| 6: | use function chr; |
| 7: | use function hexdec; |
| 8: | use function octdec; |
| 9: | use function preg_replace_callback; |
| 10: | use function str_replace; |
| 11: | use function substr; |
| 12: | |
/**
 * Turns quoted string literals into the raw strings they denote by removing
 * the surrounding quotes and resolving backslash escape sequences.
 */
class StringUnescaper
{

	/**
	 * Single-character escapes of double-quoted literals, keyed by the
	 * character that follows the backslash.
	 */
	private const REPLACEMENTS = [
		'\\' => '\\',
		'n' => "\n",
		'r' => "\r",
		't' => "\t",
		'f' => "\f",
		'v' => "\v",
		'e' => "\x1B",
	];

	/** Highest code point that UTF-8 is allowed to encode (RFC 3629). */
	private const MAX_CODE_POINT = 0x10FFFF;

	/** UTF-8 bytes of U+FFFD REPLACEMENT CHARACTER. */
	private const REPLACEMENT_CHARACTER = "\xef\xbf\xbd";

	/**
	 * Removes the first and last byte (the quotes) of a literal and unescapes the rest.
	 *
	 * The first byte selects the rules. When it is a single quote, only `\\` and `\'`
	 * are unescaped; every other input is treated as a double-quoted literal and gets
	 * the full set of escape sequences handled by parseEscapeSequences().
	 *
	 * @param string $string the literal, including its opening and closing quote
	 * @return string the unescaped content
	 */
	public static function unescapeString(string $string): string
	{
		$quote = $string[0];
		$inner = substr($string, 1, -1);

		if ($quote === '\'') {
			return str_replace(
				['\\\\', '\\\''],
				['\\', '\''],
				$inner,
			);
		}

		return self::parseEscapeSequences($inner, '"');
	}

	/**
	 * Resolves the escape sequences of a double-quoted style string body.
	 *
	 * Handled sequences: the escaped quote, the single-character escapes listed in
	 * REPLACEMENTS, `\xHH` / `\XHH` (one or two hex digits), `\NNN` (one to three
	 * octal digits) and `\u{H...}` (Unicode code point, emitted as UTF-8).
	 * Any other backslash sequence is left untouched.
	 *
	 * @param string $str   string body without the surrounding quotes
	 * @param string $quote quote character whose escaped form `\<quote>` is unescaped
	 * @return string the string with all recognised sequences replaced
	 */
	private static function parseEscapeSequences(string $str, string $quote): string
	{
		$str = str_replace('\\' . $quote, $quote, $str);

		$result = preg_replace_callback(
			'~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
			static function (array $matches): string {
				$escape = $matches[1];

				if (isset(self::REPLACEMENTS[$escape])) {
					return self::REPLACEMENTS[$escape];
				}
				if ($escape[0] === 'x' || $escape[0] === 'X') {
					return chr((int) hexdec(substr($escape, 1)));
				}
				if ($escape[0] === 'u') {
					if (!isset($matches[2])) {
						throw new ShouldNotHappenException();
					}

					// hexdec() returns a float once the value exceeds PHP_INT_MAX; check the
					// range before casting so an oversized literal cannot wrap around into
					// some unrelated code point.
					$codePoint = hexdec($matches[2]);
					if ($codePoint > self::MAX_CODE_POINT) {
						return self::REPLACEMENT_CHARACTER;
					}

					return self::codePointToUtf8((int) $codePoint);
				}

				// Three octal digits can reach 0777 (511). Keep only the low byte, which is
				// how PHP itself treats such escapes ("\400" === "\000"), instead of handing
				// chr() a value outside 0-255.
				return chr(((int) octdec($escape)) & 0xFF);
			},
			$str,
		);

		if ($result === null) {
			// preg_replace_callback() reports PCRE failures by returning null.
			throw new ShouldNotHappenException();
		}

		return $result;
	}

	/**
	 * Encodes a Unicode code point as a UTF-8 byte sequence.
	 *
	 * Values outside 0..0x10FFFF cannot be represented in UTF-8 and yield
	 * U+FFFD REPLACEMENT CHARACTER. Surrogate code points (0xD800-0xDFFF) are
	 * not special-cased and are encoded with the regular three-byte form.
	 *
	 * @param int $num the code point
	 * @return string one to four bytes of UTF-8
	 */
	private static function codePointToUtf8(int $num): string
	{
		if ($num < 0 || $num > self::MAX_CODE_POINT) {
			return self::REPLACEMENT_CHARACTER;
		}
		if ($num <= 0x7F) {
			return chr($num);
		}
		if ($num <= 0x7FF) {
			return chr(($num >> 6) + 0xC0)
				. chr(($num & 0x3F) + 0x80);
		}
		if ($num <= 0xFFFF) {
			return chr(($num >> 12) + 0xE0)
				. chr((($num >> 6) & 0x3F) + 0x80)
				. chr(($num & 0x3F) + 0x80);
		}

		return chr(($num >> 18) + 0xF0)
			. chr((($num >> 12) & 0x3F) + 0x80)
			. chr((($num >> 6) & 0x3F) + 0x80)
			. chr(($num & 0x3F) + 0x80);
	}

}
| 101: | |