Last active
September 1, 2017 13:00
-
-
Save patsuckow/659d1715ccc7ef8fd87d00b31e15be1d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Returns the Unicode code point of the first UTF-8 character in a string.
 *
 * Decodes the original 1 to 6 byte UTF-8 scheme, so lead bytes 0xF8-0xFD
 * (5 and 6 byte forms, no longer valid under RFC 3629) are still accepted
 * for backward compatibility. Overlong encodings and surrogate code points
 * are not rejected.
 *
 * @param string $utf8Char
 *   UTF-8 encoded character. If the string holds more than one character,
 *   only the first one is considered.
 *
 * @return int
 *   Unicode code point of the character.
 *
 * @throws InvalidArgumentException
 *   If the string is empty, the first byte is not a valid lead byte, the
 *   sequence is truncated, or a tail byte is not a continuation byte.
 */
function utf8ToCode($utf8Char) {
    $utf8Char = (string) $utf8Char;
    if ('' === $utf8Char) {
        throw new InvalidArgumentException("Empty string is not valid character");
    }

    $first = ord($utf8Char[0]);

    # 0-------
    # Single-byte (ASCII) character: the byte is the code point.
    if ($first <= 0x7F) {
        return $first;
    }
    # 110----- 10------
    elseif ($first >= 0xC0 && $first <= 0xDF) {
        $tail = 1;
    }
    # 1110---- 10------ 10------
    elseif ($first >= 0xE0 && $first <= 0xEF) {
        $tail = 2;
    }
    # 11110--- 10------ 10------ 10------
    elseif ($first >= 0xF0 && $first <= 0xF7) {
        $tail = 3;
    }
    # 111110-- 10------ 10------ 10------ 10------
    elseif ($first >= 0xF8 && $first <= 0xFB) {
        $tail = 4;
    }
    # 1111110- 10------ 10------ 10------ 10------ 10------
    elseif ($first >= 0xFC && $first <= 0xFD) {
        $tail = 5;
    }
    # 0x80-0xBF are continuation bytes, 0xFE/0xFF never occur in UTF-8.
    else {
        throw new InvalidArgumentException("First byte is not valid");
    }

    if (strlen($utf8Char) - 1 < $tail) {
        throw new InvalidArgumentException("Corrupted character: $tail tail bytes required");
    }

    # The lead byte carries (6 - $tail) payload bits in its low end.
    $code = $first & (0x3F >> $tail);
    for ($i = 1; $i <= $tail; $i++) {
        $byte = ord($utf8Char[$i]);
        # Every tail byte must look like 10------; anything else means the
        # sequence is malformed and must not be decoded into a bogus code.
        if (($byte & 0xC0) !== 0x80) {
            throw new InvalidArgumentException("Corrupted character: tail byte $i is not a continuation byte");
        }
        # Append the 6 payload bits of this tail byte.
        $code = ($code << 6) | ($byte & 0x3F);
    }

    return $code;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment