* Based on: http://www.faqs.org/rfcs/rfc2781.html *
```	 *   Encoding UTF-16:
*
*   Encoding of a single character from an ISO 10646 character value to
*    UTF-16 proceeds as follows. Let U be the character number, no greater
*    than 0x10FFFF.
*
*    1) If U < 0x10000, encode U as a 16-bit unsigned integer and
*       terminate.
*
*    2) Let U' = U - 0x10000. Because U is less than or equal to 0x10FFFF,
*       U' must be less than or equal to 0xFFFFF. That is, U' can be
*       represented in 20 bits.
*
*    3) Initialize two 16-bit unsigned integers, W1 and W2, to 0xD800 and
*       0xDC00, respectively. These integers each have 10 bits free to
*       encode the character value, for a total of 20 bits.
*
*    4) Assign the 10 high-order bits of the 20-bit U' to the 10 low-order
*       bits of W1 and the 10 low-order bits of U' to the 10 low-order
*       bits of W2. Terminate.
*
*    Graphically, steps 2 through 4 look like:
*    U' = yyyyyyyyyyxxxxxxxxxx
*    W1 = 110110yyyyyyyyyy
*    W2 = 110111xxxxxxxxxx
* ```
* @param $unicode (array) array containing UTF-8 unicode values
* @param $setbom (boolean) if true set the Byte Order Mark (BOM = 0xFEFF)
* @return string
* @protected
* @author Nicola Asuni
* @since 2.1.000 (2008-01-08)
* @public static
*/
	public static function arrUTF8ToUTF16BE($unicode, $setbom=false) {
		// optional big-endian Byte Order Mark, followed by the encoded code units
		$outstr = $setbom ? "\xFE\xFF" : '';
		foreach ($unicode as $char) {
			if ($char == 0x200b) {
				// Unicode Character 'ZERO WIDTH SPACE' (DEC:8203, U+200B) produces no output
				continue;
			}
			if ($char == 0xFFFD) {
				// replacement character
				$outstr .= "\xFF\xFD";
				continue;
			}
			if ($char < 0x10000) {
				// one 16-bit code unit, high byte first
				$outstr .= chr($char >> 0x08).chr($char & 0xFF);
				continue;
			}
			// value above 0xFFFF: split the offset from 0x10000 into a surrogate pair (W1, W2)
			$offset = $char - 0x10000;
			$w1 = 0xD800 | ($offset >> 0x0a);
			$w2 = 0xDC00 | ($offset & 0x3FF);
			$outstr .= chr($w1 >> 0x08).chr($w1 & 0xFF).chr($w2 >> 0x08).chr($w2 & 0xFF);
		}
		return $outstr;
	}

	/**
	 * Map every element of an array of UTF8 values to its character form.
	 * Each value is passed to TCPDF_FONTS::unichrUnicode() in Unicode mode, or to TCPDF_FONTS::unichrASCII() otherwise.
	 * @param $ta (array) The input array of UTF8 values.
	 * @param $isunicode (boolean) True for Unicode mode, false otherwise.
	 * @return Return array of unicode characters
	 * @since 4.5.037 (2009-04-07)
	 * @public static
	 */
	public static function UTF8ArrayToUniArray($ta, $isunicode=true) {
		$converter = $isunicode ? 'unichrUnicode' : 'unichrASCII';
		return array_map(array('TCPDF_FONTS', $converter), $ta);
	}

	/**
	 * Extract a slice of the $strarr array and return it as string.
	 * @param $strarr (array) The input array of characters.
	 * @param $start (int) the starting element of $strarr.
	 * @param $end (int) first element that will not be returned.
	 * @param $unicode (boolean) True if we are in unicode mode, false otherwise.
* @return Return part of a string
* @public static
*/
	public static function UTF8ArrSubString($strarr, $start='', $end='', $unicode=true) {
		// an empty bound means "from the first element" / "up to the last element"
		$first = (strlen($start) == 0) ? 0 : $start;
		$limit = (strlen($end) == 0) ? count($strarr) : $end;
		$string = '';
		$pos = $first;
		while ($pos < $limit) {
			// each value is turned into its character form by unichr()
			$string .= self::unichr($strarr[$pos], $unicode);
			++$pos;
		}
		return $string;
	}

	/**
	 * Concatenate the elements of $uniarr from index $start up to (but not including) index $end.
	 * @param $uniarr (array) The input array of characters.
	 * @param $start (int) the starting element of $uniarr (empty value = first element).
	 * @param $end (int) first element that will not be returned (empty value = end of array).
	 * @return Return part of a string
	 * @since 4.5.037 (2009-04-07)
	 * @public static
	 */
	public static function UniArrSubString($uniarr, $start='', $end='') {
		$first = (strlen($start) == 0) ? 0 : $start;
		$limit = (strlen($end) == 0) ? count($uniarr) : $end;
		$string = '';
		$pos = $first;
		while ($pos < $limit) {
			$string .= $uniarr[$pos];
			++$pos;
		}
		return $string;
	}

	/**
	 * Converts UTF-8 characters array to array of Latin1 characters array
* @param $unicode (array) array containing UTF-8 unicode values
* @return array
* @author Nicola Asuni
* @since 4.8.023 (2010-01-15)
* @public static
*/
	public static function UTF8ArrToLatin1Arr($unicode) {
		$outarr = array(); // array to be returned
		foreach ($unicode as $char) {
			if ($char < 256) {
				// already inside the single-byte range: keep the value
				$outarr[] = $char;
				continue;
			}
			if (array_key_exists($char, TCPDF_FONT_DATA::$uni_utf8tolatin)) {
				// map from UTF-8
				$outarr[] = TCPDF_FONT_DATA::$uni_utf8tolatin[$char];
				continue;
			}
			if ($char != 0xFFFD) {
				// anything else except the replacement character (which is skipped) becomes '?'
				$outarr[] = 63;
			}
		}
		return $outarr;
	}

	/**
	 * Converts UTF-8 characters array to array of Latin1 string
* @param $unicode (array) array containing UTF-8 unicode values
* @return string
* @author Nicola Asuni
* @since 4.8.023 (2010-01-15)
* @public static
*/
	public static function UTF8ArrToLatin1($unicode) {
		$outstr = ''; // string to be returned
		foreach ($unicode as $char) {
			if ($char < 256) {
				// already inside the single-byte range: emit the byte as is
				$outstr .= chr($char);
				continue;
			}
			if (array_key_exists($char, TCPDF_FONT_DATA::$uni_utf8tolatin)) {
				// map from UTF-8
				$outstr .= chr(TCPDF_FONT_DATA::$uni_utf8tolatin[$char]);
				continue;
			}
			if ($char != 0xFFFD) {
				// anything else except the replacement character (which is skipped) becomes '?'
				$outstr .= '?';
			}
		}
		return $outstr;
	}

	/**
	 * Converts UTF-8 character to integer value.
* Uses the getUniord() method if the value is not cached.
* @param $uch (string) character string to process.
* @return integer Unicode value
* @public static
*/
	public static function uniord($uch) {
		if (isset(self::$cache_uniord[$uch])) {
			// already decoded once: reuse the stored value
			return self::$cache_uniord[$uch];
		}
		$ord = self::getUniord($uch);
		self::$cache_uniord[$uch] = $ord;
		return $ord;
	}

	/**
	 * Converts UTF-8 character to integer value.
* Invalid byte sequences will be replaced with 0xFFFD (replacement character)
* Based on: http://www.faqs.org/rfcs/rfc3629.html *
```	 *    Char. number range  |        UTF-8 octet sequence
*    --------------------+-----------------------------------------------
*    0000 0000-0000 007F | 0xxxxxxx
*    0000 0080-0000 07FF | 110xxxxx 10xxxxxx
*    0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
*    0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*    ---------------------------------------------------------------------
*
*   ABNF notation:
*   ---------------------------------------------------------------------
*   UTF8-octets = *( UTF8-char )
*   UTF8-char   = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
*   UTF8-1      = %x00-7F
*   UTF8-2      = %xC2-DF UTF8-tail
*
*   UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
*                 %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
*   UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
*                 %xF4 %x80-8F 2( UTF8-tail )
*   UTF8-tail   = %x80-BF
*   ---------------------------------------------------------------------
* ```
* @param $uch (string) character string to process (only the first character is decoded).
* @return integer Unicode value, or 0xFFFD when the byte sequence is empty or invalid
* @author Nicola Asuni
* @public static
*/
	public static function getUniord($uch) {
		if (function_exists('mb_convert_encoding')) {
			// fast path: convert to UCS-4BE and read the first 32-bit big-endian value
			$decoded = @unpack('N', mb_convert_encoding($uch, 'UCS-4BE', 'UTF-8'));
			// unpack() does not return an array when there are not enough bytes (e.g. empty input);
			// in that case, or when the value is negative, fall through to the manual decoder
			if (is_array($decoded) AND isset($decoded[1]) AND ($decoded[1] >= 0)) {
				return $decoded[1];
			}
		}
		$length = strlen($uch);
		if ($length == 0) {
			// nothing to decode
			return 0xFFFD;
		}
		// get starting octet
		$lead = ord($uch[0]);
		if ($lead <= 0x7F) {
			return $lead; // use the character "as is" because is ASCII
		}
		// $numbytes = number of octets needed to represent the UTF-8 character
		// $minvalue = smallest code point that legitimately needs that many octets
		if (($lead >> 0x05) == 0x06) { // 2 bytes character (0x06 = 110 BIN)
			$numbytes = 2;
			$char = ($lead - 0xC0) << 0x06;
			$minvalue = 0x80;
		} elseif (($lead >> 0x04) == 0x0E) { // 3 bytes character (0x0E = 1110 BIN)
			$numbytes = 3;
			$char = ($lead - 0xE0) << 0x0C;
			$minvalue = 0x800;
		} elseif (($lead >> 0x03) == 0x1E) { // 4 bytes character (0x1E = 11110 BIN)
			$numbytes = 4;
			$char = ($lead - 0xF0) << 0x12;
			$minvalue = 0x10000;
		} else {
			// use replacement character for other invalid sequences
			return 0xFFFD;
		}
		if ($length < $numbytes) {
			// truncated sequence
			return 0xFFFD;
		}
		// compose UTF-8 bytes to a single unicode value
		for ($j = 1; $j < $numbytes; ++$j) {
			$tail = ord($uch[$j]);
			if (($tail >> 0x06) != 0x02) { // bytes 2, 3 and 4 must start with 0x02 = 10 BIN
				return 0xFFFD;
			}
			$char += (($tail - 0x80) << (($numbytes - $j - 1) * 0x06));
		}
		if ($char < $minvalue) {
			// overlong encoding: the value would fit in a shorter sequence (not allowed by RFC 3629)
			return 0xFFFD;
		}
		if ((($char >= 0xD800) AND ($char <= 0xDFFF)) OR ($char > 0x10FFFF)) {
			// The definition of UTF-8 prohibits encoding character numbers between
			// U+D800 and U+DFFF, which are reserved for use with the UTF-16
			// encoding form (as surrogate pairs) and do not directly represent
			// characters. Values above U+10FFFF are outside the Unicode range;
			// U+10FFFF itself is the last valid code point and is accepted.
			return 0xFFFD; // use replacement character
		}
		return $char;
	}

	/**
	 * Converts UTF-8 strings to codepoints array.
* Invalid byte sequences will be replaced with 0xFFFD (replacement character)
* @param $str (string) string to process.
* @param $isunicode (boolean) True when the document is in Unicode mode, false otherwise.
* @param $currentfont (array) Reference to current font array; its 'subsetchars' entry receives one key per returned code point.
* @return array containing codepoints (UTF-8 characters values); empty when the string cannot be split in Unicode mode
* @author Nicola Asuni
* @public static
*/
	public static function UTF8StringToArray($str, $isunicode=true, &$currentfont) {
		if ($isunicode) {
			// requires PCRE unicode support turned on
			$chars = TCPDF_STATIC::pregSplit('//','u', $str, -1, PREG_SPLIT_NO_EMPTY);
			// a failed split (non-array result) is treated as "no characters"
			// instead of being passed on to array_map()
			$carr = is_array($chars) ? array_map(array('TCPDF_FONTS', 'uniord'), $chars) : array();
		} else {
			// one byte per character
			$chars = str_split($str);
			$carr = array_map('ord', $chars);
		}
		if (!isset($currentfont['subsetchars']) OR !is_array($currentfont['subsetchars'])) {
			// make sure the subset table exists before adding to it
			$currentfont['subsetchars'] = array();
		}
		// register every code point as a key; existing keys are kept by the array union
		$currentfont['subsetchars'] += array_fill_keys($carr, true);
		return $carr;
	}

	/**
	 * Converts UTF-8 strings to Latin1 when using the standard 14 core fonts.
* @param $str (string) string to process.
* @param $isunicode (boolean) True when the document is in Unicode mode, false otherwise.
* @param $currentfont (array) Reference to current font array.
* @return string
* @since 3.2.000 (2008-06-23)
* @public static
*/
	public static function UTF8ToLatin1($str, $isunicode=true, &$currentfont) {
		// split into code points first, then map the code points to Latin1 bytes
		return self::UTF8ArrToLatin1(self::UTF8StringToArray($str, $isunicode, $currentfont));
	}

	/**
	 * Converts UTF-8 strings to UTF16-BE.
* @param $str (string) string to process.
* @param $setbom (boolean) if true set the Byte Order Mark (BOM = 0xFEFF)
* @param $isunicode (boolean) True when the document is in Unicode mode, false otherwise.
* @param $currentfont (array) Reference to current font array.
* @return string
* @author Nicola Asuni
* @since 1.53.0.TC005 (2005-01-05)
* @public static
*/
	public static function UTF8ToUTF16BE($str, $setbom=false, $isunicode=true, &$currentfont) {
		if ($isunicode) {
			// array containing UTF-8 unicode values
			$unicode = self::UTF8StringToArray($str, $isunicode, $currentfont);
			return self::arrUTF8ToUTF16BE($unicode, $setbom);
		}
		// string is not in unicode: returned untouched
		return $str;
	}

	/**
	 * Split the string into code points and pass them to utf8StrArrRev(), which applies the Bidirectional Algorithm (http://unicode.org/reports/tr9/) and encodes the result as UTF-16BE.
	 * @param $str (string) string to manipulate.
	 * @param $setbom (bool) if true set the Byte Order Mark (BOM = 0xFEFF)
	 * @param $forcertl (bool) if true forces RTL text direction
	 * @param $isunicode (boolean) True if the document is in Unicode mode, false otherwise.
	 * @param $currentfont (array) Reference to current font array.
	 * @return string
	 * @author Nicola Asuni
	 * @since 2.1.000 (2008-01-08)
	 * @public static
	 */
	public static function utf8StrRev($str, $setbom=false, $forcertl=false, $isunicode=true, &$currentfont) {
		$codepoints = self::UTF8StringToArray($str, $isunicode, $currentfont);
		return self::utf8StrArrRev($codepoints, $str, $setbom, $forcertl, $isunicode, $currentfont);
	}

	/**
	 * Reverse the RTL substrings array using the Bidirectional Algorithm (http://unicode.org/reports/tr9/).
	 * @param $arr (array) array of unicode values.
	 * @param $str (string) string to manipulate (or empty value).
	 * @param $setbom (bool) if true set the Byte Order Mark (BOM = 0xFEFF)
	 * @param $forcertl (bool) if true forces RTL text direction
	 * @param $isunicode (boolean) True if the document is in Unicode mode, false otherwise.
	 * @param $currentfont (array) Reference to current font array.
* @return string
* @author Nicola Asuni
* @since 4.9.000 (2010-03-27)
* @public static
*/
	public static function utf8StrArrRev($arr, $str='', $setbom=false, $forcertl=false, $isunicode=true, &$currentfont) {
		// reorder with utf8Bidi(), then encode the reordered code points as UTF-16BE
		return self::arrUTF8ToUTF16BE(self::utf8Bidi($arr, $str, $forcertl, $isunicode, $currentfont), $setbom);
	}

	/**
	 * Reorder an array of characters using the Bidirectional Algorithm (http://unicode.org/reports/tr9/).
	 * When the text matches the Arabic pattern, Arabic shaping (positional forms, laa ligatures, shadda combinations) is applied before the final reordering.
	 * The input array is returned unchanged when no RTL direction is forced and no Arabic or RTL text is detected.
	 * Every character of the reordered output is also registered in $currentfont['subsetchars'].
	 * @param $ta (array) array of characters composing the string.
	 * @param $str (string) string to process (rebuilt from $ta when empty)
	 * @param $forcertl (bool) if 'R' forces RTL, if 'L' forces LTR
	 * @param $isunicode (boolean) True if the document is in Unicode mode, false otherwise.
	 * @param $currentfont (array) Reference to current font array.
	 * @return array of unicode chars
	 * @author Nicola Asuni
	 * @since 2.4.000 (2008-03-06)
	 * @public static
	 */
	public static function utf8Bidi($ta, $str='', $forcertl=false, $isunicode=true, &$currentfont) {
		// paragraph embedding level
		$pel = 0;
		// max level
		$maxlevel = 0;
		if (TCPDF_STATIC::empty_string($str)) {
			// create string from array
			$str = self::UTF8ArrSubString($ta, '', '', $isunicode);
		}
		// check if string contains arabic text
		if (preg_match(TCPDF_FONT_DATA::$uni_RE_PATTERN_ARABIC, $str)) {
			$arabic = true;
		} else {
			$arabic = false;
		}
		// check if string contains RTL text
		if (!($forcertl OR $arabic OR preg_match(TCPDF_FONT_DATA::$uni_RE_PATTERN_RTL, $str))) {
			return $ta;
		}

		// get number of chars
		$numchars = count($ta);

		if ($forcertl == 'R') {
			$pel = 1;
		} elseif ($forcertl == 'L') {
			$pel = 0;
		} else {
			// P2. In each paragraph, find the first character of type L, AL, or R.
			// P3. If a character is found in P2 and it is of type AL or R, then set the paragraph embedding level to one; otherwise, set it to zero.
			for ($i=0; $i < $numchars; ++$i) {
				$type = TCPDF_FONT_DATA::$uni_type[$ta[$i]];
				if ($type == 'L') {
					$pel = 0;
					break;
				} elseif (($type == 'AL') OR ($type == 'R')) {
					$pel = 1;
					break;
				}
			}
		}

		// Current Embedding Level
		$cel = $pel;
		// directional override status
		$dos = 'N';
		$remember = array();
		// start-of-level-run
		$sor = $pel % 2 ? 'R' : 'L';
		$eor = $sor;

		// Array of characters data
		$chardata = Array();

		// X1. Begin by setting the current embedding level to the paragraph embedding level. Set the directional override status to neutral. Process each character iteratively, applying rules X2 through X9. Only embedding levels from 0 to 61 are valid in this phase.
		// In the resolution of levels in rules I1 and I2, the maximum embedding level of 62 can be reached.
		for ($i=0; $i < $numchars; ++$i) {
			if ($ta[$i] == TCPDF_FONT_DATA::$uni_RLE) {
				// X2. With each RLE, compute the least greater odd embedding level.
				//	a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to neutral.
				//	b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
				$next_level = $cel + ($cel % 2) + 1;
				if ($next_level < 62) {
					$remember[] = array('num' => TCPDF_FONT_DATA::$uni_RLE, 'cel' => $cel, 'dos' => $dos);
					$cel = $next_level;
					$dos = 'N';
					$sor = $eor;
					$eor = $cel % 2 ? 'R' : 'L';
				}
			} elseif ($ta[$i] == TCPDF_FONT_DATA::$uni_LRE) {
				// X3. With each LRE, compute the least greater even embedding level.
				//	a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to neutral.
				//	b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
				$next_level = $cel + 2 - ($cel % 2);
				if ( $next_level < 62 ) {
					$remember[] = array('num' => TCPDF_FONT_DATA::$uni_LRE, 'cel' => $cel, 'dos' => $dos);
					$cel = $next_level;
					$dos = 'N';
					$sor = $eor;
					$eor = $cel % 2 ? 'R' : 'L';
				}
			} elseif ($ta[$i] == TCPDF_FONT_DATA::$uni_RLO) {
				// X4. With each RLO, compute the least greater odd embedding level.
				//	a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to right-to-left.
				//	b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
				$next_level = $cel + ($cel % 2) + 1;
				if ($next_level < 62) {
					$remember[] = array('num' => TCPDF_FONT_DATA::$uni_RLO, 'cel' => $cel, 'dos' => $dos);
					$cel = $next_level;
					$dos = 'R';
					$sor = $eor;
					$eor = $cel % 2 ? 'R' : 'L';
				}
			} elseif ($ta[$i] == TCPDF_FONT_DATA::$uni_LRO) {
				// X5. With each LRO, compute the least greater even embedding level.
				//	a. If this new level would be valid, then this embedding code is valid. Remember (push) the current embedding level and override status. Reset the current level to this new level, and reset the override status to left-to-right.
				//	b. If the new level would not be valid, then this code is invalid. Do not change the current level or override status.
				$next_level = $cel + 2 - ($cel % 2);
				if ( $next_level < 62 ) {
					$remember[] = array('num' => TCPDF_FONT_DATA::$uni_LRO, 'cel' => $cel, 'dos' => $dos);
					$cel = $next_level;
					$dos = 'L';
					$sor = $eor;
					$eor = $cel % 2 ? 'R' : 'L';
				}
			} elseif ($ta[$i] == TCPDF_FONT_DATA::$uni_PDF) {
				// X7. With each PDF, determine the matching embedding or override code. If there was a valid matching code, restore (pop) the last remembered (pushed) embedding level and directional override.
				if (count($remember)) {
					$last = count($remember ) - 1;
					if (($remember[$last]['num'] == TCPDF_FONT_DATA::$uni_RLE) OR
						($remember[$last]['num'] == TCPDF_FONT_DATA::$uni_LRE) OR
						($remember[$last]['num'] == TCPDF_FONT_DATA::$uni_RLO) OR
						($remember[$last]['num'] == TCPDF_FONT_DATA::$uni_LRO)) {
						$match = array_pop($remember);
						$cel = $match['cel'];
						$dos = $match['dos'];
						$sor = $eor;
						$eor = ($cel > $match['cel'] ? $cel : $match['cel']) % 2 ? 'R' : 'L';
					}
				}
			} elseif (($ta[$i] != TCPDF_FONT_DATA::$uni_RLE) AND
							 ($ta[$i] != TCPDF_FONT_DATA::$uni_LRE) AND
							 ($ta[$i] != TCPDF_FONT_DATA::$uni_RLO) AND
							 ($ta[$i] != TCPDF_FONT_DATA::$uni_LRO) AND
							 ($ta[$i] != TCPDF_FONT_DATA::$uni_PDF)) {
				// X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
				//	a. Set the level of the current character to the current embedding level.
				//	b. Whenever the directional override status is not neutral, reset the current character type to the directional override status.
				if ($dos != 'N') {
					$chardir = $dos;
				} else {
					if (isset(TCPDF_FONT_DATA::$uni_type[$ta[$i]])) {
						$chardir = TCPDF_FONT_DATA::$uni_type[$ta[$i]];
					} else {
						// characters without a known type are handled as left-to-right
						$chardir = 'L';
					}
				}
				// stores string characters and other information
				$chardata[] = array('char' => $ta[$i], 'level' => $cel, 'type' => $chardir, 'sor' => $sor, 'eor' => $eor);
			}
		} // end for each char

		// X8. All explicit directional embeddings and overrides are completely terminated at the end of each paragraph. Paragraph separators are not included in the embedding.
		// X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
		// X10. The remaining rules are applied to each run of characters at the same level. For each run, determine the start-of-level-run (sor) and end-of-level-run (eor) type, either L or R. This depends on the higher of the two levels on either side of the boundary (at the start or end of the paragraph, the level of the 'other' run is the base embedding level). If the higher level is odd, the type is R; otherwise, it is L.

		// 3.3.3 Resolving Weak Types
		// Weak types are now resolved one level run at a time. At level run boundaries where the type of the character on the other side of the boundary is required, the type assigned to sor or eor is used.
		// Nonspacing marks are now resolved based on the previous characters.
		$numchars = count($chardata);

		// W1. Examine each nonspacing mark (NSM) in the level run, and change the type of the NSM to the type of the previous character. If the NSM is at the start of the level run, it will get the type of sor.
		for ($i=0; $i < $numchars; ++$i) {
			if ($chardata[$i]['type'] == 'NSM') {
				if (($i > 0) AND ($chardata[($i-1)]['level'] == $chardata[$i]['level'])) {
					// previous character belongs to the same level run
					$chardata[$i]['type'] = $chardata[($i-1)]['type'];
				} else {
					// first character of its level run
					$chardata[$i]['type'] = $chardata[$i]['sor'];
				}
			}
		}

		// W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sor) is found. If an AL is found, change the type of the European number to Arabic number.
		for ($i=0; $i < $numchars; ++$i) {
			if ($chardata[$i]['type'] == 'EN') {
				// walk back through the characters of the same level run only;
				// reaching the start of the run means sor, which is never AL
				for ($j=($i-1); (($j >= 0) AND ($chardata[$j]['level'] == $chardata[$i]['level'])); $j--) {
					if ($chardata[$j]['type'] == 'AL') {
						$chardata[$i]['type'] = 'AN';
						break;
					} elseif (($chardata[$j]['type'] == 'L') OR ($chardata[$j]['type'] == 'R')) {
						break;
					}
				}
			}
		}

		// W3. Change all ALs to R.
		for ($i=0; $i < $numchars; ++$i) {
			if ($chardata[$i]['type'] == 'AL') {
				$chardata[$i]['type'] = 'R';
			}
		}

		// W4. A single European separator between two European numbers changes to a European number. A single common separator between two numbers of the same type changes to that type.
		// NOTE(review): $levcount is updated after the test, so it describes the previous character; a separator at the second position of a run is therefore not converted here - confirm whether this is intended.
		$prevlevel = -1;
		$levcount = 0;
		for ($i=0; $i < $numchars; ++$i) {
			if (($levcount > 0) AND (($i+1) < $numchars) AND ($chardata[($i+1)]['level'] == $prevlevel)) {
				if (($chardata[$i]['type'] == 'ES') AND ($chardata[($i-1)]['type'] == 'EN') AND ($chardata[($i+1)]['type'] == 'EN')) {
					$chardata[$i]['type'] = 'EN';
				} elseif (($chardata[$i]['type'] == 'CS') AND ($chardata[($i-1)]['type'] == 'EN') AND ($chardata[($i+1)]['type'] == 'EN')) {
					$chardata[$i]['type'] = 'EN';
				} elseif (($chardata[$i]['type'] == 'CS') AND ($chardata[($i-1)]['type'] == 'AN') AND ($chardata[($i+1)]['type'] == 'AN')) {
					$chardata[$i]['type'] = 'AN';
				}
			}
			if ($chardata[$i]['level'] != $prevlevel) {
				$levcount = 0;
			} else {
				++$levcount;
			}
			$prevlevel = $chardata[$i]['level'];
		}

		// W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
		$prevlevel = -1;
		$levcount = 0;
		for ($i=0; $i < $numchars; ++$i) {
			if ($chardata[$i]['type'] == 'ET') {
				if (($levcount > 0) AND ($chardata[($i-1)]['type'] == 'EN')) {
					$chardata[$i]['type'] = 'EN';
				} else {
					// look ahead across a sequence of terminators for a European number
					$j = $i+1;
					while (($j < $numchars) AND ($chardata[$j]['level'] == $prevlevel)) {
						if ($chardata[$j]['type'] == 'EN') {
							$chardata[$i]['type'] = 'EN';
							break;
						} elseif ($chardata[$j]['type'] != 'ET') {
							break;
						}
						++$j;
					}
				}
			}
			if ($chardata[$i]['level'] != $prevlevel) {
				$levcount = 0;
			} else {
				++$levcount;
			}
			$prevlevel = $chardata[$i]['level'];
		}

		// W6. Otherwise, separators and terminators change to Other Neutral.
		for ($i=0; $i < $numchars; ++$i) {
			if (($chardata[$i]['type'] == 'ET') OR ($chardata[$i]['type'] == 'ES') OR ($chardata[$i]['type'] == 'CS')) {
				$chardata[$i]['type'] = 'ON';
			}
		}

		// W7. Search backward from each instance of a European number until the first strong type (R, L, or sor) is found. If an L is found, then change the type of the European number to L.
		for ($i=0; $i < $numchars; ++$i) {
			if ($chardata[$i]['type'] == 'EN') {
				// sor is used when no strong character precedes the number in its level run
				$strong = $chardata[$i]['sor'];
				for ($j=($i-1); (($j >= 0) AND ($chardata[$j]['level'] == $chardata[$i]['level'])); $j--) {
					if (($chardata[$j]['type'] == 'L') OR ($chardata[$j]['type'] == 'R')) {
						$strong = $chardata[$j]['type'];
						break;
					}
				}
				if ($strong == 'L') {
					$chardata[$i]['type'] = 'L';
				}
			}
		}

		// N1. A sequence of neutrals takes the direction of the surrounding strong text if the text on both sides has the same direction. European and Arabic numbers act as if they were R in terms of their influence on neutrals. Start-of-level-run (sor) and end-of-level-run (eor) are used at level run boundaries.
		// NOTE(review): the tests below look for type 'N', while W6 above assigns 'ON'; verify against TCPDF_FONT_DATA::$uni_type whether 'N' can actually occur.
		$prevlevel = -1;
		$levcount = 0;
		for ($i=0; $i < $numchars; ++$i) {
			if (($levcount > 0) AND (($i+1) < $numchars) AND ($chardata[($i+1)]['level'] == $prevlevel)) {
				if (($chardata[$i]['type'] == 'N') AND ($chardata[($i-1)]['type'] == 'L') AND ($chardata[($i+1)]['type'] == 'L')) {
					$chardata[$i]['type'] = 'L';
				} elseif (($chardata[$i]['type'] == 'N') AND
				 (($chardata[($i-1)]['type'] == 'R') OR ($chardata[($i-1)]['type'] == 'EN') OR ($chardata[($i-1)]['type'] == 'AN')) AND
				 (($chardata[($i+1)]['type'] == 'R') OR ($chardata[($i+1)]['type'] == 'EN') OR ($chardata[($i+1)]['type'] == 'AN'))) {
					$chardata[$i]['type'] = 'R';
				} elseif ($chardata[$i]['type'] == 'N') {
					// N2. Any remaining neutrals take the embedding direction
					$chardata[$i]['type'] = $chardata[$i]['sor'];
				}
			} elseif (($levcount == 0) AND (($i+1) < $numchars) AND ($chardata[($i+1)]['level'] == $prevlevel)) {
				// first char
				if (($chardata[$i]['type'] == 'N') AND ($chardata[$i]['sor'] == 'L') AND ($chardata[($i+1)]['type'] == 'L')) {
					$chardata[$i]['type'] = 'L';
				} elseif (($chardata[$i]['type'] == 'N') AND
				 (($chardata[$i]['sor'] == 'R') OR ($chardata[$i]['sor'] == 'EN') OR ($chardata[$i]['sor'] == 'AN')) AND
				 (($chardata[($i+1)]['type'] == 'R') OR ($chardata[($i+1)]['type'] == 'EN') OR ($chardata[($i+1)]['type'] == 'AN'))) {
					$chardata[$i]['type'] = 'R';
				} elseif ($chardata[$i]['type'] == 'N') {
					// N2. Any remaining neutrals take the embedding direction
					$chardata[$i]['type'] = $chardata[$i]['sor'];
				}
			} elseif (($levcount > 0) AND ((($i+1) == $numchars) OR (($i+1) < $numchars) AND ($chardata[($i+1)]['level'] != $prevlevel))) {
				//last char
				if (($chardata[$i]['type'] == 'N') AND ($chardata[($i-1)]['type'] == 'L') AND ($chardata[$i]['eor'] == 'L')) {
					$chardata[$i]['type'] = 'L';
				} elseif (($chardata[$i]['type'] == 'N') AND
				 (($chardata[($i-1)]['type'] == 'R') OR ($chardata[($i-1)]['type'] == 'EN') OR ($chardata[($i-1)]['type'] == 'AN')) AND
				 (($chardata[$i]['eor'] == 'R') OR ($chardata[$i]['eor'] == 'EN') OR ($chardata[$i]['eor'] == 'AN'))) {
					$chardata[$i]['type'] = 'R';
				} elseif ($chardata[$i]['type'] == 'N') {
					// N2. Any remaining neutrals take the embedding direction
					$chardata[$i]['type'] = $chardata[$i]['sor'];
				}
			} elseif ($chardata[$i]['type'] == 'N') {
				// N2. Any remaining neutrals take the embedding direction
				$chardata[$i]['type'] = $chardata[$i]['sor'];
			}
			if ($chardata[$i]['level'] != $prevlevel) {
				$levcount = 0;
			} else {
				++$levcount;
			}
			$prevlevel = $chardata[$i]['level'];
		}

		// I1. For all characters with an even (left-to-right) embedding direction, those of type R go up one level and those of type AN or EN go up two levels.
		// I2. For all characters with an odd (right-to-left) embedding direction, those of type L, EN or AN go up one level.
		for ($i=0; $i < $numchars; ++$i) {
			$odd = $chardata[$i]['level'] % 2;
			if ($odd) {
				if (($chardata[$i]['type'] == 'L') OR ($chardata[$i]['type'] == 'AN') OR ($chardata[$i]['type'] == 'EN')) {
					$chardata[$i]['level'] += 1;
				}
			} else {
				if ($chardata[$i]['type'] == 'R') {
					$chardata[$i]['level'] += 1;
				} elseif (($chardata[$i]['type'] == 'AN') OR ($chardata[$i]['type'] == 'EN')) {
					$chardata[$i]['level'] += 2;
				}
			}
			// highest level found so far (drives the reordering passes of rule L2)
			$maxlevel = max($chardata[$i]['level'],$maxlevel);
		}

		// L1. On each line, reset the embedding level of the following characters to the paragraph embedding level:
		//	1. Segment separators,
		//	2. Paragraph separators,
		//	3. Any sequence of whitespace characters preceding a segment separator or paragraph separator, and
		//	4. Any sequence of white space characters at the end of the line.
		for ($i=0; $i < $numchars; ++$i) {
			if (($chardata[$i]['type'] == 'B') OR ($chardata[$i]['type'] == 'S')) {
				$chardata[$i]['level'] = $pel;
			} elseif ($chardata[$i]['type'] == 'WS') {
				$j = $i+1;
				while ($j < $numchars) {
					if ((($chardata[$j]['type'] == 'B') OR ($chardata[$j]['type'] == 'S')) OR
						(($j == ($numchars-1)) AND ($chardata[$j]['type'] == 'WS'))) {
						$chardata[$i]['level'] = $pel;
						break;
					} elseif ($chardata[$j]['type'] != 'WS') {
						break;
					}
					++$j;
				}
			}
		}

		// Arabic Shaping
		// Cursively connected scripts, such as Arabic or Syriac, require the selection of positional character shapes that depend on adjacent characters. Shaping is logically applied after the Bidirectional Algorithm is used and is limited to characters within the same directional run.
		if ($arabic) {
			$endedletter = array(1569,1570,1571,1572,1573,1575,1577,1583,1584,1585,1586,1608,1688);
			$alfletter = array(1570,1571,1573,1575);
			// substitutions are written to a copy so that the tests below keep seeing the original characters
			$chardata2 = $chardata;
			$laaletter = false;
			// $charAL collects the Arabic letters plus characters 32 and 8204, each with its index ('i') in $chardata
			$charAL = array();
			$x = 0;
			for ($i=0; $i < $numchars; ++$i) {
				if ((TCPDF_FONT_DATA::$uni_type[$chardata[$i]['char']] == 'AL') OR ($chardata[$i]['char'] == 32) OR ($chardata[$i]['char'] == 8204)) {
					$charAL[$x] = $chardata[$i];
					$charAL[$x]['i'] = $i;
					$chardata[$i]['x'] = $x;
					++$x;
				}
			}
			$numAL = $x;
			for ($i=0; $i < $numchars; ++$i) {
				$thischar = $chardata[$i];
				if ($i > 0) {
					$prevchar = $chardata[($i-1)];
				} else {
					$prevchar = false;
				}
				if (($i+1) < $numchars) {
					$nextchar = $chardata[($i+1)];
				} else {
					$nextchar = false;
				}
				if (TCPDF_FONT_DATA::$uni_type[$thischar['char']] == 'AL') {
					// for Arabic letters the neighbours are taken from $charAL instead of $chardata
					$x = $thischar['x'];
					if ($x > 0) {
						$prevchar = $charAL[($x-1)];
					} else {
						$prevchar = false;
					}
					if (($x+1) < $numAL) {
						$nextchar = $charAL[($x+1)];
					} else {
						$nextchar = false;
					}
					// if laa letter
					if (($prevchar !== false) AND ($prevchar['char'] == 1604) AND (in_array($thischar['char'], $alfletter))) {
						$arabicarr = TCPDF_FONT_DATA::$uni_laa_array;
						$laaletter = true;
						if ($x > 1) {
							$prevchar = $charAL[($x-2)];
						} else {
							$prevchar = false;
						}
					} else {
						$arabicarr = TCPDF_FONT_DATA::$uni_arabicsubst;
						$laaletter = false;
					}
					if (($prevchar !== false) AND ($nextchar !== false) AND
						((TCPDF_FONT_DATA::$uni_type[$prevchar['char']] == 'AL') OR (TCPDF_FONT_DATA::$uni_type[$prevchar['char']] == 'NSM')) AND
						((TCPDF_FONT_DATA::$uni_type[$nextchar['char']] == 'AL') OR (TCPDF_FONT_DATA::$uni_type[$nextchar['char']] == 'NSM')) AND
						($prevchar['type'] == $thischar['type']) AND
						($nextchar['type'] == $thischar['type']) AND
						($nextchar['char'] != 1567)) {
						if (in_array($prevchar['char'], $endedletter)) {
							if (isset($arabicarr[$thischar['char']][2])) {
								// initial
								$chardata2[$i]['char'] = $arabicarr[$thischar['char']][2];
							}
						} else {
							if (isset($arabicarr[$thischar['char']][3])) {
								// medial
								$chardata2[$i]['char'] = $arabicarr[$thischar['char']][3];
							}
						}
					} elseif (($nextchar !== false) AND
						((TCPDF_FONT_DATA::$uni_type[$nextchar['char']] == 'AL') OR (TCPDF_FONT_DATA::$uni_type[$nextchar['char']] == 'NSM')) AND
						($nextchar['type'] == $thischar['type']) AND
						($nextchar['char'] != 1567)) {
						if (isset($arabicarr[$chardata[$i]['char']][2])) {
							// initial
							$chardata2[$i]['char'] = $arabicarr[$thischar['char']][2];
						}
					} elseif ((($prevchar !== false) AND
						((TCPDF_FONT_DATA::$uni_type[$prevchar['char']] == 'AL') OR (TCPDF_FONT_DATA::$uni_type[$prevchar['char']] == 'NSM')) AND
						($prevchar['type'] == $thischar['type'])) OR
						(($nextchar !== false) AND ($nextchar['char'] == 1567))) {
						// final
						if (($i > 1) AND ($thischar['char'] == 1607) AND
							($chardata[$i-1]['char'] == 1604) AND
							($chardata[$i-2]['char'] == 1604)) {
							//Allah Word
							// mark characters to delete with false
							$chardata2[$i-2]['char'] = false;
							$chardata2[$i-1]['char'] = false;
							$chardata2[$i]['char'] = 65010;
						} else {
							if (($prevchar !== false) AND in_array($prevchar['char'], $endedletter)) {
								if (isset($arabicarr[$thischar['char']][0])) {
									// isolated
									$chardata2[$i]['char'] = $arabicarr[$thischar['char']][0];
								}
							} else {
								if (isset($arabicarr[$thischar['char']][1])) {
									// final
									$chardata2[$i]['char'] = $arabicarr[$thischar['char']][1];
								}
							}
						}
					} elseif (isset($arabicarr[$thischar['char']][0])) {
						// isolated
						$chardata2[$i]['char'] = $arabicarr[$thischar['char']][0];
					}
					// if laa letter
					if ($laaletter) {
						// mark characters to delete with false
						$chardata2[($charAL[($x-1)]['i'])]['char'] = false;
					}
				} // end if AL (Arabic Letter)
			} // end for each char
			/*
			 * Combining characters that can occur with Arabic Shadda (0651 HEX, 1617 DEC) are replaced.
			 * Putting the combining mark and shadda in the same glyph allows us to avoid the two marks overlapping each other in an illegible manner.
			 */
			for ($i = 0; $i < ($numchars-1); ++$i) {
				if (($chardata2[$i]['char'] == 1617) AND (isset(TCPDF_FONT_DATA::$uni_diacritics[($chardata2[$i+1]['char'])]))) {
					// check if the subtitution font is defined on current font
					if (isset($currentfont['cw'][(TCPDF_FONT_DATA::$uni_diacritics[($chardata2[$i+1]['char'])])])) {
						$chardata2[$i]['char'] = false;
						$chardata2[$i+1]['char'] = TCPDF_FONT_DATA::$uni_diacritics[($chardata2[$i+1]['char'])];
					}
				}
			}
			// remove marked characters
			foreach ($chardata2 as $key => $value) {
				if ($value['char'] === false) {
					unset($chardata2[$key]);
				}
			}
			$chardata = array_values($chardata2);
			$numchars = count($chardata);
			unset($chardata2);
			unset($arabicarr);
			unset($laaletter);
			unset($charAL);
		}

		// L2. From the highest level found in the text to the lowest odd level on each line, including intermediate levels not actually present in the text, reverse any contiguous sequence of characters that are at that level or higher.
		for ($j=$maxlevel; $j > 0; $j--) {
			$ordarray = Array();
			$revarr = Array();
			$onlevel = false;
			for ($i=0; $i < $numchars; ++$i) {
				if ($chardata[$i]['level'] >= $j) {
					$onlevel = true;
					if (isset(TCPDF_FONT_DATA::$uni_mirror[$chardata[$i]['char']])) {
						// L4. A character is depicted by a mirrored glyph if and only if (a) the resolved directionality of that character is R, and (b) the Bidi_Mirrored property value of that character is true.
						$chardata[$i]['char'] = TCPDF_FONT_DATA::$uni_mirror[$chardata[$i]['char']];
					}
					$revarr[] = $chardata[$i];
				} else {
					if ($onlevel) {
						// end of a sequence at this level or higher: append it reversed
						$revarr = array_reverse($revarr);
						$ordarray = array_merge($ordarray, $revarr);
						$revarr = Array();
						$onlevel = false;
					}
					$ordarray[] = $chardata[$i];
				}
			}
			if ($onlevel) {
				$revarr = array_reverse($revarr);
				$ordarray = array_merge($ordarray, $revarr);
			}
			$chardata = $ordarray;
		}
		$ordarray = array();
		foreach ($chardata as $cd) {
			$ordarray[] = $cd['char'];
			// store char values for subsetting
			$currentfont['subsetchars'][$cd['char']] = true;
		}
		return $ordarray;
	}

} // END OF TCPDF_FONTS CLASS

//============================================================+
// END OF FILE
//============================================================+