Current Path : /var/www/html/clients/nkpgkx11.e-nk.ru/bitrix/modules/main/classes/general/ |
Current File : /var/www/html/clients/nkpgkx11.e-nk.ru/bitrix/modules/main/classes/general/charset_converter.php |
<?php
/**
 * Character set conversion.
 *
 * CharsetConverter::ConvertCharset() tries, in order: the mbstring extension,
 * iconv/libiconv (unless BX_ICONV_DISABLE is defined as true) and finally a
 * pure-PHP converter driven by plain-text mapping tables read from
 * PATH2CONVERT_TABLES.
 */

// Directory that holds the conversion table files (one file per charset name).
define("PATH2CONVERT_TABLES", $_SERVER["DOCUMENT_ROOT"]."/bitrix/modules/main/cvtables/");

// Cache of parsed tables: table file name => array(unicode hex => charset hex).
global $BX_CHARSET_TABLE_CACHE;
$BX_CHARSET_TABLE_CACHE = Array();

class CharsetConverter
{
	/** @var CharsetConverter|null Lazily created shared instance. */
	private static $instance;

	/** @var string[] Errors collected by the last Convert() call. */
	private $arErrors = array();

	/**
	 * Returns the shared converter instance, creating it on first use.
	 *
	 * @static
	 * @return CharsetConverter
	 */
	public static function GetInstance()
	{
		if (!isset(self::$instance))
			self::$instance = new self();

		return self::$instance;
	}

	/**
	 * Converts $string from $charset_in to $charset_out.
	 *
	 * @param mixed $string Value to convert; it is cast to string first.
	 * @param string $charset_in Source charset name.
	 * @param string $charset_out Destination charset name.
	 * @param string $errorMessage Receives a description of the failure, if any.
	 * @return string|false Converted string. The input is returned unchanged when both
	 *	charset names are equal (case-insensitively); a falsy value is returned when
	 *	the selected backend fails.
	 */
	public static function ConvertCharset($string, $charset_in, $charset_out, &$errorMessage = "")
	{
		$string = strval($string);

		if (strcasecmp($charset_in, $charset_out) == 0)
			return $string;

		$errorMessage = '';

		if ($string === '')
			return '';

		// "UTF-16" does not name a byte order, so the byte order mark (BOM) decides.
		$isUtf16 = (strtoupper($charset_in) == "UTF-16");
		$bom = ($isUtf16 ? substr($string, 0, 2) : '');

		if (extension_loaded("mbstring"))
		{
			if ($isUtf16)
			{
				// mbstring treats plain "UTF-16" as big endian, so little endian
				// has to be requested explicitly.
				if ($bom === "\xFF\xFE")
					return mb_convert_encoding(substr($string, 2), $charset_out, "UTF-16LE");
				elseif ($bom === "\xFE\xFF")
					return mb_convert_encoding(substr($string, 2), $charset_out, $charset_in);
				else
					// No BOM: assume little endian.
					return mb_convert_encoding($string, $charset_out, "UTF-16LE");
			}
			else
			{
				$res = mb_convert_encoding($string, $charset_out, $charset_in);
				// An empty result falls through to the next backend.
				if (strlen($res) > 0)
					return $res;
			}
		}

		if (!defined("BX_ICONV_DISABLE") || BX_ICONV_DISABLE !== true)
		{
			$source = $string;

			// Prepend a little-endian BOM only when the input has no BOM at all, which
			// matches the little-endian assumption of the mbstring path. The previous
			// test ($ch != "\xFF" || $ch != "\xFE") was always true, so a second BOM
			// was added in front of an existing one.
			if ($isUtf16 && $bom !== "\xFF\xFE" && $bom !== "\xFE\xFF")
				$source = "\xFF\xFE".$string;

			if (function_exists('iconv'))
			{
				$res = iconv($charset_in, $charset_out."//IGNORE", $source);
				if (!$res)
					$errorMessage .= "Iconv reported failure while converting string to requested character encoding. ";

				return $res;
			}
			elseif (function_exists('libiconv'))
			{
				$res = libiconv($charset_in, $charset_out, $source);
				if (!$res)
					$errorMessage .= "Libiconv reported failure while converting string to requested character encoding. ";

				return $res;
			}
		}

		// Last resort: table driven conversion implemented in PHP.
		$cvt = self::GetInstance();
		$res = $cvt->Convert($string, $charset_in, $charset_out);
		if (!$res)
		{
			$arErrors = $cvt->GetErrors();
			if (count($arErrors) > 0)
				$errorMessage = implode("\n", $arErrors);
		}

		return $res;
	}

	/**
	 * Encodes one code point, given as a hexadecimal string, as UTF-8 bytes.
	 *
	 * @param string $utfCharInHex Code point in hex, e.g. "0410".
	 * @return string UTF-8 byte sequence, or "" for values of 0x200000 and above.
	 */
	protected function HexToUtf($utfCharInHex)
	{
		$result = "";
		$utfCharInDec = hexdec($utfCharInHex);

		if ($utfCharInDec < 128)
		{
			$result .= chr($utfCharInDec);
		}
		elseif ($utfCharInDec < 2048)
		{
			$result .= chr(($utfCharInDec >> 6) + 192)
				.chr(($utfCharInDec & 63) + 128);
		}
		elseif ($utfCharInDec < 65536)
		{
			$result .= chr(($utfCharInDec >> 12) + 224)
				.chr((($utfCharInDec >> 6) & 63) + 128)
				.chr(($utfCharInDec & 63) + 128);
		}
		elseif ($utfCharInDec < 2097152)
		{
			// Parentheses are required: "+" binds tighter than ">>" and "&".
			$result .= chr(($utfCharInDec >> 18) + 240)
				.chr((($utfCharInDec >> 12) & 63) + 128)
				.chr((($utfCharInDec >> 6) & 63) + 128)
				.chr(($utfCharInDec & 63) + 128);
		}

		return $result;
	}

	/**
	 * Loads the conversion tables named by the arguments into the global cache.
	 *
	 * Accepts any number of table file names (relative to PATH2CONVERT_TABLES).
	 * Every non-comment line is split on whitespace/commas; the first column is
	 * stored as the value and the second column as the key, both upper-cased
	 * with "0x" removed. A table that cannot be read is cached as an empty
	 * array and an error is recorded.
	 *
	 * NOTE(review): the name is appended to the directory as is; confirm that
	 * callers never pass charset names taken from untrusted input.
	 *
	 * @return array The whole cache: file name => array(key => value).
	 */
	protected function BuildConvertTable()
	{
		global $BX_CHARSET_TABLE_CACHE;

		foreach (func_get_args() as $fileName)
		{
			if (isset($BX_CHARSET_TABLE_CACHE[$fileName]))
				continue;

			$BX_CHARSET_TABLE_CACHE[$fileName] = Array();
			$path = PATH2CONVERT_TABLES.$fileName;

			if (!file_exists($path))
			{
				$this->AddError(str_replace("#FILE#", $path, "File #FILE# is not found."));
				continue;
			}

			if (!is_file($path))
			{
				$this->AddError(str_replace("#FILE#", $path, "File #FILE# is not a file."));
				continue;
			}

			if (!($hFile = fopen($path, "r")))
			{
				$this->AddError(str_replace("#FILE#", $path, "Can not open file #FILE# for reading."));
				continue;
			}

			while (!feof($hFile))
			{
				$line = fgets($hFile, 1024);
				// A read failure that does not set EOF must not loop forever.
				if ($line === false)
					break;

				$line = trim($line);
				if (!$line || substr($line, 0, 1) == "#")
					continue;

				$hexValue = preg_split("/[\s,]+/", $line, 3);

				// Skip lines without a usable second column instead of storing
				// an entry under an empty key.
				if (!isset($hexValue[1]) || $hexValue[1] === '' || substr($hexValue[1], 0, 1) == "#")
					continue;

				$key = strtoupper(str_replace("0x", "", $hexValue[1]));
				$value = strtoupper(str_replace("0x", "", $hexValue[0]));
				$BX_CHARSET_TABLE_CACHE[$fileName][$key] = $value;
			}

			fclose($hFile);
		}

		return $BX_CHARSET_TABLE_CACHE;
	}

	/**
	 * Table driven conversion that needs no PHP extension.
	 *
	 * Problems are collected and can be read with GetErrors(); characters that
	 * have no mapping are dropped from the result.
	 *
	 * @param string $sourceString Bytes to convert.
	 * @param string $charsetFrom Source charset (compared case-insensitively).
	 * @param string $charsetTo Destination charset (compared case-insensitively).
	 * @return string|false Converted string, or false when an argument is empty.
	 */
	public function Convert($sourceString, $charsetFrom, $charsetTo)
	{
		$this->ClearErrors();

		if (strlen($sourceString) <= 0)
		{
			$this->AddError("Nothing to convert.");
			return false;
		}

		if (strlen($charsetFrom) <= 0)
		{
			$this->AddError("Source charset is not set.");
			return false;
		}

		if (strlen($charsetTo) <= 0)
		{
			$this->AddError("Destination charset is not set.");
			return false;
		}

		$charsetFrom = strtolower($charsetFrom);
		$charsetTo = strtolower($charsetTo);

		if ($charsetFrom == $charsetTo)
			return $sourceString;

		$resultString = "";
		$toUtf8 = ($charsetTo == "utf-8");
		$sourceLength = strlen($sourceString);

		if ($charsetFrom == "ucs-2" || $charsetFrom == "utf-16")
		{
			$arConvertTable = $this->BuildConvertTable($charsetTo);

			// "ucs-2" input is read high byte first, "utf-16" low byte first.
			$highByteFirst = ($charsetFrom == "ucs-2");

			// A dangling last byte of odd-length input is ignored rather than
			// read past the end of the string.
			for ($i = 0; $i + 1 < $sourceLength; $i += 2)
			{
				$first = ord($sourceString[$i]);
				$second = ord($sourceString[$i + 1]);

				// Each byte is padded separately: joining unpadded dechex()
				// results turned 0x10 0x04 into "0104" instead of "1004".
				if ($highByteFirst)
					$hexChar = sprintf("%02X%02X", $first, $second);
				else
					$hexChar = sprintf("%02X%02X", $second, $first);

				if (!empty($arConvertTable[$charsetTo][$hexChar]))
				{
					if (!$toUtf8)
						$resultString .= chr(hexdec($arConvertTable[$charsetTo][$hexChar]));
					else
						$resultString .= $this->HexToUtf($arConvertTable[$charsetTo][$hexChar]);
				}
			}
		}
		elseif ($charsetFrom != "utf-8")
		{
			if (!$toUtf8)
				$arConvertTable = $this->BuildConvertTable($charsetFrom, $charsetTo);
			else
				$arConvertTable = $this->BuildConvertTable($charsetFrom);

			if (!$arConvertTable)
				return false;

			// Reverse lookup (charset hex => unicode hex) built once instead of a
			// linear in_array()/array_search() scan per character. The first key
			// wins, which is what array_search() returned.
			$sourceMap = array();
			foreach ($arConvertTable[$charsetFrom] as $unicodeHexChar => $charHex)
			{
				if (!isset($sourceMap[$charHex]))
					$sourceMap[$charHex] = $unicodeHexChar;
			}

			// The loop indexes bytes, so the bound must be the byte length.
			for ($i = 0; $i < $sourceLength; $i++)
			{
				$hexChar = sprintf("%02X", ord($sourceString[$i]));

				// GSM 03.38 escape: 0x1B plus the following byte form one table entry.
				if (($charsetFrom == "gsm0338") && ($hexChar == '1B') && ($i + 1 < $sourceLength))
				{
					$i++;
					$hexChar .= sprintf("%02X", ord($sourceString[$i]));
				}

				if (!isset($sourceMap[$hexChar]))
				{
					$this->AddError(str_replace("#CHAR#", $sourceString[$i], "Can not find matching char \"#CHAR#\" in source encoding table."));
					continue;
				}

				// One source character may map to several code points joined by "+".
				$arUnicodeHexChar = explode("+", (string)$sourceMap[$hexChar]);
				foreach ($arUnicodeHexChar as $unicodeHexChar)
				{
					if ($toUtf8)
						$resultString .= $this->HexToUtf($unicodeHexChar);
					elseif (array_key_exists($unicodeHexChar, $arConvertTable[$charsetTo]))
						$resultString .= chr(hexdec($arConvertTable[$charsetTo][$unicodeHexChar]));
					else
						$this->AddError(str_replace("#CHAR#", $sourceString[$i], "Can not find matching char \"#CHAR#\" in destination encoding table."));
				}
			}
		}
		else
		{
			$arConvertTable = $this->BuildConvertTable($charsetTo);
			if (!$arConvertTable)
				return false;

			// All replacements are applied in a single pass. Replacing entry by
			// entry let bytes produced by one replacement combine with their
			// neighbours and match a later entry.
			$replacePairs = array();
			foreach ($arConvertTable[$charsetTo] as $unicodeHexChar => $hexChar)
			{
				$unicodeHexChar = (string)$unicodeHexChar;

				// Multi code point keys never produced a usable search string here.
				if (strpos($unicodeHexChar, "+") !== false)
					continue;

				$utfChar = $this->HexToUtf($unicodeHexChar);
				if ($utfChar !== "")
					$replacePairs[$utfChar] = chr(hexdec($hexChar));
			}

			if (count($replacePairs) > 0)
				$resultString = strtr($sourceString, $replacePairs);
			else
				$resultString = $sourceString;
		}

		return $resultString;
	}

	/**
	 * Returns the errors collected since the last ClearErrors() call.
	 *
	 * @return string[]
	 */
	public function GetErrors()
	{
		return $this->arErrors;
	}

	/**
	 * Records an error message; empty messages are ignored.
	 *
	 * @param string $error Message text.
	 * @param string $errorCode Optional code, rendered as "[code] message".
	 * @return void
	 */
	protected function AddError($error, $errorCode = "")
	{
		if (empty($error))
			return;

		// The code goes inside the brackets; the arguments used to be swapped.
		if (empty($errorCode))
			$this->arErrors[] = sprintf("%s", $error);
		else
			$this->arErrors[] = sprintf("[%s] %s", $errorCode, $error);
	}

	/**
	 * Drops all collected errors.
	 *
	 * @return void
	 */
	protected function ClearErrors()
	{
		$this->arErrors = array();
	}
}
?>