| 
							- <?php
 - 
 - namespace dokuwiki\Utf8;
 - 
 - /**
 -  * Methods to convert from and to UTF-8 strings
 -  */
 - class Conversion
 - {
 -     /**
 -      * Encodes UTF-8 characters to HTML entities
 -      *
 -      * @author Tom N Harris <tnharris@whoopdedo.org>
 -      * @author <vpribish at shopping dot com>
 -      * @link   http://php.net/manual/en/function.utf8-decode.php
 -      *
 -      * @param string $str
 -      * @param bool $all Encode non-utf8 char to HTML as well
 -      * @return string
 -      */
 -     public static function toHtml($str, $all = false)
 -     {
 -         $ret = '';
 -         foreach (Unicode::fromUtf8($str) as $cp) {
 -             if ($cp < 0x80 && !$all) {
 -                 $ret .= chr($cp);
 -             } elseif ($cp < 0x100) {
 -                 $ret .= "&#$cp;";
 -             } else {
 -                 $ret .= '&#x' . dechex($cp) . ';';
 -             }
 -         }
 -         return $ret;
 -     }
 - 
 -     /**
 -      * Decodes HTML entities to UTF-8 characters
 -      *
 -      * Convert any &#..; entity to a codepoint,
 -      * The entities flag defaults to only decoding numeric entities.
 -      * Pass HTML_ENTITIES and named entities, including & < etc.
 -      * are handled as well. Avoids the problem that would occur if you
 -      * had to decode "&#38;&amp;#38;"
 -      *
 -      * unhtmlspecialchars(\dokuwiki\Utf8\Conversion::fromHtml($s)) -> "&&"
 -      * \dokuwiki\Utf8\Conversion::fromHtml(unhtmlspecialchars($s)) -> "&&#38;"
 -      * what it should be                   -> "&&#38;"
 -      *
 -      * @author Tom N Harris <tnharris@whoopdedo.org>
 -      *
 -      * @param  string $str UTF-8 encoded string
 -      * @param  boolean $entities decode name entities in addtition to numeric ones
 -      * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
 -      */
 -     public static function fromHtml($str, $entities = false)
 -     {
 -         if (!$entities) {
 -             return preg_replace_callback(
 -                 '/(&#([Xx])?([0-9A-Za-z]+);)/m',
 -                 [self::class, 'decodeNumericEntity'],
 -                 $str
 -             );
 -         }
 - 
 -         return preg_replace_callback(
 -             '/&(#)?([Xx])?([0-9A-Za-z]+);/m',
 -             [self::class, 'decodeAnyEntity'],
 -             $str
 -         );
 -     }
 - 
 -     /**
 -      * Decodes any HTML entity to it's correct UTF-8 char equivalent
 -      *
 -      * @param string $ent An entity
 -      * @return string
 -      */
 -     protected static function decodeAnyEntity($ent)
 -     {
 -         // create the named entity lookup table
 -         static $table = null;
 -         if ($table === null) {
 -             $table = get_html_translation_table(HTML_ENTITIES);
 -             $table = array_flip($table);
 -             $table = array_map(
 -                 static fn($c) => Unicode::toUtf8([ord($c)]),
 -                 $table
 -             );
 -         }
 - 
 -         if ($ent[1] === '#') {
 -             return self::decodeNumericEntity($ent);
 -         }
 - 
 -         if (array_key_exists($ent[0], $table)) {
 -             return $table[$ent[0]];
 -         }
 - 
 -         return $ent[0];
 -     }
 - 
 -     /**
 -      * Decodes numeric HTML entities to their correct UTF-8 characters
 -      *
 -      * @param $ent string A numeric entity
 -      * @return string|false
 -      */
 -     protected static function decodeNumericEntity($ent)
 -     {
 -         switch ($ent[2]) {
 -             case 'X':
 -             case 'x':
 -                 $cp = hexdec($ent[3]);
 -                 break;
 -             default:
 -                 $cp = (int) $ent[3];
 -                 break;
 -         }
 -         return Unicode::toUtf8([$cp]);
 -     }
 - 
 -     /**
 -      * UTF-8 to UTF-16BE conversion.
 -      *
 -      * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits
 -      *
 -      * @param string $str
 -      * @param bool $bom
 -      * @return string
 -      */
 -     public static function toUtf16be($str, $bom = false)
 -     {
 -         $out = $bom ? "\xFE\xFF" : '';
 -         if (UTF8_MBSTRING) {
 -             return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
 -         }
 - 
 -         $uni = Unicode::fromUtf8($str);
 -         foreach ($uni as $cp) {
 -             $out .= pack('n', $cp);
 -         }
 -         return $out;
 -     }
 - 
 -     /**
 -      * UTF-8 to UTF-16BE conversion.
 -      *
 -      * Maybe really UCS-2 without mb_string due to utf8_to_unicode limits
 -      *
 -      * @param string $str
 -      * @return false|string
 -      */
 -     public static function fromUtf16be($str)
 -     {
 -         $uni = unpack('n*', $str);
 -         return Unicode::toUtf8($uni);
 -     }
 - }
 
 
  |