SkinbaseNova/app/Services/LegacySmileyMapper.php

<?php

namespace App\Services;

/**
 * Centralized mapping from legacy GIF smiley codes to Unicode emoji.
 *
 * Usage:
 *   $result = LegacySmileyMapper::convert($text);
 *   $map    = LegacySmileyMapper::getMap();
 */
class LegacySmileyMapper
{
    /**
     * The canonical smiley-code → emoji map.
     *
     * Keys are the literal legacy codes (case-sensitive, including the leading
     * colon); values are the replacement emoji. Several codes intentionally map
     * to the same emoji (aliases such as :thumb / :thumbs / :thumbsup).
     *
     * @var array<string, string>
     */
    private const MAP = [
        // Core
        ':beer'       => '🍺',
        ':clap'       => '👏',
        ':coffee'     => '☕',
        ':cry'        => '😢',
        ':lol'        => '😂',
        ':love'       => '❤️',
        ':HB'         => '🎂',
        ':wow'        => '😮',
        // Extended legacy codes
        ':smile'      => '😊',
        ':grin'       => '😁',
        ':wink'       => '😉',
        ':tongue'     => '😛',
        ':cool'       => '😎',
        ':angry'      => '😠',
        ':sad'        => '😞',
        ':laugh'      => '😆',
        ':hug'        => '🤗',
        ':thumb'      => '👍',
        ':thumbs'     => '👍',
        ':thumbsup'   => '👍',
        ':fire'       => '🔥',
        ':star'       => '⭐',
        ':heart'      => '❤️',
        ':broken'     => '💔',
        ':music'      => '🎵',
        ':note'       => '🎶',
        ':art'        => '🎨',
        ':camera'     => '📷',
        ':gift'       => '🎁',
        ':cake'       => '🎂',
        ':wave'       => '👋',
        ':ok'         => '👌',
        ':pray'       => '🙏',
        ':think'      => '🤔',
        ':eyes'       => '👀',
        ':rainbow'    => '🌈',
        ':sun'        => '☀️',
        ':moon'       => '🌙',
        ':party'      => '🎉',
        ':bomb'       => '💣',
        ':skull'      => '💀',
        ':alien'      => '👽',
        ':robot'      => '🤖',
        ':poop'       => '💩',
        ':money'      => '💰',
        ':bulb'       => '💡',
        ':check'      => '✅',
        ':x'          => '❌',
        ':warning'    => '⚠️',
        ':question'   => '❓',
        ':exclamation' => '❗',
        ':100'        => '💯',
    ];

    /**
     * Convert all legacy smiley codes in $text to Unicode emoji.
     *
     * A code is replaced only when it is preceded by whitespace or the start
     * of a line, and followed by whitespace, the end of a line, or one of the
     * punctuation characters . , ! ? ;  — so codes embedded in a word or URL
     * are left alone.
     *
     * If the regex engine fails (for example because $text is not valid
     * UTF-8), the input is returned unchanged rather than being lost.
     *
     * @param  string $text  Text that may contain legacy smiley codes.
     * @return string        The text with every matching code replaced.
     */
    public static function convert(string $text): string
    {
        if ($text === '') {
            return $text;
        }

        // Single pass over the text: the matched code is looked up directly
        // in the map, so no per-code regex run is needed.
        $converted = preg_replace_callback(
            self::pattern(),
            static fn (array $match): string => self::MAP[$match[0]],
            $text
        );

        // preg_replace_callback() yields null on PCRE errors (bad UTF-8,
        // backtrack limit, ...); fall back to the untouched input.
        return $converted ?? $text;
    }

    /**
     * Returns all codes that are present in the given text (for reporting).
     *
     * Uses the same boundary rules as convert(). Each code is reported at most
     * once, in the order in which the codes are declared in the map. An empty
     * list is returned when nothing matches or the regex engine fails.
     *
     * @param  string $text  Text to inspect.
     * @return string[]      The distinct codes found in $text.
     */
    public static function detect(string $text): array
    {
        $hits = preg_match_all(self::pattern(), $text, $matches);
        if ($hits === false || $hits === 0) {
            return [];
        }

        // Flip for O(1) membership checks, then walk the map keys so the
        // result keeps map order and contains no duplicates.
        $present = array_flip($matches[0]);

        return array_values(array_filter(
            array_keys(self::MAP),
            static fn (string $code): bool => isset($present[$code])
        ));
    }

    /**
     * Collapse consecutive runs of the same emoji that exceed $maxRun repetitions.
     *
     * Transforms e.g. "🍺 🍺 🍺 🍺 🍺 🍺 🍺 🍺" (8×) → "🍺 🍺 🍺 🍺 🍺 ×8"
     * so that spam/flood content is stored compactly and rendered readably.
     *
     * Both whitespace-separated ("🍺 🍺 🍺") and run-together ("🍺🍺🍺") forms
     * are collapsed; the collapsed output is always space-separated. Only
     * code points in U+1F000–U+1FFFF and U+2600–U+27EF (optionally followed by
     * a variation selector) are considered, so regular text is never touched.
     * Note that emoji outside those ranges — e.g. ⭐ (U+2B50) — are not
     * collapsed.
     *
     * If the regex engine fails, the input is returned unchanged.
     *
     * @param  string $text    Text that may contain emoji floods.
     * @param  int    $maxRun  Maximum number of identical emoji to keep (default 5).
     *                         Values below 1 are treated as 1.
     * @return string          The text with over-long runs collapsed.
     */
    public static function collapseFlood(string $text, int $maxRun = 5): string
    {
        if ($text === '') {
            return $text;
        }

        $limit = max(1, $maxRun);

        // Match one emoji "unit" (codepoint from common ranges + optional variation
        // selector U+FE0E / U+FE0F), followed by $limit or more repetitions of
        // (optional horizontal whitespace + the same unit) — i.e. a run of at
        // least $limit + 1 identical units.
        // The \1 backreference works byte-for-byte in UTF-8, so it correctly
        // matches the same multi-byte sequence each time.
        $pattern = '/([\x{1F000}-\x{1FFFF}\x{2600}-\x{27EF}][\x{FE0E}\x{FE0F}]?)'
                 . '([ \t]*\1){' . $limit . ',}/u';

        $collapsed = preg_replace_callback(
            $pattern,
            static function (array $m) use ($limit): string {
                $unit = $m[1];
                // substr_count is byte-safe and correct for multi-byte sequences.
                $count = substr_count($m[0], $unit);

                // Keep $limit copies separated by single spaces, then append
                // the original run length.
                return str_repeat($unit . ' ', $limit - 1) . $unit . ' ×' . $count;
            },
            $text
        );

        return $collapsed ?? $text;
    }

    /**
     * Get the full mapping array.
     *
     * @return array<string, string>  Legacy code => emoji.
     */
    public static function getMap(): array
    {
        return self::MAP;
    }

    /**
     * Build (once per request) the regex that matches any known code at a
     * valid boundary.
     *
     * Codes that are prefixes of one another (:thumb / :thumbs / :thumbsup)
     * need no special ordering: the trailing lookahead rejects a shorter
     * alternative whenever more code characters follow, and the engine then
     * backtracks to the longer alternative.
     */
    private static function pattern(): string
    {
        static $pattern = null;

        if ($pattern === null) {
            $alternatives = array_map(
                static fn (string $code): string => preg_quote($code, '/'),
                array_keys(self::MAP)
            );

            $pattern = '/(?<=\s|^)(?:' . implode('|', $alternatives) . ')(?=\s|$|[.,!?;])/um';
        }

        return $pattern;
    }
}