'🍺',
        ':clap' => '👏',
        ':coffee' => '☕',
        ':cry' => '😢',
        ':lol' => '😂',
        ':love' => '❤️',
        ':HB' => '🎂',
        ':wow' => '😮',
        // Extended legacy codes
        ':smile' => '😊',
        ':grin' => '😁',
        ':wink' => '😉',
        ':tongue' => '😛',
        ':cool' => '😎',
        ':angry' => '😠',
        ':sad' => '😞',
        ':laugh' => '😆',
        ':hug' => '🤗',
        ':thumb' => '👍',
        ':thumbs' => '👍',
        ':thumbsup' => '👍',
        ':fire' => '🔥',
        ':star' => '⭐',
        ':heart' => '❤️',
        ':broken' => '💔',
        ':music' => '🎵',
        ':note' => '🎶',
        ':art' => '🎨',
        ':camera' => '📷',
        ':gift' => '🎁',
        ':cake' => '🎂',
        ':wave' => '👋',
        ':ok' => '👌',
        ':pray' => '🙏',
        ':think' => '🤔',
        ':eyes' => '👀',
        ':rainbow' => '🌈',
        ':sun' => '☀️',
        ':moon' => '🌙',
        ':party' => '🎉',
        ':bomb' => '💣',
        ':skull' => '💀',
        ':alien' => '👽',
        ':robot' => '🤖',
        ':poop' => '💩',
        ':money' => '💰',
        ':bulb' => '💡',
        ':check' => '✅',
        ':x' => '❌',
        ':warning' => '⚠️',
        ':question' => '❓',
        ':exclamation' => '❗',
        ':100' => '💯',
    ];

    /**
     * Build the PCRE pattern that matches a legacy code as a stand-alone token.
     *
     * The code must be preceded by whitespace or the start of a line, and
     * followed by whitespace, the end of a line, or one of `. , ! ? ;`.
     * Shared by convert() and detect() so both always agree on what counts
     * as an occurrence.
     *
     * @param string $code Raw legacy code, e.g. ":lol"; it is regex-escaped here.
     */
    private static function tokenPattern(string $code): string
    {
        return '/(?<=\s|^)' . preg_quote($code, '/') . '(?=\s|$|[.,!?;])/um';
    }

    /**
     * Convert all legacy smiley codes in $text to Unicode emoji.
     *
     * A code is only replaced when it stands alone: preceded by whitespace or
     * the start of a line, and followed by whitespace, the end of a line, or
     * one of the punctuation characters `. , ! ? ;`. Codes embedded in a
     * longer word are left untouched.
     *
     * If the regex engine reports an error (for example because $text is not
     * valid UTF-8), conversion stops and the last successfully converted text
     * is returned instead of discarding the input.
     *
     * @param string $text Text that may contain legacy codes.
     *
     * @return string The text with every stand-alone code replaced.
     */
    public static function convert(string $text): string
    {
        if ($text === '') {
            return $text;
        }

        foreach (static::$map as $code => $emoji) {
            $replaced = preg_replace(self::tokenPattern((string) $code), $emoji, $text);

            if ($replaced === null) {
                // preg_replace() yields null on PCRE errors (typically malformed
                // UTF-8 under the /u modifier). Keep what we have rather than
                // letting null wipe the text or violate the return type.
                return $text;
            }

            $text = $replaced;
        }

        return $text;
    }

    /**
     * Returns all codes that are present in the given text (for reporting).
     *
     * Uses the same stand-alone-token rule as convert(), so a code is reported
     * exactly when convert() would replace it.
     *
     * @param string $text Text to inspect.
     *
     * @return string[] Matching codes, in map order; each code appears at most once.
     */
    public static function detect(string $text): array
    {
        $found = [];

        foreach (array_keys(static::$map) as $code) {
            $code = (string) $code;

            // preg_match() returns false on error; that is treated as "not found".
            if (preg_match(self::tokenPattern($code), $text) === 1) {
                $found[] = $code;
            }
        }

        return $found;
    }

    /**
     * Collapse consecutive runs of the same emoji that exceed $maxRun repetitions.
     *
     * Transforms e.g.
     *   "🍺 🍺 🍺 🍺 🍺 🍺 🍺 🍺" (8×) → "🍺 🍺 🍺 🍺 🍺 ×8"
     * so that spam/flood content is stored compactly and rendered readably.
     *
     * Both whitespace-separated ("🍺 🍺 🍺") and run-together ("🍺🍺🍺") forms
     * are collapsed; the emoji that are kept are always emitted separated by
     * single spaces. Only emoji from the common Unicode blocks are affected
     * (U+1F000–U+1FFFF, U+2600–U+27EF and U+2B00–U+2BFF, optionally followed
     * by a variation selector); regular text is never touched.
     *
     * A run is never ended directly in front of a variation selector, a
     * zero-width joiner or a skin-tone modifier, so a differently-styled
     * emoji that follows the run is not split apart.
     *
     * @param string $text   Text to scan.
     * @param int    $maxRun Maximum number of identical emoji to keep (default 5).
     *                       Values below 1 are treated as 1.
     *
     * @return string The text with over-long runs collapsed, or the unchanged
     *                input if the regex engine reports an error.
     */
    public static function collapseFlood(string $text, int $maxRun = 5): string
    {
        if ($text === '') {
            return $text;
        }

        $limit = max(1, $maxRun);

        // Group 1 is one emoji "unit": a code point from the supported ranges
        // plus an optional variation selector (U+FE0E / U+FE0F). It must be
        // followed by $limit or more repetitions of (optional horizontal
        // whitespace + the same unit), i.e. more than $limit units in total.
        // The \1 backreference compares the exact same UTF-8 sequence.
        // The trailing negative lookahead makes the engine back off when the
        // last matched unit is really the base of a longer sequence
        // (variation selector, ZWJ or Fitzpatrick modifier follows).
        $pattern = '/([\x{1F000}-\x{1FFFF}\x{2600}-\x{27EF}\x{2B00}-\x{2BFF}][\x{FE0E}\x{FE0F}]?)'
            . '(?:[ \t]*\1){' . $limit . ',}'
            . '(?![\x{FE0E}\x{FE0F}\x{200D}\x{1F3FB}-\x{1F3FF}])/u';

        $collapsed = preg_replace_callback(
            $pattern,
            static function (array $m) use ($limit): string {
                $unit = $m[1];
                // substr_count is byte-safe and correct for multi-byte sequences:
                // the match consists only of the unit and horizontal whitespace.
                $count = substr_count($m[0], $unit);

                return str_repeat($unit . ' ', $limit - 1) . $unit . ' ×' . $count;
            },
            $text
        );

        // null signals a PCRE failure; fall back to the untouched input.
        return $collapsed ?? $text;
    }

    /**
     * Get the full mapping array.
     *
     * @return array Map of legacy smiley code (e.g. ":lol") to its Unicode emoji.
     */
    public static function getMap(): array
    {
        return static::$map;
    }
}