Files
SkinbaseNova/app/Services/LegacySmileyMapper.php
2026-02-26 21:12:32 +01:00

168 lines
5.2 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace App\Services;
/**
* Centralized mapping from legacy GIF smiley codes to Unicode emoji.
*
* Usage:
* $result = LegacySmileyMapper::convert($text);
* $map = LegacySmileyMapper::getMap();
*/
class LegacySmileyMapper
{
    /**
     * The canonical smiley-code → emoji map.
     *
     * Keys are the literal legacy codes (case-sensitive, including the leading
     * colon). Several codes may map to the same emoji (e.g. ':thumb',
     * ':thumbs', ':thumbsup'); each alias is listed as its own key.
     *
     * @var array<string, string>
     */
    private static array $map = [
        // Core
        ':beer' => '🍺',
        ':clap' => '👏',
        ':coffee' => '☕',
        ':cry' => '😢',
        ':lol' => '😂',
        ':love' => '❤️',
        ':HB' => '🎂',
        ':wow' => '😮',
        // Extended legacy codes
        ':smile' => '😊',
        ':grin' => '😁',
        ':wink' => '😉',
        ':tongue' => '😛',
        ':cool' => '😎',
        ':angry' => '😠',
        ':sad' => '😞',
        ':laugh' => '😆',
        ':hug' => '🤗',
        ':thumb' => '👍',
        ':thumbs' => '👍',
        ':thumbsup' => '👍',
        ':fire' => '🔥',
        ':star' => '⭐',
        ':heart' => '❤️',
        ':broken' => '💔',
        ':music' => '🎵',
        ':note' => '🎶',
        ':art' => '🎨',
        ':camera' => '📷',
        ':gift' => '🎁',
        ':cake' => '🎂',
        ':wave' => '👋',
        ':ok' => '👌',
        ':pray' => '🙏',
        ':think' => '🤔',
        ':eyes' => '👀',
        ':rainbow' => '🌈',
        ':sun' => '☀️',
        ':moon' => '🌙',
        ':party' => '🎉',
        ':bomb' => '💣',
        ':skull' => '💀',
        ':alien' => '👽',
        ':robot' => '🤖',
        ':poop' => '💩',
        ':money' => '💰',
        ':bulb' => '💡',
        ':check' => '✅',
        ':x' => '❌',
        ':warning' => '⚠️',
        ':question' => '❓',
        ':exclamation' => '❗',
        ':100' => '💯',
    ];

    /**
     * Convert all legacy smiley codes in $text to Unicode emoji.
     *
     * A code is replaced only when it is preceded by whitespace or the start
     * of a line, and followed by whitespace, the end of a line, or one of the
     * punctuation characters . , ! ? ; — so codes embedded in words or URLs
     * are left alone.
     *
     * If the regex engine reports an error (for example because $text is not
     * valid UTF-8), the input is returned unchanged instead of being lost.
     *
     * @param string $text Text that may contain legacy smiley codes.
     *
     * @return string The text with every recognised code replaced.
     */
    public static function convert(string $text): string
    {
        if ($text === '') {
            return $text;
        }

        $patterns = array_map(
            static fn (string $code): string => self::boundaryPattern($code),
            array_keys(self::$map)
        );

        // With array arguments preg_replace applies each pattern in order to the
        // result of the previous one, exactly like a manual loop. It returns
        // null on a PCRE error, in which case the original text is kept.
        return preg_replace($patterns, array_values(self::$map), $text) ?? $text;
    }

    /**
     * Returns all codes that are present in the given text (for reporting).
     *
     * Uses the same boundary rules as convert(). Codes are reported once each,
     * in the order they are declared in the map.
     *
     * @param string $text Text to scan.
     *
     * @return string[]
     */
    public static function detect(string $text): array
    {
        $found = [];

        foreach (array_keys(self::$map) as $code) {
            // preg_match yields 1 on a hit, 0 on a miss and false on error;
            // only a real hit counts as a detection.
            if (preg_match(self::boundaryPattern($code), $text) === 1) {
                $found[] = $code;
            }
        }

        return $found;
    }

    /**
     * Collapse consecutive runs of the same emoji that exceed $maxRun repetitions.
     *
     * Transforms e.g. "🍺 🍺 🍺 🍺 🍺 🍺 🍺 🍺" (8×) → "🍺 🍺 🍺 🍺 🍺 ×8"
     * so that spam/flood content is stored compactly and rendered readably.
     *
     * Both space/tab-separated ("🍺 🍺 🍺") and run-together ("🍺🍺🍺") forms
     * are collapsed. Only code points in U+1F000–U+1FFFF and U+2600–U+27EF
     * (optionally followed by a variation selector) are considered; other
     * text is never touched. If the regex engine reports an error the input
     * is returned unchanged.
     *
     * @param string $text   Text to compact.
     * @param int    $maxRun Maximum number of identical emoji to keep (default 5);
     *                       values below 1 are treated as 1.
     *
     * @return string The text with over-long emoji runs collapsed.
     */
    public static function collapseFlood(string $text, int $maxRun = 5): string
    {
        if ($text === '') {
            return $text;
        }

        $limit = max(1, $maxRun);

        // Match one emoji "unit" (code point from the ranges above + optional
        // variation selector U+FE0E / U+FE0F), followed by $limit or more
        // repetitions of (optional horizontal whitespace + the same unit),
        // i.e. a run of at least $limit + 1 identical units.
        $pattern = '/([\x{1F000}-\x{1FFFF}\x{2600}-\x{27EF}][\x{FE0E}\x{FE0F}]?)'
            . '([ \t]*\1){' . $limit . ',}/u';

        return preg_replace_callback(
            $pattern,
            static function (array $m) use ($limit): string {
                $unit = $m[1];
                // The match consists solely of units and separators, so a
                // byte-wise substring count gives the run length.
                $count = substr_count($m[0], $unit);

                return str_repeat($unit . ' ', $limit - 1) . $unit . ' ×' . $count;
            },
            $text
        ) ?? $text;
    }

    /**
     * Get the full mapping array.
     *
     * @return array<string, string>
     */
    public static function getMap(): array
    {
        return self::$map;
    }

    /**
     * Build the regex that matches a single smiley code at a "word boundary".
     *
     * The code must be preceded by whitespace or a line start and followed by
     * whitespace, a line end, or one of . , ! ? ;
     *
     * @param string $code Literal smiley code, e.g. ':beer'.
     *
     * @return string A delimited PCRE pattern with the u and m modifiers.
     */
    private static function boundaryPattern(string $code): string
    {
        return '/(?<=\s|^)' . preg_quote($code, '/') . '(?=\s|$|[.,!?;])/um';
    }
}