Files
SkinbaseNova/resources/js/utils/emojiFlood.js
2026-02-26 21:12:32 +01:00

139 lines
4.9 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* emojiFlood.js
*
* Utilities for detecting and collapsing emoji flood content on the client.
*
* These mirror the PHP-side logic in LegacySmileyMapper::collapseFlood() and
* ContentSanitizer::validate() so that the UI can apply the same rules without
* a round-trip to the server.
*/
// ── Constants ─────────────────────────────────────────────────────────────────
/**
 * Absolute emoji count above which text is considered a flood for display.
 * Default `maxCount` of `isFlood`, which compares with a strict `>`: a text
 * holding exactly this many emoji is not flagged by count alone.
 */
export const FLOOD_COUNT_THRESHOLD = 20
/**
 * Ratio of emoji count to text length above which text is a density flood.
 * Default `maxDensity` of `isFlood` (also a strict `>` comparison).
 */
export const FLOOD_DENSITY_THRESHOLD = 0.40
/**
 * Maximum consecutive identical emoji kept before the rest are collapsed.
 * Default `maxRun` of `collapseEmojiRuns`; runs of exactly this length are
 * left untouched, only longer runs are rewritten.
 */
export const COLLAPSE_MAX_RUN = 5
// ── Emoji detection ───────────────────────────────────────────────────────────
/**
 * Regex that matches a single emoji "unit":
 *   • one code point in U+1F000–U+1FFFF (most modern emoji), or
 *   • one code point in U+2600–U+27BF (misc symbols, dingbats),
 *   • optionally followed by the U+FE0F variation selector.
 *
 * Note: ZWJ sequences and flags are not fully modelled here, but those
 * multi-codepoint emoji are extremely unlikely in legacy flood spam.
 *
 * Flags: `u` makes the character class work on whole code points (required
 * for the `\u{...}` escapes); `g` makes `String.prototype.match` return every
 * match. Because of `g` this object carries `lastIndex` state under
 * `.exec()` / `.test()`, so use it with `.match()` only, or reset `lastIndex`
 * before any stateful use.
 */
const EMOJI_UNIT_RE = /[\u{1F000}-\u{1FFFF}\u{2600}-\u{27BF}]\uFE0F?/gu
/**
 * Tally the emoji units contained in a string.
 *
 * A "unit" is whatever EMOJI_UNIT_RE matches: one emoji code point plus an
 * optional trailing variation selector.
 *
 * @param {string} text - Text to scan; any falsy value yields 0.
 * @returns {number} Number of emoji units found (0 when there are none).
 */
export function countEmoji(text) {
  // String.prototype.match with a global regex returns null when nothing matches.
  const matches = text ? text.match(EMOJI_UNIT_RE) : null
  return matches ? matches.length : 0
}
// ── Flood detection ───────────────────────────────────────────────────────────
/**
 * Returns true when the text qualifies as an emoji flood.
 *
 * Two independent flood signals, either of which is sufficient:
 *   1. Absolute count: more than `maxCount` emoji (e.g. 21 beer mugs).
 *   2. Density: emoji make up more than `maxDensity` of the text's characters.
 *      Only applied when the text holds more than 5 emoji, to avoid
 *      false positives on short strings.
 *
 * Density is measured against the number of Unicode code points, not
 * `text.length`. `String.prototype.length` counts UTF-16 code units, and every
 * emoji in U+1F000–U+1FFFF occupies two of them; dividing by `length` would
 * cap the density of an all-emoji string at 0.5 and classify the same message
 * differently depending on whether its emoji are astral (🍺) or BMP (☀).
 *
 * @param {string} text - Text to inspect; any falsy value is never a flood.
 * @param {object} [opts]
 * @param {number} [opts.maxCount=FLOOD_COUNT_THRESHOLD] - Emoji count that must be exceeded.
 * @param {number} [opts.maxDensity=FLOOD_DENSITY_THRESHOLD] - Emoji/character ratio that must be exceeded.
 * @returns {boolean}
 */
export function isFlood(text, { maxCount = FLOOD_COUNT_THRESHOLD, maxDensity = FLOOD_DENSITY_THRESHOLD } = {}) {
  if (!text) return false

  const count = countEmoji(text)
  if (count > maxCount) return true

  // Short bursts of emoji are normal; the density rule needs more than 5.
  if (count <= 5) return false

  // Spreading a string iterates by code point, so each emoji counts as one
  // character regardless of how many UTF-16 code units it needs.
  const totalChars = [...text].length
  return count / totalChars > maxDensity
}
// ── Flood collapsing ──────────────────────────────────────────────────────────
/**
 * Shorten over-long runs of one repeated emoji to `maxRun` copies plus a
 * " ×N" counter, where N is the original number of repetitions.
 *
 * Behaviour mirrors LegacySmileyMapper::collapseFlood() on the PHP side:
 *   "🍺 🍺 🍺 🍺 🍺 🍺 🍺 🍺"  (8×)  →  "🍺 🍺 🍺 🍺 🍺 ×8"
 *
 * How it works:
 *   1. Locate every emoji unit (code point + optional variation selector).
 *   2. Merge neighbouring units into a run when they are the SAME emoji and
 *      only spaces/tabs (or nothing) sit between them. A newline, any other
 *      character, or a different emoji ends the run.
 *   3. Rebuild the text: runs longer than `maxRun` become `maxRun` copies
 *      joined by single spaces followed by " ×N"; everything else, including
 *      the original spacing inside short runs, is copied through verbatim.
 *
 * @param {string} text - Text to process; falsy values are returned as-is.
 * @param {number} [maxRun=COLLAPSE_MAX_RUN] - Longest run left untouched.
 * @returns {string} The input itself when nothing needed collapsing.
 */
export function collapseEmojiRuns(text, maxRun = COLLAPSE_MAX_RUN) {
  if (!text) return text

  // Phase 1 — find each emoji unit. The regex is a fresh local literal, so its
  // `lastIndex` cursor always starts at 0 for this call.
  const emojiPattern = /[\u{1F000}-\u{1FFFF}\u{2600}-\u{27BF}]\uFE0F?/gu
  const units = []
  for (let found = emojiPattern.exec(text); found !== null; found = emojiPattern.exec(text)) {
    const glyph = found[0]
    units.push({ glyph, start: found.index, stop: found.index + glyph.length })
  }
  if (units.length === 0) return text

  // Phase 2 — fold the units into runs, extending the latest run whenever the
  // next unit is the same emoji and the gap since the run's end is blank.
  const groups = []
  for (const unit of units) {
    const latest = groups[groups.length - 1]
    const continuesRun =
      latest !== undefined &&
      unit.glyph === latest.glyph &&
      /^[ \t]*$/.test(text.slice(latest.stop, unit.start))

    if (continuesRun) {
      latest.stop = unit.stop
      latest.size += 1
    } else {
      groups.push({ glyph: unit.glyph, start: unit.start, stop: unit.stop, size: 1 })
    }
  }

  // Phase 3 — nothing too long means the input is returned untouched.
  const needsCollapse = groups.some((group) => group.size > maxRun)
  if (!needsCollapse) return text

  const pieces = []
  let cursor = 0
  for (const group of groups) {
    // Text between the previous run and this one is copied unchanged.
    pieces.push(text.slice(cursor, group.start))
    pieces.push(
      group.size > maxRun
        ? `${Array(maxRun).fill(group.glyph).join(' ')} ×${group.size}`
        : text.slice(group.start, group.stop)
    )
    cursor = group.stop
  }
  pieces.push(text.slice(cursor))
  return pieces.join('')
}