/**
 * emojiFlood.js
 *
 * Utilities for detecting and collapsing emoji flood content on the client.
 *
 * These mirror the PHP-side logic in LegacySmileyMapper::collapseFlood() and
 * ContentSanitizer::validate() so that the UI can apply the same rules without
 * a round-trip to the server.
 */

// ── Constants ─────────────────────────────────────────────────────────────────

/** Absolute emoji count above which text is considered a flood for display. */
export const FLOOD_COUNT_THRESHOLD = 20

/** Ratio of emoji / total chars above which text is a density flood. */
export const FLOOD_DENSITY_THRESHOLD = 0.40

/** Maximum consecutive identical emoji kept before the rest are collapsed. */
export const COLLAPSE_MAX_RUN = 5

// ── Emoji detection ───────────────────────────────────────────────────────────

/**
 * Regex that matches a single emoji "unit":
 *   • one code point in U+1F000–U+1FFFF (most modern emoji), or
 *   • one code point in U+2600–U+27BF (misc symbols, dingbats),
 *   • optionally followed by the U+FE0F variation selector.
 *
 * Note: ZWJ sequences and flags are not fully modelled here, but those
 * multi-codepoint emoji are extremely unlikely in legacy flood spam.
 *
 * The `u` flag is required for the \u{…} escapes and makes the character class
 * match whole code points rather than surrogate halves.
 *
 * The `g` flag makes this instance stateful under `.exec()` / `.test()` (they
 * advance `lastIndex`), so only use it with `String.prototype.match`, or reset
 * `lastIndex` first.
 */
const EMOJI_UNIT_RE = /[\u{1F000}-\u{1FFFF}\u{2600}-\u{27BF}]\uFE0F?/gu

/**
 * Count the emoji units in a string.
 *
 * A "unit" is one match of EMOJI_UNIT_RE, so an emoji followed by a variation
 * selector counts once.
 *
 * @param {string} text  Text to scan. Falsy values count as empty; any other
 *                       non-string value is converted with String() first.
 * @returns {number}     Number of emoji units found (0 when there are none).
 */
export function countEmoji(text) {
  if (!text) return 0
  // String() keeps a stray number/object from throwing on a missing `.match`.
  // `match` with a global regex starts from index 0 on every call and returns
  // null (not an empty array) when nothing matches.
  const matches = String(text).match(EMOJI_UNIT_RE)
  return matches === null ? 0 : matches.length
}

// ── Flood detection ───────────────────────────────────────────────────────────

/**
 * Returns true when the text qualifies as an emoji flood.
 *
 * Two independent flood signals:
 *   1. Absolute count: more than `maxCount` emoji
 *      (e.g. 21 beer mugs with the default threshold).
 *   2. Density: emoji make up more than `maxDensity` of the text's characters
 *      (e.g. "🍺🍺🍺🍺🍺🍺 ok" — 6 emoji out of 9 characters ≈ 67%).
 *      Only applied when count > 5 to avoid false-positives on short strings.
 *
 * Density is measured in Unicode code points, so an emoji outside the BMP
 * weighs the same as a letter. A U+FE0F variation selector still counts as a
 * character of its own in the denominator.
 *
 * @param {string} text
 * @param {object} [opts]
 * @param {number} [opts.maxCount=FLOOD_COUNT_THRESHOLD]
 * @param {number} [opts.maxDensity=FLOOD_DENSITY_THRESHOLD]
 * @returns {boolean}
 */
export function isFlood(text, { maxCount = FLOOD_COUNT_THRESHOLD, maxDensity = FLOOD_DENSITY_THRESHOLD } = {}) {
  if (!text) return false

  const count = countEmoji(text)
  if (count > maxCount) return true

  // Too few emoji for the ratio to be meaningful.
  if (count <= 5) return false

  // `text.length` counts UTF-16 code units, which would make every astral
  // emoji weigh half a character and cap the density of an all-emoji string at
  // 0.5. Array.from() iterates by code point instead. The length is never 0
  // here because at least six emoji were matched.
  const totalChars = Array.from(text).length
  return count / totalChars > maxDensity
}

// ── Flood collapsing ──────────────────────────────────────────────────────────

/**
 * Collapse consecutive runs of the same emoji that exceed `maxRun` repetitions.
 *
 * Intended to mirror LegacySmileyMapper::collapseFlood() on the PHP side:
 *   "🍺 🍺 🍺 🍺 🍺 🍺 🍺 🍺"  (8×)  →  "🍺 🍺 🍺 🍺 🍺 ×8"
 *
 * Algorithm:
 *   1. Scan the string for all emoji "units" (codepoint + optional variation selector).
 *   2. Group consecutive matches of the SAME emoji where the gap between each
 *      pair is pure horizontal whitespace (space / tab / no gap at all).
 *   3. Runs longer than `maxRun` are replaced with `maxRun` copies + " ×N".
 *      The kept copies are always joined by single spaces, whatever spacing the
 *      input used. Shorter runs and all other text are copied through verbatim.
 *
 * @param {string} text
 * @param {number} [maxRun=COLLAPSE_MAX_RUN]  Number of emoji kept per collapsed
 *   run. Fractions are rounded down and values below 1 are treated as 1, so a
 *   collapsed run always keeps at least one emoji. NaN disables collapsing.
 * @returns {string}  The collapsed text, or `text` itself when nothing changed.
 */
export function collapseEmojiRuns(text, maxRun = COLLAPSE_MAX_RUN) {
  if (!text) return text

  // Normalise the limit: Array(n) throws a RangeError for fractional or
  // negative n, and a limit of 0 would delete the emoji and leave a bare " ×N".
  const wholeLimit = Math.floor(maxRun)
  if (Number.isNaN(wholeLimit)) return text // no run can exceed NaN
  const keep = Math.max(1, wholeLimit)

  // Step 1 – locate every emoji unit in the string.
  // Same pattern as the module-level EMOJI_UNIT_RE, but a fresh literal per
  // call: exec() on a /g regex advances lastIndex, so sharing one instance
  // would leak scan state between calls.
  const unitRe = /[\u{1F000}-\u{1FFFF}\u{2600}-\u{27BF}]\uFE0F?/gu
  const hits = []
  let m
  while ((m = unitRe.exec(text)) !== null) {
    hits.push({ index: m.index, val: m[0], end: m.index + m[0].length })
  }
  if (hits.length === 0) return text

  // Steps 2 + 3 – walk the hits run by run and rebuild the string as we go.
  // Only over-long runs are rewritten; everything between them (including
  // short runs) is copied from the source in one slice.
  const blankGap = /^[ \t]*$/ // not global, so test() is stateless
  let result = ''
  let pos = 0 // first source index not yet copied into `result`
  let collapsedAny = false
  let start = 0

  while (start < hits.length) {
    // Extend the run while the next hit is the same emoji and only spaces or
    // tabs (or nothing) separate it from the previous one.
    let last = start
    while (
      last + 1 < hits.length &&
      hits[last + 1].val === hits[start].val &&
      blankGap.test(text.slice(hits[last].end, hits[last + 1].index))
    ) {
      last++
    }

    const count = last - start + 1
    if (count > keep) {
      result += text.slice(pos, hits[start].index)
      result += Array(keep).fill(hits[start].val).join(' ') + ' ×' + count
      pos = hits[last].end
      collapsedAny = true
    }

    start = last + 1
  }

  // Hand back the original string untouched when no run was long enough.
  if (!collapsedAny) return text

  return result + text.slice(pos)
}