Files
SkinbaseNova/app/Services/RankingService.php

343 lines
13 KiB
PHP

<?php
declare(strict_types=1);
namespace App\Services;
use App\Models\Artwork;
use App\Models\RankArtworkScore;
use App\Models\RankList;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
/**
* RankingService — Skinbase Nova rank_v1
*
* Responsibilities:
* 1. Score computation — turn raw artwork signals into three float scores.
* 2. Diversity filtering — cap items per author while keeping rank order.
* 3. List read / cache — serve ranked lists from Redis, falling back to DB,
* and ultimately to latest-first if no list is built yet.
*/
final class RankingService
{
    /** List types for which a score is computed and a ranked list is cached. */
    private const LIST_TYPES = ['trending', 'new_hot', 'best'];

    // ── Score computation ──────────────────────────────────────────────────

    /**
     * Compute all three ranking scores for a single artwork data row.
     *
     * Formula (all tunables come from config('ranking')):
     *   E        = wV·ln(1+views) + wF·ln(1+favourites) + wD·ln(1+downloads)
     *   decay    = exp(-age_hours / T)   — T is read from `half_life.*`; note that
     *              this makes T an e-folding time constant (the score halves at
     *              T·ln 2 hours), not a literal half-life.
     *   Q        = 1 + tag/thumbnail bonuses − hidden penalty
     *   trending = E_7d  · decay · (1 + Q) · spamFactor
     *   new_hot  = E_7d  · decay · novelty · (1 + Q)
     *   best     = E_all · decay · (1 + Q)
     *
     * A `half_life.*` value of 0 is not guarded and raises DivisionByZeroError.
     *
     * @param object $row stdClass with fields:
     *                    views_7d, favourites_7d, downloads_7d,
     *                    views_all, favourites_all, downloads_all,
     *                    views_24h, favourites_24h, downloads_24h,
     *                    age_hours, tag_count, has_thumbnail (bool 0/1),
     *                    is_public, is_approved
     * @return array{score_trending: float, score_new_hot: float, score_best: float}
     */
    public function computeScores(object $row): array
    {
        $cfg      = config('ranking');
        $weights  = $cfg['weights'];
        $quality  = $cfg['quality'];
        $spam     = $cfg['spam'];
        $halfLife = $cfg['half_life'];

        // 3.1 Base engagement: 7-day window for trending / new & hot, all-time for "best".
        $engagement7d  = $this->engagement($weights, $row->views_7d, $row->favourites_7d, $row->downloads_7d);
        $engagementAll = $this->engagement($weights, $row->views_all, $row->favourites_all, $row->downloads_all);

        // 3.2 Freshness decay (negative ages are treated as "just published").
        $ageH          = max(0.0, (float) $row->age_hours);
        $decayTrending = exp(-$ageH / (float) $halfLife['trending']);
        $decayNewHot   = exp(-$ageH / (float) $halfLife['new_hot']);
        $decayBest     = exp(-$ageH / (float) $halfLife['best']);

        // 3.3 Quality modifier
        $tagCount    = max(0, (int) $row->tag_count);
        $tagCountMax = (int) $quality['tag_count_max'];
        $isVisible   = (bool) $row->is_public && (bool) $row->is_approved;

        $Q = 1.0;
        if ($tagCount > 0) {
            $Q += (float) $quality['has_tags'];
        }
        if ((bool) $row->has_thumbnail) {
            $Q += (float) $quality['has_thumbnail'];
        }
        // A non-positive tag_count_max disables the proportional bonus instead of
        // dividing by zero (which throws DivisionByZeroError on PHP 8).
        if ($tagCountMax > 0) {
            $Q += (float) $quality['tag_count_bonus']
                * (min($tagCount, $tagCountMax) / (float) $tagCountMax);
        }
        if (! $isVisible) {
            $Q -= (float) $quality['penalty_hidden'];
        }

        // 3.4 Novelty boost (New & Hot): fades with a fixed 24-hour time constant.
        $novelty = 1.0 + (float) $cfg['novelty_weight'] * exp(-$ageH / 24.0);

        // Anti-spam damping on trending score only: many views in 24h with almost
        // no favourites AND almost no downloads looks like inflated traffic.
        $spamFactor = 1.0;
        $views24h   = (float) $row->views_24h;
        if ($views24h > 0.0 && $views24h > (float) $spam['views_24h_threshold']) {
            $favRatio = (float) $row->favourites_24h / $views24h;
            $dlRatio  = (float) $row->downloads_24h / $views24h;
            if ($favRatio < (float) $spam['fav_ratio_threshold']
                && $dlRatio < (float) $spam['dl_ratio_threshold']
            ) {
                $spamFactor = (float) $spam['trending_penalty_factor'];
            }
        }

        // $Q already starts at 1.0, so the (1 + Q) multiplier is >= 2 unless the
        // hidden penalty pulls it down.
        $qualityMultiplier = 1.0 + $Q;

        $scoreTrending = $engagement7d * $decayTrending * $qualityMultiplier * $spamFactor;
        $scoreNewHot   = $engagement7d * $decayNewHot * $novelty * $qualityMultiplier;
        $scoreBest     = $engagementAll * $decayBest * $qualityMultiplier;

        // Scores are never negative, even if the hidden penalty makes (1 + Q) < 0.
        return [
            'score_trending' => max(0.0, $scoreTrending),
            'score_new_hot'  => max(0.0, $scoreNewHot),
            'score_best'     => max(0.0, $scoreBest),
        ];
    }

    /**
     * Weighted log-engagement: wV·ln(1+views) + wF·ln(1+favourites) + wD·ln(1+downloads).
     *
     * Counters are clamped at zero so a corrupt negative value cannot produce
     * NAN / -INF from log().
     *
     * @param array $weights Must contain the keys views, favourites, downloads.
     */
    private function engagement(array $weights, mixed $views, mixed $favourites, mixed $downloads): float
    {
        return ((float) $weights['views'] * log(1.0 + max(0.0, (float) $views)))
            + ((float) $weights['favourites'] * log(1.0 + max(0.0, (float) $favourites)))
            + ((float) $weights['downloads'] * log(1.0 + max(0.0, (float) $downloads)));
    }

    // ── Diversity filtering ────────────────────────────────────────────────

    /**
     * Apply author-diversity cap to an already-ordered candidate array.
     *
     * Walks the candidates in order, keeps at most $maxPerAuthor items per
     * user_id and stops once $listSize items have been collected. Items without
     * a usable user_id are all attributed to author 0.
     *
     * @param array $candidates   Ordered array; each element is an array or object
     *                            carrying artwork_id + user_id.
     * @param int   $maxPerAuthor Maximum items kept per author; <= 0 yields an empty list.
     * @param int   $listSize     Maximum length of the result; <= 0 yields an empty list.
     * @return array Filtered, at most $listSize elements, original order preserved.
     */
    public function applyDiversity(array $candidates, int $maxPerAuthor, int $listSize): array
    {
        // Without this guard a $listSize of 0 would still let one item through,
        // because the size check only runs after an item has been appended.
        if ($listSize <= 0 || $maxPerAuthor <= 0) {
            return [];
        }

        $result    = [];
        $perAuthor = [];

        foreach ($candidates as $item) {
            $uid   = $this->authorIdOf($item);
            $taken = $perAuthor[$uid] ?? 0;

            if ($taken >= $maxPerAuthor) {
                continue;
            }

            $result[]        = $item;
            $perAuthor[$uid] = $taken + 1;

            if (count($result) >= $listSize) {
                break;
            }
        }

        return $result;
    }

    /**
     * Extract user_id from a candidate that may be an array or an object.
     *
     * Objects are read by property first; array-style access is only attempted
     * on ArrayAccess implementations, because indexing a plain object throws
     * "Cannot use object of type … as array" even under the ?? operator.
     */
    private function authorIdOf(mixed $item): int
    {
        if (is_object($item)) {
            $uid = $item->user_id ?? null;
            if ($uid === null && $item instanceof \ArrayAccess) {
                $uid = $item['user_id'] ?? null;
            }

            return (int) ($uid ?? 0);
        }

        if (is_array($item)) {
            return (int) ($item['user_id'] ?? 0);
        }

        return 0;
    }

    // ── List retrieval ─────────────────────────────────────────────────────

    /**
     * Retrieve a ranked list of artwork IDs.
     *
     * Order of precedence:
     * 1. Cache (key built by cacheKey())
     * 2. rank_lists table (result is then cached for ranking.cache.ttl seconds)
     * 3. Fallback: latest-first from artworks (logged, NOT cached)
     *
     * @param string   $scopeType global | category | content_type
     * @param int|null $scopeId   category.id or content_type.id, null for global
     * @param string   $listType  trending | new_hot | best
     * @return array{ids: int[], computed_at: string|null, model_version: string, fallback: bool}
     */
    public function getList(string $scopeType, ?int $scopeId, string $listType): array
    {
        $ttl      = (int) config('ranking.cache.ttl', 900);
        $modelVer = (string) config('ranking.model_version', 'rank_v1');
        $cacheKey = $this->cacheKey($scopeType, $scopeId, $listType);

        // 1. Cache — anything that is not an array is treated as a miss, since
        // returning it would violate the declared return type.
        $cached = Cache::get($cacheKey);
        if (is_array($cached)) {
            return $cached;
        }

        // 2. DB
        $rankList = RankList::where('scope_type', $scopeType)
            ->where('scope_id', RankList::resolveScope($scopeId))
            ->where('list_type', $listType)
            ->where('model_version', $modelVer)
            ->first();

        if ($rankList !== null) {
            $payload = [
                'ids'           => $rankList->artwork_ids,
                'computed_at'   => $rankList->computed_at?->toIso8601String(),
                'model_version' => $rankList->model_version,
                'fallback'      => false,
            ];
            Cache::put($cacheKey, $payload, $ttl);

            return $payload;
        }

        // 3. Fallback — latest published artworks
        Log::info('RankingService: no rank list found, falling back to latest', [
            'scope_type' => $scopeType,
            'scope_id'   => $scopeId,
            'list_type'  => $listType,
        ]);

        return [
            'ids'           => $this->fallbackIds($scopeType, $scopeId),
            'computed_at'   => null,
            'model_version' => 'fallback',
            'fallback'      => true,
        ];
    }

    /**
     * Bust the cache for a specific scope/type combination (configured model version).
     */
    public function bustCache(string $scopeType, ?int $scopeId, string $listType): void
    {
        Cache::forget($this->cacheKey($scopeType, $scopeId, $listType));
    }

    /**
     * Bust the GLOBAL-scope cache keys of every list type for the given model version.
     * (Convenience — used after full rebuild.)
     *
     * Category and content_type caches are keyed with scope_id, so they are not
     * touched here; they expire after the TTL or can be cleared individually
     * with bustCache().
     *
     * @param string $modelVersion Model version whose keys are cleared; an empty
     *                             string falls back to the configured version.
     */
    public function bustAllCaches(string $modelVersion): void
    {
        foreach (self::LIST_TYPES as $listType) {
            Cache::forget($this->cacheKey('global', null, $listType, $modelVersion));
        }
    }

    /**
     * Build the cache key for a list.
     *
     * Format: {prefix}:list:{scope_type}:{scope_id|global}:{list_type}:{model_version}
     *
     * @param string|null $modelVersion Optional explicit model version; null or ''
     *                                  uses config('ranking.model_version').
     */
    public function cacheKey(
        string $scopeType,
        ?int $scopeId,
        string $listType,
        ?string $modelVersion = null,
    ): string {
        $prefix  = config('ranking.cache.prefix', 'rank');
        $version = ($modelVersion !== null && $modelVersion !== '')
            ? $modelVersion
            : config('ranking.model_version', 'rank_v1');
        $sid     = $scopeId !== null ? (string) $scopeId : 'global';

        return "{$prefix}:list:{$scopeType}:{$sid}:{$listType}:{$version}";
    }

    // ── Private helpers ────────────────────────────────────────────────────

    /**
     * Latest-first fallback IDs (public, approved, published, non-deleted artworks).
     * Applies category/content_type filter when relevant.
     *
     * The scope filters use EXISTS sub-queries rather than joins: joining through
     * the artwork_category pivot yields one row per matching category, which
     * would duplicate artwork IDs and consume LIMIT slots.
     *
     * @return int[]
     */
    private function fallbackIds(string $scopeType, ?int $scopeId): array
    {
        $listSize = (int) config('ranking.diversity.list_size', 50);

        $query = Artwork::query()
            ->select('artworks.id')
            ->where('artworks.is_public', true)
            ->where('artworks.is_approved', true)
            ->whereNull('artworks.deleted_at')
            ->whereNotNull('artworks.published_at')
            ->orderByDesc('artworks.published_at')
            ->limit($listSize);

        if ($scopeId !== null && $scopeType === 'category') {
            $query->whereExists(static function (\Illuminate\Database\Query\Builder $sub) use ($scopeId): void {
                $sub->selectRaw('1')
                    ->from('artwork_category')
                    ->whereColumn('artwork_category.artwork_id', 'artworks.id')
                    ->where('artwork_category.category_id', $scopeId);
            });
        }

        if ($scopeId !== null && $scopeType === 'content_type') {
            $query->whereExists(static function (\Illuminate\Database\Query\Builder $sub) use ($scopeId): void {
                $sub->selectRaw('1')
                    ->from('artwork_category')
                    ->join('categories', 'categories.id', '=', 'artwork_category.category_id')
                    ->whereColumn('artwork_category.artwork_id', 'artworks.id')
                    ->where('categories.content_type_id', $scopeId);
            });
        }

        return $query->pluck('artworks.id')
            ->map(static fn ($id): int => (int) $id)
            ->all();
    }

    /**
     * Sub-query counting favourites per artwork created within the last $days days.
     *
     * Yields the columns artwork_id and cnt.
     */
    private function recentFavouritesSubquery(int $days): \Illuminate\Database\Query\Builder
    {
        return DB::table('artwork_favourites')
            ->select('artwork_id', DB::raw('COUNT(*) as cnt'))
            ->where('created_at', '>=', DB::raw(sprintf('DATE_SUB(NOW(), INTERVAL %d DAY)', $days)))
            ->groupBy('artwork_id');
    }

    // ── Signal query (used by RankComputeArtworkScoresJob) ─────────────────

    /**
     * Return a query builder that selects all artwork signals needed for score
     * computation. Results are NOT paginated — callers chunk them.
     *
     * Only public, approved, published, non-deleted artworks are selected.
     * The raw expressions use MySQL functions (TIMESTAMPDIFF, DATE_SUB, GREATEST).
     *
     * Columns returned:
     * id, user_id, published_at, is_public, is_approved,
     * has_thumbnail (derived from thumb_ext),
     * views_7d, downloads_7d, views_24h, downloads_24h,
     * views_all, downloads_all, favourites_all,
     * favourites_7d, favourites_24h,
     * tag_count,
     * age_hours
     */
    public function artworkSignalsQuery(): \Illuminate\Database\Query\Builder
    {
        return DB::table('artworks as a')
            ->select([
                'a.id',
                'a.user_id',
                'a.published_at',
                'a.is_public',
                'a.is_approved',
                // Single-quoted SQL literal: "" is parsed as an identifier when
                // the ANSI_QUOTES sql_mode is enabled.
                DB::raw("(a.thumb_ext IS NOT NULL AND a.thumb_ext <> '') AS has_thumbnail"),
                DB::raw('COALESCE(ast.views_7d, 0) AS views_7d'),
                DB::raw('COALESCE(ast.downloads_7d, 0) AS downloads_7d'),
                DB::raw('COALESCE(ast.views_24h, 0) AS views_24h'),
                DB::raw('COALESCE(ast.downloads_24h, 0) AS downloads_24h'),
                DB::raw('COALESCE(ast.views, 0) AS views_all'),
                DB::raw('COALESCE(ast.downloads, 0) AS downloads_all'),
                DB::raw('COALESCE(ast.favorites, 0) AS favourites_all'),
                DB::raw('COALESCE(fav7.cnt, 0) AS favourites_7d'),
                DB::raw('COALESCE(fav1.cnt, 0) AS favourites_24h'),
                DB::raw('COALESCE(tc.tag_count, 0) AS tag_count'),
                DB::raw('GREATEST(TIMESTAMPDIFF(HOUR, a.published_at, NOW()), 0) AS age_hours'),
            ])
            ->leftJoin('artwork_stats as ast', 'ast.artwork_id', '=', 'a.id')
            // Favourites (7 days)
            ->leftJoinSub($this->recentFavouritesSubquery(7), 'fav7', 'fav7.artwork_id', '=', 'a.id')
            // Favourites (24 hours)
            ->leftJoinSub($this->recentFavouritesSubquery(1), 'fav1', 'fav1.artwork_id', '=', 'a.id')
            // Tag count
            ->leftJoinSub(
                DB::table('artwork_tag')
                    ->select('artwork_id', DB::raw('COUNT(*) as tag_count'))
                    ->groupBy('artwork_id'),
                'tc',
                'tc.artwork_id',
                '=',
                'a.id'
            )
            ->where('a.is_public', 1)
            ->where('a.is_approved', 1)
            ->whereNull('a.deleted_at')
            ->whereNotNull('a.published_at');
    }
}