Studio: make grid checkbox rectangular and commit table changes

This commit is contained in:
2026-03-01 08:43:48 +01:00
parent 211dc58884
commit e3ca845a6d
89 changed files with 7323 additions and 475 deletions

View File

@@ -0,0 +1,148 @@
<?php
declare(strict_types=1);
namespace App\Jobs;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\DB;
/**
* Build item-item co-occurrence pairs from user favourites.
*
* Spec §7.1 runs hourly or every few hours.
* For each user: take last N favourites, create pairs, increment weights.
*
* Safety: limits per-user pairs to avoid O(n²) explosion.
*/
final class RecBuildItemPairsFromFavouritesJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Number of rows sent to the database per upsert statement. */
    private const UPSERT_CHUNK_SIZE = 500;

    /** Queue worker setting: how many times the job may be attempted. */
    public int $tries = 2;

    /** Queue worker setting: seconds the job may run before timing out. */
    public int $timeout = 600;

    /** @var array<int, int> artwork_id => total favourite count (filled by handle()) */
    private array $artworkLikeCounts = [];

    /**
     * @param int $userBatchSize number of distinct users fetched per chunk in handle()
     */
    public function __construct(
        private readonly int $userBatchSize = 500,
    ) {
        // Route the job to the configured queue; an empty string keeps the default routing.
        $queue = (string) config('recommendations.queue', 'default');
        if ($queue !== '') {
            $this->onQueue($queue);
        }
    }

    /**
     * Count co-occurring favourites across all users, normalize the counts to a
     * cosine-like score and upsert them into rec_item_pairs.
     */
    public function handle(): void
    {
        $favCap = (int) config('recommendations.similarity.user_favourites_cap', 50);

        // Per-artwork total favourite counts, used as the normalization denominator.
        $this->artworkLikeCounts = DB::table('artwork_favourites')
            ->select('artwork_id', DB::raw('COUNT(*) as cnt'))
            ->groupBy('artwork_id')
            ->pluck('cnt', 'artwork_id')
            ->all();

        // "a:b" => number of users whose (capped) favourites contain both a and b.
        $coOccurrenceCounts = [];

        DB::table('artwork_favourites')
            ->select('user_id')
            ->groupBy('user_id')
            ->orderBy('user_id')
            ->chunk($this->userBatchSize, function ($userRows) use ($favCap, &$coOccurrenceCounts): void {
                foreach ($userRows as $row) {
                    foreach ($this->pairsForUser((int) $row->user_id, $favCap) as [$a, $b]) {
                        $key = $a . ':' . $b;
                        $coOccurrenceCounts[$key] = ($coOccurrenceCounts[$key] ?? 0) + 1;
                    }
                }
            });

        // score = co-occurrences / sqrt(likes(a) * likes(b))
        $normalized = [];
        foreach ($coOccurrenceCounts as $key => $count) {
            [$a, $b] = explode(':', $key);

            // Counts may arrive as numeric strings depending on the driver; cast
            // them and never let the denominator reach zero.
            $likesA = max(1, (int) ($this->artworkLikeCounts[(int) $a] ?? 1));
            $likesB = max(1, (int) ($this->artworkLikeCounts[(int) $b] ?? 1));

            $normalized[$key] = $count / sqrt($likesA * $likesB);
        }

        $this->flushPairs($normalized);
    }

    /**
     * Collect pairs from a single user's last N favourites.
     *
     * Each pair is ordered as [smaller id, larger id]. Duplicate favourite rows
     * for the same artwork are collapsed first, so a pair never links an artwork
     * to itself and no pair is emitted twice for one user.
     *
     * @param int $userId user whose favourites are read
     * @param int $cap    maximum number of most recent favourites to consider
     *
     * @return list<array{0: int, 1: int}>
     */
    public function pairsForUser(int $userId, int $cap): array
    {
        $artworkIds = DB::table('artwork_favourites')
            ->where('user_id', $userId)
            ->orderByDesc('created_at')
            ->limit($cap)
            ->pluck('artwork_id')
            ->map(static fn ($id): int => (int) $id)
            ->unique()
            ->values()
            ->all();

        $count = count($artworkIds);
        if ($count < 2) {
            return [];
        }

        // n favourites yield n * (n - 1) / 2 pairs; $cap bounds n (C(50,2) = 1225 at the default cap).
        $pairs = [];
        for ($i = 0; $i < $count - 1; $i++) {
            for ($j = $i + 1; $j < $count; $j++) {
                $pairs[] = [
                    min($artworkIds[$i], $artworkIds[$j]),
                    max($artworkIds[$i], $artworkIds[$j]),
                ];
            }
        }

        return $pairs;
    }

    /**
     * Upsert normalized pair weights into rec_item_pairs.
     *
     * Rows are keyed on (a_artwork_id, b_artwork_id); on conflict the weight and
     * updated_at columns are overwritten.
     *
     * @param array<string, float> $upserts key = "a:b", value = cosine-normalized weight
     */
    private function flushPairs(array $upserts): void
    {
        if ($upserts === []) {
            return;
        }

        // One timestamp for the whole run so every row of this build matches.
        $now = now();

        foreach (array_chunk($upserts, self::UPSERT_CHUNK_SIZE, preserve_keys: true) as $chunk) {
            $rows = [];
            foreach ($chunk as $key => $weight) {
                [$a, $b] = explode(':', $key);
                $rows[] = [
                    'a_artwork_id' => (int) $a,
                    'b_artwork_id' => (int) $b,
                    'weight' => $weight,
                    'updated_at' => $now,
                ];
            }

            DB::table('rec_item_pairs')->upsert(
                $rows,
                ['a_artwork_id', 'b_artwork_id'],
                ['weight', 'updated_at'],
            );
        }
    }
}

View File

@@ -0,0 +1,129 @@
<?php
declare(strict_types=1);
namespace App\Jobs;
use App\Models\Artwork;
use App\Models\RecArtworkRec;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\DB;
/**
* Compute behavior-based (co-like) similarity from precomputed item pairs.
*
* Spec §7.3 runs nightly.
* For each artwork: read top pairs from rec_item_pairs, store top N.
*/
final class RecComputeSimilarByBehaviorJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    public int $tries = 2;

    public int $timeout = 600;

    /**
     * @param int|null $artworkId restrict the run to one artwork; null processes every public, published artwork
     * @param int      $batchSize number of artworks loaded per chunk
     */
    public function __construct(
        private readonly ?int $artworkId = null,
        private readonly int $batchSize = 200,
    ) {
        $queueName = (string) config('recommendations.queue', 'default');
        if ($queueName === '') {
            return;
        }

        $this->onQueue($queueName);
    }

    /**
     * Walk the selected artworks in ID-ordered chunks and rebuild the
     * "similar_behavior" recommendation list of each one.
     */
    public function handle(): void
    {
        $modelVersion = (string) config('recommendations.similarity.model_version', 'sim_v1');
        $resultLimit = (int) config('recommendations.similarity.result_limit', 30);
        $maxPerAuthor = (int) config('recommendations.similarity.max_per_author', 2);

        $artworkQuery = Artwork::query()->public()->published()->select('id', 'user_id');
        if ($this->artworkId !== null) {
            $artworkQuery->where('id', $this->artworkId);
        }

        $artworkQuery->chunkById(
            $this->batchSize,
            function ($batch) use ($modelVersion, $resultLimit, $maxPerAuthor) {
                foreach ($batch as $source) {
                    $this->processArtwork($source, $modelVersion, $resultLimit, $maxPerAuthor);
                }
            },
        );
    }

    /**
     * Build and store the behavior-based recommendation list for one artwork.
     *
     * Nothing is written when the artwork has no pairs or when no candidate
     * survives the visibility and per-author filters.
     *
     * @param Artwork $artwork      source artwork
     * @param string  $modelVersion version tag stored with the result
     * @param int     $resultLimit  maximum number of recommendations kept
     * @param int     $maxPerAuthor maximum recommendations allowed per author
     */
    private function processArtwork(
        Artwork $artwork,
        string $modelVersion,
        int $resultLimit,
        int $maxPerAuthor,
    ): void {
        $sourceId = $artwork->id;

        // The source may sit on either side of a pair, so both directions are unioned.
        $reverseSide = DB::table('rec_item_pairs')
            ->where('b_artwork_id', $sourceId)
            ->select(DB::raw('a_artwork_id AS related_id'), 'weight');

        $pairRows = DB::table('rec_item_pairs')
            ->where('a_artwork_id', $sourceId)
            ->select(DB::raw('b_artwork_id AS related_id'), 'weight')
            ->union($reverseSide)
            ->orderByDesc('weight')
            ->limit($resultLimit * 3) // over-fetch: filtering below discards some rows
            ->get();

        if ($pairRows->isEmpty()) {
            return;
        }

        $relatedIds = array_map(
            static fn ($id): int => (int) $id,
            $pairRows->pluck('related_id')->all(),
        );

        // artwork id => author id, limited to artworks that are currently visible.
        $authorByArtwork = DB::table('artworks')
            ->whereIn('id', $relatedIds)
            ->where('is_public', true)
            ->where('is_approved', true)
            ->whereNotNull('published_at')
            ->where('published_at', '<=', now())
            ->whereNull('deleted_at')
            ->pluck('user_id', 'id')
            ->all();

        $seenPerAuthor = [];
        $selected = [];

        foreach ($pairRows as $pairRow) {
            $relatedId = (int) $pairRow->related_id;

            $rawAuthor = $authorByArtwork[$relatedId] ?? null;
            if ($rawAuthor === null) {
                continue; // not public/published
            }

            $authorId = (int) $rawAuthor;
            $timesSeen = ($seenPerAuthor[$authorId] ?? 0) + 1;
            $seenPerAuthor[$authorId] = $timesSeen;

            if ($timesSeen > $maxPerAuthor) {
                continue; // author already has the allowed number of slots
            }

            $selected[] = $relatedId;

            if (count($selected) >= $resultLimit) {
                break;
            }
        }

        if ($selected === []) {
            return;
        }

        $identity = [
            'artwork_id' => $artwork->id,
            'rec_type' => 'similar_behavior',
            'model_version' => $modelVersion,
        ];

        RecArtworkRec::query()->updateOrCreate(
            $identity,
            [
                'recs' => $selected,
                'computed_at' => now(),
            ],
        );
    }
}

View File

@@ -0,0 +1,225 @@
<?php
declare(strict_types=1);
namespace App\Jobs;
use App\Models\Artwork;
use App\Models\RecArtworkRec;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\DB;
/**
* Compute tag-based (+ category boost) similarity for artworks.
*
* Spec §7.2 runs nightly + on-demand.
* For each artwork: find candidates by shared tags/category, score with IDF-weighted
* tag overlap, apply diversity, store top N.
*/
final class RecComputeSimilarByTagsJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Queue worker setting: how many times the job may be attempted. */
    public int $tries = 2;

    /** Queue worker setting: seconds the job may run before timing out. */
    public int $timeout = 600;

    /**
     * @param int|null $artworkId restrict the run to one artwork; null processes every public, published artwork
     * @param int      $batchSize number of artworks loaded per chunk
     */
    public function __construct(
        private readonly ?int $artworkId = null,
        private readonly int $batchSize = 200,
    ) {
        // Route the job to the configured queue; an empty string keeps the default routing.
        $queue = (string) config('recommendations.queue', 'default');
        if ($queue !== '') {
            $this->onQueue($queue);
        }
    }

    /**
     * Load global tag frequencies once, then rebuild the "similar_tags"
     * recommendation list of every selected artwork.
     */
    public function handle(): void
    {
        $modelVersion = (string) config('recommendations.similarity.model_version', 'sim_v1');
        $candidatePool = (int) config('recommendations.similarity.candidate_pool', 100);
        $maxPerAuthor = (int) config('recommendations.similarity.max_per_author', 2);
        $resultLimit = (int) config('recommendations.similarity.result_limit', 30);

        // tag_id => number of artworks carrying the tag (input for the IDF weight).
        $tagFreqs = DB::table('artwork_tag')
            ->select('tag_id', DB::raw('COUNT(*) as cnt'))
            ->groupBy('tag_id')
            ->pluck('cnt', 'tag_id')
            ->all();

        $query = Artwork::query()->public()->published()->select('id', 'user_id');
        if ($this->artworkId !== null) {
            $query->where('id', $this->artworkId);
        }

        $query->chunkById($this->batchSize, function ($artworks) use (
            $tagFreqs,
            $modelVersion,
            $candidatePool,
            $maxPerAuthor,
            $resultLimit,
        ) {
            foreach ($artworks as $artwork) {
                $this->processArtwork(
                    $artwork,
                    $tagFreqs,
                    $modelVersion,
                    $candidatePool,
                    $maxPerAuthor,
                    $resultLimit,
                );
            }
        });
    }

    /**
     * Build and store the tag-based recommendation list for one artwork.
     *
     * Candidates are artworks sharing at least one tag with the source. Each is
     * scored as: sum of 1 / log(2 + tag frequency) over shared tags, plus 0.1
     * for a shared category, plus 0.05 for a shared content type. Nothing is
     * written when the source has no tags, no candidate is found, or the
     * per-author cap removes every candidate.
     *
     * @param Artwork         $artwork       source artwork
     * @param array<int, int> $tagFreqs      tag_id => usage count
     * @param string          $modelVersion  version tag stored with the result
     * @param int             $candidatePool base candidate pool size (three times this many rows are fetched)
     * @param int             $maxPerAuthor  maximum recommendations allowed per author
     * @param int             $resultLimit   maximum number of recommendations kept
     */
    private function processArtwork(
        Artwork $artwork,
        array $tagFreqs,
        string $modelVersion,
        int $candidatePool,
        int $maxPerAuthor,
        int $resultLimit,
    ): void {
        $srcTagIds = DB::table('artwork_tag')
            ->where('artwork_id', $artwork->id)
            ->pluck('tag_id')
            ->all();

        // Candidates are discovered only through shared tags, so a source without
        // tags can never produce any: stop before running further queries.
        if ($srcTagIds === []) {
            return;
        }

        $srcCatIds = DB::table('artwork_category')
            ->where('artwork_id', $artwork->id)
            ->pluck('category_id')
            ->all();

        // Source content_type_ids (via categories)
        $srcContentTypeIds = $srcCatIds !== []
            ? DB::table('categories')
                ->whereIn('id', $srcCatIds)
                ->whereNotNull('content_type_id')
                ->pluck('content_type_id')
                ->unique()
                ->all()
            : [];

        // ── Find visible candidates that share at least one tag ─────────────
        $candidates = DB::table('artwork_tag')
            ->join('artworks', 'artworks.id', '=', 'artwork_tag.artwork_id')
            ->whereIn('artwork_tag.tag_id', $srcTagIds)
            ->where('artwork_tag.artwork_id', '!=', $artwork->id)
            ->where('artworks.is_public', true)
            ->where('artworks.is_approved', true)
            ->whereNotNull('artworks.published_at')
            ->where('artworks.published_at', '<=', now())
            ->whereNull('artworks.deleted_at')
            ->select('artwork_tag.artwork_id', 'artworks.user_id')
            ->groupBy('artwork_tag.artwork_id', 'artworks.user_id')
            ->orderByRaw('COUNT(*) DESC') // most shared tags first
            ->limit($candidatePool * 3) // over-fetch before scoring
            ->get();

        if ($candidates->isEmpty()) {
            return;
        }

        // ── Bulk-load tags, categories and content types of all candidates ──
        $candidateIds = $candidates->pluck('artwork_id')->all();

        $candidateTagMap = DB::table('artwork_tag')
            ->whereIn('artwork_id', $candidateIds)
            ->select('artwork_id', 'tag_id')
            ->get()
            ->groupBy('artwork_id');

        $candidateCatMap = DB::table('artwork_category')
            ->whereIn('artwork_id', $candidateIds)
            ->select('artwork_id', 'category_id')
            ->get()
            ->groupBy('artwork_id');

        $allCandidateCatIds = $candidateCatMap->flatten(1)->pluck('category_id')->unique()->all();

        // category_id => content_type_id
        $catContentTypeMap = $allCandidateCatIds !== []
            ? DB::table('categories')
                ->whereIn('id', $allCandidateCatIds)
                ->whereNotNull('content_type_id')
                ->pluck('content_type_id', 'id')
                ->all()
            : [];

        // Flipped arrays give O(1) membership checks via isset().
        $srcContentTypeSet = array_flip($srcContentTypeIds);
        $srcTagSet = array_flip($srcTagIds);
        $srcCatSet = array_flip($srcCatIds);

        // ── Score each candidate ────────────────────────────────────────────
        $scored = [];
        foreach ($candidates as $cand) {
            $cTagIds = $candidateTagMap->get($cand->artwork_id, collect())->pluck('tag_id')->all();
            $cCatIds = $candidateCatMap->get($cand->artwork_id, collect())->pluck('category_id')->all();

            // IDF-weighted tag overlap (spec §5.1): rarer shared tags weigh more.
            $tagScore = 0.0;
            foreach ($cTagIds as $tagId) {
                if (isset($srcTagSet[$tagId])) {
                    $freq = $tagFreqs[$tagId] ?? 1;
                    $tagScore += 1.0 / log(2 + $freq);
                }
            }

            // Category match bonus: binary, one shared category is enough.
            $catScore = 0.0;
            foreach ($cCatIds as $catId) {
                if (isset($srcCatSet[$catId])) {
                    $catScore = 1.0;
                    break;
                }
            }

            // Content type match bonus (spec §5.1): binary as well.
            $ctScore = 0.0;
            foreach ($cCatIds as $catId) {
                $ctId = $catContentTypeMap[$catId] ?? null;
                if ($ctId !== null && isset($srcContentTypeSet[$ctId])) {
                    $ctScore = 1.0;
                    break;
                }
            }

            $scored[] = [
                'artwork_id' => (int) $cand->artwork_id,
                'user_id' => (int) $cand->user_id,
                'score' => $tagScore + $catScore * 0.1 + $ctScore * 0.05,
            ];
        }

        // Highest score first.
        usort($scored, static fn (array $a, array $b): int => $b['score'] <=> $a['score']);

        // ── Apply diversity (max per author) ────────────────────────────────
        $authorCounts = [];
        $final = [];
        foreach ($scored as $item) {
            $authorId = $item['user_id'];
            $authorCounts[$authorId] = ($authorCounts[$authorId] ?? 0) + 1;
            if ($authorCounts[$authorId] > $maxPerAuthor) {
                continue;
            }

            $final[] = $item['artwork_id'];
            if (count($final) >= $resultLimit) {
                break;
            }
        }

        // Same rule as the behavior and hybrid jobs: never store an empty list.
        if ($final === []) {
            return;
        }

        // ── Persist ─────────────────────────────────────────────────────────
        RecArtworkRec::query()->updateOrCreate(
            [
                'artwork_id' => $artwork->id,
                'rec_type' => 'similar_tags',
                'model_version' => $modelVersion,
            ],
            [
                'recs' => $final,
                'computed_at' => now(),
            ],
        );
    }
}

View File

@@ -0,0 +1,286 @@
<?php
declare(strict_types=1);
namespace App\Jobs;
use App\Models\Artwork;
use App\Models\RecArtworkRec;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Log;
/**
* Compute hybrid similarity by blending tag, behavior, and optionally visual scores.
*
* Spec §7.4 runs nightly.
* Merges candidates from tag + behavior + vector lists, applies hybrid blend weights,
* enforces diversity, stores top 30.
*/
final class RecComputeSimilarHybridJob implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Size of the leading block in which category diversity is enforced. */
    private const TOP_BLOCK_SIZE = 12;

    /** Queue worker setting: how many times the job may be attempted. */
    public int $tries = 2;

    /** Queue worker setting: seconds the job may run before timing out. */
    public int $timeout = 900;

    /**
     * @param int|null $artworkId restrict the run to one artwork; null processes every public, published artwork
     * @param int      $batchSize number of artworks loaded per chunk
     */
    public function __construct(
        private readonly ?int $artworkId = null,
        private readonly int $batchSize = 200,
    ) {
        // Route the job to the configured queue; an empty string keeps the default routing.
        $queue = (string) config('recommendations.queue', 'default');
        if ($queue !== '') {
            $this->onQueue($queue);
        }
    }

    /**
     * Rebuild the "similar_hybrid" recommendation list of every selected artwork.
     *
     * A failure on one artwork is logged as a warning and does not stop the run.
     */
    public function handle(): void
    {
        $modelVersion = (string) config('recommendations.similarity.model_version', 'sim_v1');
        $vectorEnabled = (bool) config('recommendations.similarity.vector_enabled', false);
        $resultLimit = (int) config('recommendations.similarity.result_limit', 30);
        $maxPerAuthor = (int) config('recommendations.similarity.max_per_author', 2);
        $minCatsTop12 = (int) config('recommendations.similarity.min_categories_top12', 2);

        $weights = $vectorEnabled
            ? (array) config('recommendations.similarity.weights_with_vector')
            : (array) config('recommendations.similarity.weights_without_vector');

        $query = Artwork::query()->public()->published()->select('id', 'user_id');
        if ($this->artworkId !== null) {
            $query->where('id', $this->artworkId);
        }

        $query->chunkById($this->batchSize, function ($artworks) use (
            $modelVersion,
            $vectorEnabled,
            $resultLimit,
            $maxPerAuthor,
            $minCatsTop12,
            $weights,
        ) {
            foreach ($artworks as $artwork) {
                try {
                    $this->processArtwork(
                        $artwork,
                        $modelVersion,
                        $vectorEnabled,
                        $resultLimit,
                        $maxPerAuthor,
                        $minCatsTop12,
                        $weights,
                    );
                } catch (\Throwable $e) {
                    // Deliberate best effort: one broken artwork must not abort the batch.
                    Log::warning("[RecComputeSimilarHybrid] Failed for artwork {$artwork->id}: {$e->getMessage()}");
                }
            }
        });
    }

    /**
     * Blend the stored tag, behavior and (optionally) visual lists of one
     * artwork into a single ranked list and store it.
     *
     * Nothing is written when no sub-list exists or no candidate is visible.
     *
     * @param Artwork              $artwork       source artwork
     * @param string               $modelVersion  version tag used to read sub-lists and stored with the result
     * @param bool                 $vectorEnabled whether the visual sub-list takes part in the blend
     * @param int                  $resultLimit   maximum number of recommendations kept
     * @param int                  $maxPerAuthor  maximum recommendations allowed per author
     * @param int                  $minCatsTop12  minimum distinct categories wanted in the first 12 results
     * @param array<string, float> $weights       blend weights keyed by visual/tag/behavior/category
     */
    private function processArtwork(
        Artwork $artwork,
        string $modelVersion,
        bool $vectorEnabled,
        int $resultLimit,
        int $maxPerAuthor,
        int $minCatsTop12,
        array $weights,
    ): void {
        $sourceId = (int) $artwork->id;

        // ── Collect sub-lists ───────────────────────────────────────────────
        $tagIds = $this->loadRecIds($sourceId, 'similar_tags', $modelVersion);
        $behIds = $this->loadRecIds($sourceId, 'similar_behavior', $modelVersion);
        $vecIds = $vectorEnabled
            ? $this->loadRecIds($sourceId, 'similar_visual', $modelVersion)
            : [];

        $allIds = array_values(array_unique(array_merge($tagIds, $behIds, $vecIds)));
        if ($allIds === []) {
            return;
        }

        // ── Build normalized score maps (position in list => score) ─────────
        $tagScoreMap = $this->rankToScore($tagIds);
        $behScoreMap = $this->rankToScore($behIds);
        $vecScoreMap = $this->rankToScore($vecIds);

        // Visible candidates only; keyed by artwork id for author lookup.
        $metaRows = DB::table('artworks')
            ->whereIn('id', $allIds)
            ->where('is_public', true)
            ->where('is_approved', true)
            ->whereNotNull('published_at')
            ->where('published_at', '<=', now())
            ->whereNull('deleted_at')
            ->select('id', 'user_id')
            ->get()
            ->keyBy('id');

        $catMap = DB::table('artwork_category')
            ->whereIn('artwork_id', $allIds)
            ->select('artwork_id', 'category_id')
            ->get()
            ->groupBy('artwork_id');

        // Source artwork categories
        $srcCatIds = DB::table('artwork_category')
            ->where('artwork_id', $artwork->id)
            ->pluck('category_id')
            ->all();
        $srcCatSet = array_flip($srcCatIds);

        // ── Compute hybrid score ────────────────────────────────────────────
        $scored = [];
        foreach ($allIds as $candidateId) {
            if (! $metaRows->has($candidateId)) {
                continue; // not public/published
            }

            $meta = $metaRows->get($candidateId);
            $candidateCats = $catMap->get($candidateId, collect())->pluck('category_id')->all();

            // Category overlap: binary, one shared category is enough.
            $catScore = 0.0;
            foreach ($candidateCats as $catId) {
                if (isset($srcCatSet[$catId])) {
                    $catScore = 1.0;
                    break;
                }
            }

            $tagS = $tagScoreMap[$candidateId] ?? 0.0;
            $behS = $behScoreMap[$candidateId] ?? 0.0;
            $vecS = $vecScoreMap[$candidateId] ?? 0.0;

            // The fallback weights differ between the two modes, so the
            // formulas are kept apart.
            if ($vectorEnabled) {
                $score = ($weights['visual'] ?? 0.45) * $vecS
                    + ($weights['tag'] ?? 0.25) * $tagS
                    + ($weights['behavior'] ?? 0.20) * $behS
                    + ($weights['category'] ?? 0.10) * $catScore;
            } else {
                $score = ($weights['tag'] ?? 0.55) * $tagS
                    + ($weights['behavior'] ?? 0.35) * $behS
                    + ($weights['category'] ?? 0.10) * $catScore;
            }

            $scored[] = [
                'artwork_id' => $candidateId,
                'user_id' => (int) $meta->user_id,
                'cat_ids' => $candidateCats,
                'score' => $score,
            ];
        }

        usort($scored, static fn (array $a, array $b): int => $b['score'] <=> $a['score']);

        // ── Diversity enforcement (max per author) ──────────────────────────
        $authorCounts = [];
        $final = [];
        $catsInTop12 = [];
        foreach ($scored as $item) {
            $authorId = $item['user_id'];
            $authorCounts[$authorId] = ($authorCounts[$authorId] ?? 0) + 1;
            if ($authorCounts[$authorId] > $maxPerAuthor) {
                continue;
            }

            $final[] = $item;

            // Remember which categories the leading block already covers.
            if (count($final) <= self::TOP_BLOCK_SIZE) {
                foreach ($item['cat_ids'] as $cId) {
                    $catsInTop12[$cId] = true;
                }
            }

            if (count($final) >= $resultLimit) {
                break;
            }
        }

        // ── Min-categories enforcement in top 12 (spec §6) ──────────────────
        $final = $this->promoteForCategoryDiversity(
            $final,
            $scored,
            $catsInTop12,
            $minCatsTop12,
            $maxPerAuthor,
            $resultLimit,
        );

        $finalIds = array_column($final, 'artwork_id');
        if ($finalIds === []) {
            return;
        }

        RecArtworkRec::query()->updateOrCreate(
            [
                'artwork_id' => $artwork->id,
                'rec_type' => 'similar_hybrid',
                'model_version' => $modelVersion,
            ],
            [
                'recs' => $finalIds,
                'computed_at' => now(),
            ],
        );
    }

    /**
     * Read the stored recommendation IDs of one type for an artwork.
     *
     * @return array<int, int|string> stored IDs, or an empty array when no record exists
     */
    private function loadRecIds(int $artworkId, string $recType, string $modelVersion): array
    {
        $record = RecArtworkRec::query()
            ->where('artwork_id', $artworkId)
            ->where('rec_type', $recType)
            ->where('model_version', $modelVersion)
            ->first();

        return $record?->recs ?? [];
    }

    /**
     * Best-effort attempt to reach the minimum number of distinct categories
     * in the leading block by pulling in lower-ranked items.
     *
     * Runs only when the list holds at least 12 items and covers too few
     * categories. A promoted item must add a category not covered yet and must
     * not push its author past the per-author cap. Promoted items are placed
     * at the end of the leading block, so all of them stay within the first 12.
     * The items they displace move down, which can still leave the block short
     * of the target.
     *
     * @param list<array{artwork_id: int|string, user_id: int, cat_ids: array, score: float}> $final
     *        current selection, best first
     * @param list<array{artwork_id: int|string, user_id: int, cat_ids: array, score: float}> $scored
     *        every scored candidate, best first
     * @param array<int|string, true> $coveredCats  category ids present in the first 12 items of $final
     * @param int                     $minCatsTop12 wanted number of distinct categories
     * @param int                     $maxPerAuthor maximum recommendations allowed per author
     * @param int                     $resultLimit  maximum length of the returned list
     *
     * @return list<array{artwork_id: int|string, user_id: int, cat_ids: array, score: float}>
     */
    private function promoteForCategoryDiversity(
        array $final,
        array $scored,
        array $coveredCats,
        int $minCatsTop12,
        int $maxPerAuthor,
        int $resultLimit,
    ): array {
        if (count($coveredCats) >= $minCatsTop12 || count($final) < self::TOP_BLOCK_SIZE) {
            return $final;
        }

        $selectedIds = array_flip(array_column($final, 'artwork_id'));
        // Count authors from the actual selection: the counters of the first
        // pass also include items that were skipped.
        $authorTotals = array_count_values(array_column($final, 'user_id'));

        $promoted = [];
        foreach ($scored as $item) {
            if (isset($selectedIds[$item['artwork_id']])) {
                continue;
            }
            if (($authorTotals[$item['user_id']] ?? 0) >= $maxPerAuthor) {
                continue;
            }

            $newCats = array_diff($item['cat_ids'], array_keys($coveredCats));
            if ($newCats === []) {
                continue;
            }

            $promoted[] = $item;
            $authorTotals[$item['user_id']] = ($authorTotals[$item['user_id']] ?? 0) + 1;
            foreach ($newCats as $catId) {
                $coveredCats[$catId] = true;
            }

            if (count($coveredCats) >= $minCatsTop12 || count($promoted) >= self::TOP_BLOCK_SIZE) {
                break;
            }
        }

        if ($promoted === []) {
            return $final;
        }

        // Keep as many leading items as leave room for every promoted one.
        $keep = self::TOP_BLOCK_SIZE - count($promoted);
        $merged = array_merge(
            array_slice($final, 0, $keep),
            $promoted,
            array_slice($final, $keep),
        );

        return array_slice($merged, 0, $resultLimit);
    }

    /**
     * Convert a ranked list of IDs into a score map (1.0 at rank 0, decaying).
     *
     * The score falls linearly by 1 / count per position. An ID that appears
     * more than once keeps the score of its first (best) position.
     *
     * @param list<int> $ids
     * @return array<int, float>
     */
    private function rankToScore(array $ids): array
    {
        $ranked = array_values($ids); // positions 0..n-1 regardless of input keys
        $total = count($ranked);

        $map = [];
        foreach ($ranked as $rank => $id) {
            // $total is at least 1 here because the loop body ran.
            $map[(int) $id] ??= 1.0 - ($rank / $total);
        }

        return $map;
    }
}