Files
SkinbaseNova/.deploy/artwork-evolution-release/app/Http/Controllers/Api/LinkPreviewController.php
2026-04-18 17:02:56 +02:00

224 lines
7.1 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
namespace App\Http\Controllers\Api;
use App\Http\Controllers\Controller;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\TransferException;
use Illuminate\Http\JsonResponse;
use Illuminate\Http\Request;
class LinkPreviewController extends Controller
{
private const TIMEOUT = 8; // seconds
private const MAX_BYTES = 524_288; // 512 KB enough to get the <head>
private const USER_AGENT = 'Skinbase-LinkPreview/1.0 (+https://skinbase.org)';
/** Blocked IP ranges (SSRF protection). */
private const BLOCKED_CIDRS = [
'0.0.0.0/8',
'10.0.0.0/8',
'100.64.0.0/10',
'127.0.0.0/8',
'169.254.0.0/16',
'172.16.0.0/12',
'192.0.0.0/24',
'192.168.0.0/16',
'198.18.0.0/15',
'198.51.100.0/24',
'203.0.113.0/24',
'240.0.0.0/4',
'::1/128',
'fc00::/7',
'fe80::/10',
];
public function __invoke(Request $request): JsonResponse
{
$request->validate([
'url' => ['required', 'string', 'max:2048'],
]);
$rawUrl = trim((string) $request->input('url'));
// Must be http(s)
if (! preg_match('#^https?://#i', $rawUrl)) {
return response()->json(['error' => 'Invalid URL scheme.'], 422);
}
$parsed = parse_url($rawUrl);
$host = $parsed['host'] ?? '';
if (empty($host)) {
return response()->json(['error' => 'Invalid URL.'], 422);
}
// Resolve hostname and block private/loopback IPs (SSRF protection)
$resolved = gethostbyname($host);
if ($this->isBlockedIp($resolved)) {
return response()->json(['error' => 'URL not allowed.'], 422);
}
try {
$client = new Client([
'timeout' => self::TIMEOUT,
'connect_timeout' => 4,
'allow_redirects' => ['max' => 5, 'strict' => false],
'headers' => [
'User-Agent' => self::USER_AGENT,
'Accept' => 'text/html,application/xhtml+xml',
],
'verify' => true,
]);
$response = $client->get($rawUrl);
$status = $response->getStatusCode();
if ($status < 200 || $status >= 400) {
return response()->json(['error' => 'Could not fetch URL.'], 422);
}
// Read up to MAX_BYTES we only need the HTML <head>
$body = '';
$stream = $response->getBody();
while (! $stream->eof() && strlen($body) < self::MAX_BYTES) {
$body .= $stream->read(4096);
}
$stream->close();
} catch (TransferException $e) {
return response()->json(['error' => 'Could not reach URL.'], 422);
}
$preview = $this->extractMeta($body, $rawUrl);
return response()->json($preview);
}
/** Extract OG / Twitter / fallback meta tags. */
private function extractMeta(string $html, string $originalUrl): array
{
// Limit to roughly the <head> block for speed
$head = substr($html, 0, 50_000);
$og = [];
// OG / Twitter meta tags
preg_match_all(
'/<meta\s[^>]*(?:property|name)\s*=\s*["\']([^"\']+)["\'][^>]*content\s*=\s*["\']([^"\']*)["\'][^>]*>/i',
$head,
$m1,
PREG_SET_ORDER,
);
preg_match_all(
'/<meta\s[^>]*content\s*=\s*["\']([^"\']*)["\'][^>]*(?:property|name)\s*=\s*["\']([^"\']+)["\'][^>]*>/i',
$head,
$m2,
PREG_SET_ORDER,
);
$allMeta = array_merge(
array_map(fn ($r) => ['key' => strtolower($r[1]), 'value' => $r[2]], $m1),
array_map(fn ($r) => ['key' => strtolower($r[2]), 'value' => $r[1]], $m2),
);
$map = [];
foreach ($allMeta as $entry) {
$map[$entry['key']] ??= $entry['value'];
}
// Canonical URL
$canonical = $originalUrl;
if (preg_match('/<link[^>]+rel\s*=\s*["\']canonical["\'][^>]+href\s*=\s*["\']([^"\']+)["\'][^>]*>/i', $head, $mc)) {
$canonical = $mc[1];
} elseif (preg_match('/<link[^>]+href\s*=\s*["\']([^"\']+)["\'][^>]+rel\s*=\s*["\']canonical["\'][^>]*>/i', $head, $mc)) {
$canonical = $mc[1];
}
// Title
$title = $map['og:title']
?? $map['twitter:title']
?? null;
if (! $title && preg_match('/<title[^>]*>([^<]+)<\/title>/i', $head, $mt)) {
$title = trim(html_entity_decode($mt[1]));
}
// Description
$description = $map['og:description']
?? $map['twitter:description']
?? $map['description']
?? null;
// Image
$image = $map['og:image']
?? $map['twitter:image']
?? $map['twitter:image:src']
?? null;
// Resolve relative image URL
if ($image && ! preg_match('#^https?://#i', $image)) {
$parsed = parse_url($originalUrl);
$base = ($parsed['scheme'] ?? 'https') . '://' . ($parsed['host'] ?? '');
$image = $base . '/' . ltrim($image, '/');
}
// Site name
$siteName = $map['og:site_name'] ?? parse_url($originalUrl, PHP_URL_HOST) ?? null;
return [
'url' => $canonical,
'title' => $title ? html_entity_decode($title) : null,
'description' => $description ? html_entity_decode($description) : null,
'image' => $image,
'site_name' => $siteName,
];
}
private function isBlockedIp(string $ip): bool
{
if (! filter_var($ip, FILTER_VALIDATE_IP)) {
return true; // could not resolve
}
foreach (self::BLOCKED_CIDRS as $cidr) {
if ($this->ipInCidr($ip, $cidr)) {
return true;
}
}
return false;
}
private function ipInCidr(string $ip, string $cidr): bool
{
[$subnet, $bits] = explode('/', $cidr) + [1 => 32];
// IPv6
if (str_contains($cidr, ':')) {
if (! filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
return false;
}
$ipBin = inet_pton($ip);
$subnetBin = inet_pton($subnet);
if ($ipBin === false || $subnetBin === false) {
return false;
}
$bits = (int) $bits;
$mask = str_repeat("\xff", (int) ($bits / 8));
$remain = $bits % 8;
if ($remain) {
$mask .= chr(0xff << (8 - $remain));
}
$mask = str_pad($mask, strlen($subnetBin), "\x00");
return ($ipBin & $mask) === ($subnetBin & $mask);
}
// IPv4
if (! filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
return false;
}
$ipLong = ip2long($ip);
$subnetLong = ip2long($subnet);
$maskLong = $bits == 32 ? -1 : ~((1 << (32 - (int) $bits)) - 1);
return ($ipLong & $maskLong) === ($subnetLong & $maskLong);
}
}