Files
UploadShied/upload-logger.php
Gregor Klevze 037b176892 fix(scanner): avoid SVG/XML false positives; add allowlist and .gitignore
Relax payload scanner for XML/SVG by passing content-type into checks
Skip JS-style eval() detection when content-type is XML/SVG
Pass request Content-Type through sniff_file_for_php_payload() and raw-body checks
Add common XML/SVG content-types to allowlist.json
Add repository .gitignore (ignore logs, quarantine/, state/, env, vendor, IDE files)
2026-02-07 15:11:15 +01:00

1001 lines
33 KiB
PHP

<?php
/**
* Global Upload Logger (Hardened v3)
* Project: TheWallpapers
*
* Purpose:
* - Log ALL normal uploads via $_FILES (single + multi)
* - Detect common evasion (double extensions, fake images, path tricks, PHP payload in non-php files)
* - Log suspicious "raw body" uploads (php://input / octet-stream) that bypass $_FILES
* - Optional blocking mode
*
* Install:
* - Use as PHP-FPM pool `auto_prepend_file=.../upload-logger.php` (match the file name you actually deploy)
*
* Notes:
* - This cannot *guarantee* interception of every file-write exploit (file_put_contents, ZipArchive extract, etc.)
* but it catches most real-world upload vectors and provides strong forensic logging.
*/
// Ignore CLI: under the command-line SAPI nothing below this guard executes.
if (PHP_SAPI === 'cli') {
return;
}
/* ================= CONFIG ================= */
// All settings below are plain global variables; the functions in this file
// read them through `global` declarations, so their names are part of the contract.
// Log file path (prefer per-vhost path outside webroot if possible)
$logFile = __DIR__ . '/logs/uploads.log';
// Block suspicious uploads (true = block request, false = log only)
$BLOCK_SUSPICIOUS = false;
// Warn if file > 50MB
$MAX_SIZE = 50 * 1024 * 1024;
// Treat payload > 500KB with no $_FILES as suspicious "raw upload"
$RAW_BODY_MIN = 500 * 1024;
// Flood detection (per-IP uploads per window)
$FLOOD_WINDOW_SEC = 60;
$FLOOD_MAX_UPLOADS = 40;
// Content sniffing: scan first N bytes for PHP/shell patterns (keep small for performance)
$SNIFF_MAX_BYTES = 8192; // 8KB
$SNIFF_MAX_FILESIZE = 2 * 1024 * 1024; // only sniff files up to 2MB
// If true, also log request headers that are useful in forensics (careful with privacy)
$LOG_USER_AGENT = true;
// Whether the logger may peek into php://input for a small head scan.
// WARNING: reading php://input can consume the request body for the application.
// Keep this false unless you accept the risk or run behind a proxy that buffers request bodies.
$PEEK_RAW_INPUT = false;
// Trusted proxy IPs that may set a header to indicate the request body was buffered
$TRUSTED_PROXY_IPS = ['127.0.0.1', '::1'];
// Environment variable name or marker file to explicitly allow peeking
$ALLOW_PEEK_ENV = 'UPLOAD_LOGGER_ALLOW_PEEK';
$PEEK_ALLOW_FILE = __DIR__ . '/.upload_logger_allow_peek';
// Auto-enable peek only when explicitly allowed by environment/file or when a
// trusted frontend indicates the body was buffered via header `X-Upload-Logger-Peek: 1`.
// This avoids consuming request bodies unexpectedly.
// $envAllow is true only when the variable holds exactly the string '1'
// (an unset variable makes getenv() return false, which fails the strict comparison).
try {
$envAllow = getenv($ALLOW_PEEK_ENV) === '1';
} catch (Throwable $e) {
$envAllow = false;
}
// Base64/JSON detection thresholds
// NOTE(review): in the code visible here only $BASE64_FINGERPRINT_BYTES is read;
// detect_json_base64_head() uses literal 200/400/1024 values instead of
// $BASE64_MIN_CHARS / $BASE64_DECODE_CHUNK — confirm whether these should be wired in.
$BASE64_MIN_CHARS = 200; // minimum base64 chars to consider a blob
$BASE64_DECODE_CHUNK = 1024; // how many base64 chars to decode for fingerprinting
$BASE64_FINGERPRINT_BYTES = 128; // bytes of decoded head to hash for fingerprint
// Allowlist for known benign base64 sources. Patterns can be simple substrings
// (checked with `strpos`) or PCRE regex when wrapped with '#', e.g. '#^/internal/webhook#'.
// Default in-code allowlist (used if no allowlist file is present)
$BASE64_ALLOWLIST_URI = [
'/api/uploads/avatars',
'/api/v1/avatars',
'/user/avatar',
'/media/upload',
'/api/media',
'/api/uploads',
'/api/v1/uploads',
'/attachments/upload',
'/upload',
'#^/internal/webhook#',
'#/hooks/(github|gitlab|stripe|slack)#',
'/services/avatars',
'/api/profile/photo'
];
// Optional allowlist of content-types (exact match, without params)
$BASE64_ALLOWLIST_CTYPE = [];
// Allowlist file location and environment override
// (a non-empty UPLOAD_LOGGER_ALLOWLIST environment value wins over the default path)
$ALLOWLIST_FILE_DEFAULT = __DIR__ . '/allowlist.json';
$ALLOWLIST_FILE = getenv('UPLOAD_LOGGER_ALLOWLIST') ?: $ALLOWLIST_FILE_DEFAULT;
// When the file exists and decodes to an array, each non-empty 'uris' / 'ctypes'
// list REPLACES the corresponding in-code default (it is not merged with it).
// Read or decode failures are suppressed and leave the defaults untouched.
if (is_file($ALLOWLIST_FILE)) {
$raw = @file_get_contents($ALLOWLIST_FILE);
$json = @json_decode($raw, true);
if (is_array($json)) {
if (!empty($json['uris']) && is_array($json['uris'])) {
$BASE64_ALLOWLIST_URI = $json['uris'];
}
if (!empty($json['ctypes']) && is_array($json['ctypes'])) {
$BASE64_ALLOWLIST_CTYPE = $json['ctypes'];
}
}
}
/**
 * Decide whether a request is exempt from base64 payload inspection.
 *
 * A request is allowlisted when its URI matches any entry of the global
 * $BASE64_ALLOWLIST_URI, or when its Content-Type (parameters stripped,
 * compared case-insensitively) equals any entry of $BASE64_ALLOWLIST_CTYPE.
 *
 * URI entries wrapped in '#...#' are evaluated as PCRE patterns; every other
 * entry is a plain substring test.
 *
 * @param string $uri   Request URI to test.
 * @param string $ctype Raw Content-Type header value (may include parameters).
 * @return bool True when the URI or the content-type is allowlisted.
 */
function base64_is_allowlisted(string $uri, string $ctype): bool
{
    global $BASE64_ALLOWLIST_URI, $BASE64_ALLOWLIST_CTYPE;

    foreach ((array)$BASE64_ALLOWLIST_URI as $pattern) {
        // Entries can come from a hand-edited JSON file. Skip anything that is
        // not a non-empty string: an empty needle matches every URI on PHP 8
        // and a non-string entry can raise a TypeError.
        if (!is_string($pattern) || $pattern === '') {
            continue;
        }

        $isRegex = strlen($pattern) > 1 && $pattern[0] === '#' && substr($pattern, -1) === '#';
        if ($isRegex) {
            // An invalid pattern yields false (warning suppressed) and is treated as "no match".
            if (@preg_match($pattern, $uri) === 1) {
                return true;
            }
        } elseif (strpos($uri, $pattern) !== false) {
            return true;
        }
    }

    if (!empty($BASE64_ALLOWLIST_CTYPE) && $ctype !== '') {
        // Compare only the media type, ignoring parameters such as "; charset=...".
        $base = strtolower(trim(explode(';', $ctype, 2)[0]));
        foreach ((array)$BASE64_ALLOWLIST_CTYPE as $allowed) {
            if (!is_string($allowed) || trim($allowed) === '') {
                continue;
            }
            if (strtolower(trim($allowed)) === $base) {
                return true;
            }
        }
    }

    return false;
}
// Decide whether raw-body peeking may be switched on for this request.
// Marker file: its mere existence counts as opt-in.
$fileAllow = is_file($PEEK_ALLOW_FILE);
$headerAllow = false;
// Header opt-in is honoured only when the direct peer address returned by
// get_client_ip() (REMOTE_ADDR) is listed in $TRUSTED_PROXY_IPS.
if (isset($_SERVER['HTTP_X_UPLOAD_LOGGER_PEEK']) && $_SERVER['HTTP_X_UPLOAD_LOGGER_PEEK'] === '1') {
$clientIp = get_client_ip();
if (in_array($clientIp, $TRUSTED_PROXY_IPS, true)) {
$headerAllow = true;
}
}
// Any one of the three opt-ins is enough to enable peeking.
if ($envAllow || $fileAllow || $headerAllow) {
$PEEK_RAW_INPUT = true;
// NOTE(review): at this point $REQUEST_CTX has not been assigned and the log
// directory has not been created yet (both happen further down), so this entry
// carries no request context and the write may silently fail on a fresh install.
if (function_exists('log_event')) {
log_event('config_info', ['msg' => 'peek_enabled', 'env' => $envAllow, 'file' => $fileAllow, 'header' => $headerAllow]);
}
}
// Store flood counters in a protected directory (avoid /tmp tampering)
$STATE_DIR = __DIR__ . '/state';
// Hash files up to this size for forensics
$HASH_MAX_FILESIZE = 10 * 1024 * 1024; // 10MB
// Quarantine suspicious uploads (move outside webroot, restrictive perms)
$QUARANTINE_ENABLED = true; // enabled by default for hardened deployments
$QUARANTINE_DIR = __DIR__ . '/quarantine';
// Archive inspection
$ARCHIVE_INSPECT = true; // inspect archives moved to quarantine
$ARCHIVE_BLOCK_ON_SUSPICIOUS = false; // optionally block request when archive contains suspicious entries
$ARCHIVE_MAX_ENTRIES = 200; // max entries to inspect in an archive
// Max archive file size to inspect (bytes). Larger archives will be skipped to avoid CPU/IO costs.
$ARCHIVE_MAX_INSPECT_SIZE = 50 * 1024 * 1024; // 50 MB
/* ========================================== */
// Ensure log dir (best effort: failures are suppressed and surface later as missing log output)
$logDir = dirname($logFile);
if (!is_dir($logDir)) {
    @mkdir($logDir, 0750, true);
}

// Ensure state dir (holds the per-IP flood counters)
if (!is_dir($STATE_DIR)) {
    @mkdir($STATE_DIR, 0750, true);
}

// Ensure quarantine dir if enabled and enforce strict permissions
if ($QUARANTINE_ENABLED) {
    if (!is_dir($QUARANTINE_DIR)) {
        @mkdir($QUARANTINE_DIR, 0700, true);
    }

    if (is_dir($QUARANTINE_DIR)) {
        // Attempt to enforce strict permissions (owner only)
        @chmod($QUARANTINE_DIR, 0700);

        // Verify perms: group/other bits must be zero
        $perms = @fileperms($QUARANTINE_DIR);
        if ($perms !== false) {
            // Mask to rwxrwxrwx (lower 9 bits)
            $mask = $perms & 0x1FF;

            // Legacy octal literal on purpose: the explicit `0o` prefix is only
            // parsed by PHP 8.1+, and a parse error in a prepended file would
            // take every request down on older runtimes.
            if (($mask & 0077) !== 0) {
                if (function_exists('log_event')) {
                    log_event('config_warning', [
                        'msg' => 'quarantine_dir_perms_not_strict',
                        'path' => $QUARANTINE_DIR,
                        'perms_octal' => sprintf('%o', $mask),
                    ]);
                }
            }
        }
    } else {
        // Directory could not be created: quarantine_file() will refuse to move anything.
        if (function_exists('log_event')) {
            log_event('config_error', ['msg' => 'quarantine_dir_missing', 'path' => $QUARANTINE_DIR]);
        }
    }
}
// Attempt to enforce owner:group for quarantine directory when possible
$DESIRED_QUARANTINE_OWNER = 'root';
$DESIRED_QUARANTINE_GROUP = 'www-data';
if ($QUARANTINE_ENABLED && is_dir($QUARANTINE_DIR)) {
// If running as root, attempt to chown/chgrp to desired values
if (function_exists('posix_geteuid') && posix_geteuid() === 0) {
@chown($QUARANTINE_DIR, $DESIRED_QUARANTINE_OWNER);
@chgrp($QUARANTINE_DIR, $DESIRED_QUARANTINE_GROUP);
} elseif (function_exists('posix_getegid') && function_exists('posix_getgrgid')) {
// Not root: try at least to set group to the process group
$egid = posix_getegid();
$gr = posix_getgrgid($egid);
if ($gr && isset($gr['name'])) {
@chgrp($QUARANTINE_DIR, $gr['name']);
}
}
// Verify owner/group and log if not matching desired values
// Both flags stay false when the posix lookups are unavailable, so the
// warning below also fires on systems without the posix extension.
$ownerOk = false;
$groupOk = false;
$statUid = @fileowner($QUARANTINE_DIR);
$statGid = @filegroup($QUARANTINE_DIR);
if ($statUid !== false && function_exists('posix_getpwuid')) {
$pw = posix_getpwuid($statUid);
if ($pw && isset($pw['name']) && $pw['name'] === $DESIRED_QUARANTINE_OWNER) {
$ownerOk = true;
}
}
if ($statGid !== false && function_exists('posix_getgrgid')) {
$gg = posix_getgrgid($statGid);
if ($gg && isset($gg['name']) && $gg['name'] === $DESIRED_QUARANTINE_GROUP) {
$groupOk = true;
}
}
// NOTE(review): this check runs before the request-method gate further down,
// so a mismatch is logged on every non-CLI request, not only on uploads.
// NOTE(review): a root-owned directory with mode 0700 would not be writable by a
// non-root PHP process — confirm the intended owner/mode combination.
if (!($ownerOk && $groupOk)) {
log_event('config_warning', [
'msg' => 'quarantine_owner_group_mismatch',
'path' => $QUARANTINE_DIR,
'desired_owner' => $DESIRED_QUARANTINE_OWNER,
'desired_group' => $DESIRED_QUARANTINE_GROUP,
'current_uid' => $statUid,
'current_gid' => $statGid,
]);
}
}
/* ---------- Utils ---------- */
/**
 * Flatten a value to a single-line string by replacing newline, carriage
 * return and tab characters with underscores.
 *
 * @param mixed $str Value to clean; it is cast to string first.
 * @return string The cleaned string.
 */
function upload_clean($str): string
{
    $replacements = [
        "\n" => '_',
        "\r" => '_',
        "\t" => '_',
    ];

    return strtr((string)$str, $replacements);
}
/**
 * Make a value safe for single-line log output.
 *
 * Arrays are processed recursively with their keys preserved. Booleans,
 * integers, floats and null pass through unchanged. Anything else is cast to
 * string and has ASCII control characters (0x00-0x1F, 0x7F) replaced by '_'.
 *
 * @param mixed $value Value to normalize.
 * @return mixed Normalized value with the same overall structure.
 */
function log_normalize_value($value)
{
    if (is_array($value)) {
        // array_map() with a single array keeps the original keys.
        return array_map('log_normalize_value', $value);
    }

    $isPlainScalar = $value === null || is_bool($value) || is_int($value) || is_float($value);
    if ($isPlainScalar) {
        return $value;
    }

    return preg_replace('/[\x00-\x1F\x7F]/', '_', (string)$value);
}
/**
 * Produce an identifier used to correlate all log entries of one request.
 *
 * @return string 16 hex characters from random_bytes(), or a uniqid()-based
 *                value prefixed with "req" when no random source is available.
 */
function generate_request_id(): string
{
    try {
        $entropy = random_bytes(8);
    } catch (Throwable $e) {
        // No usable randomness source: fall back to a time-based identifier.
        return uniqid('req', true);
    }

    return bin2hex($entropy);
}
/**
 * Append one JSON line describing an event to the upload log.
 *
 * The line merges, in order: a UTC timestamp and the event name, the global
 * $REQUEST_CTX (when it is an array), and the caller-supplied $data; later
 * keys win. All values go through log_normalize_value() first.
 *
 * @param string $event Event name stored under the 'event' key.
 * @param array  $data  Extra fields for this entry.
 * @return void
 */
function log_event(string $event, array $data = []): void
{
    global $logFile, $REQUEST_CTX;

    $payload = array_merge(
        ['ts' => gmdate('c'), 'event' => $event],
        is_array($REQUEST_CTX) ? $REQUEST_CTX : [],
        $data
    );
    $payload = log_normalize_value($payload);

    // Client-controlled fields (file names, URIs, user agents) may contain
    // invalid UTF-8. Without these flags json_encode() fails outright and the
    // whole entry would be lost — exactly when forensic detail matters most.
    $flags = JSON_UNESCAPED_SLASHES | JSON_PARTIAL_OUTPUT_ON_ERROR;
    if (defined('JSON_INVALID_UTF8_SUBSTITUTE')) {
        // PHP 7.2+: keep the string and replace only the bad bytes with U+FFFD.
        $flags |= JSON_INVALID_UTF8_SUBSTITUTE;
    }

    $json = json_encode($payload, $flags);
    if ($json === false) {
        // Last resort: record that an entry could not be serialized.
        $json = json_encode([
            'ts' => gmdate('c'),
            'event' => 'log_error',
            'error' => json_last_error_msg(),
        ], JSON_UNESCAPED_SLASHES);
    }

    // Best-effort write; LOCK_EX keeps concurrent requests from interleaving lines.
    @file_put_contents($logFile, $json . "\n", FILE_APPEND | LOCK_EX);
}
/**
 * Return the address of the directly connected peer.
 *
 * Only REMOTE_ADDR is consulted; forwarded-for style headers are ignored
 * because the client can set them freely.
 *
 * @return string The peer address, or 'unknown' when REMOTE_ADDR is not set.
 */
function get_client_ip(): string
{
    if (isset($_SERVER['REMOTE_ADDR'])) {
        return $_SERVER['REMOTE_ADDR'];
    }

    return 'unknown';
}
/**
 * Best-effort identification of the user behind the request.
 *
 * Looks at an already-populated $_SESSION['user_id'] first (a session is
 * never started here), then at the HTTP basic-auth user name.
 *
 * @return string The user identifier, or 'guest' when neither source has one.
 */
function get_user_id(): string
{
    $sessionUser = null;
    if (isset($_SESSION) && is_array($_SESSION)) {
        $sessionUser = $_SESSION['user_id'] ?? null;
    }
    if ($sessionUser !== null) {
        return (string)$sessionUser;
    }

    // Extend here if the application carries its own auth headers.
    $basicAuthUser = $_SERVER['PHP_AUTH_USER'] ?? '';
    if (!empty($basicAuthUser)) {
        return (string)$basicAuthUser;
    }

    return 'guest';
}
/**
 * Collect the request attributes used throughout the logger.
 *
 * @return array Positional list: [ip, uri, method, content-type,
 *               content-length (int), user-agent, transfer-encoding].
 *               The user-agent is an empty string when $LOG_USER_AGENT is off;
 *               missing URI/method default to 'unknown', other missing values
 *               to '' or 0.
 */
function get_request_summary(): array
{
    global $LOG_USER_AGENT;

    $userAgent = '';
    if ($LOG_USER_AGENT) {
        $userAgent = $_SERVER['HTTP_USER_AGENT'] ?? '';
    }

    return [
        get_client_ip(),
        $_SERVER['REQUEST_URI'] ?? 'unknown',
        $_SERVER['REQUEST_METHOD'] ?? 'unknown',
        $_SERVER['CONTENT_TYPE'] ?? '',
        (int)($_SERVER['CONTENT_LENGTH'] ?? 0),
        $userAgent,
        $_SERVER['HTTP_TRANSFER_ENCODING'] ?? '',
    ];
}
/**
 * Simple per-IP flood counter stored under $STATE_DIR with a TTL window.
 *
 * Each IP gets one small state file containing "<window start>:<count>".
 * Every call increments the count; once more than $FLOOD_WINDOW_SEC seconds
 * have passed since the window start, the window restarts at the current time.
 *
 * The counter fails open: when the state file cannot be opened or locked the
 * function returns 1, so a broken state directory never looks like a flood.
 *
 * @param string $ip Client address used as the counter key.
 * @return int Number of hits recorded for this IP in the current window,
 *             including this call.
 */
function flood_check(string $ip): int
{
    global $FLOOD_WINDOW_SEC, $STATE_DIR;

    // Hash the IP so the file name is fixed-length and filesystem-safe.
    $stateFile = $STATE_DIR . '/upl_' . md5('v3|' . $ip);
    $now = time();

    // 'c+' opens for read/write and creates the file without truncating it.
    $handle = @fopen($stateFile, 'c+');
    if ($handle === false) {
        return 1;
    }
    if (!flock($handle, LOCK_EX)) {
        // Same fail-open answer as for an unopenable file.
        fclose($handle);
        return 1;
    }

    $windowStart = $now;
    $hits = 0;
    $raw = stream_get_contents($handle);
    if (is_string($raw) && preg_match('/^(\d+):(\d+)$/', trim($raw), $m)) {
        $windowStart = (int)$m[1];
        $hits = (int)$m[2];
    }

    if (($now - $windowStart) > $FLOOD_WINDOW_SEC) {
        // Window expired: start counting afresh.
        $windowStart = $now;
        $hits = 0;
    }
    $hits++;

    // Rewrite the state in place while still holding the lock.
    rewind($handle);
    ftruncate($handle, 0);
    fwrite($handle, $windowStart . ':' . $hits);
    fflush($handle);
    flock($handle, LOCK_UN);
    fclose($handle);

    return $hits;
}
/**
 * Flag file names that use common upload-evasion tricks.
 *
 * The name is lower-cased and reported as suspicious when it contains a path
 * traversal sequence or NUL byte, ends in a server-side script extension,
 * carries such an extension in the middle (double extension), or is a dotfile
 * whose name starts with a PHP-like extension.
 *
 * @param string $name File name as supplied by the client.
 * @return bool True when any of the checks matches.
 */
function is_suspicious_filename(string $name): bool
{
    $lowered = strtolower($name);

    // Path traversal / weird separators in filename
    foreach (['../', '..\\', "\0"] as $needle) {
        if (strpos($lowered, $needle) !== false) {
            return true;
        }
    }

    $patterns = [
        // Dangerous extensions (final)
        '/\.(php|phtml|phar|php\d|pl|cgi|sh|asp|aspx|jsp)$/i',
        // Double-extension tricks anywhere (e.g., image.php.jpg)
        '/\.(php|phtml|phar|php\d|pl|cgi|sh|asp|aspx|jsp)\./i',
        // Hidden dotfile php-like names
        '/^\.(php|phtml|phar|php\d)/i',
    ];
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $lowered)) {
            return true;
        }
    }

    return false;
}
/**
 * Scan the head of an uploaded temp file for PHP/shell payload markers.
 *
 * Only genuine uploads (is_uploaded_file) that are non-empty and no larger
 * than $SNIFF_MAX_FILESIZE are read, and at most $SNIFF_MAX_BYTES bytes are
 * inspected. The detected MIME type is handed to the scanner so it can relax
 * its JS-oriented rule for SVG/XML content.
 *
 * @param string $tmpPath Path of the uploaded temp file.
 * @return bool True when the inspected head contains payload markers;
 *              false otherwise, including when the file was not inspected.
 */
function sniff_file_for_php_payload(string $tmpPath): bool
{
    global $SNIFF_MAX_BYTES, $SNIFF_MAX_FILESIZE;

    if (!is_uploaded_file($tmpPath)) {
        return false;
    }

    $fileSize = @filesize($tmpPath);
    if ($fileSize === false || $fileSize <= 0 || $fileSize > $SNIFF_MAX_FILESIZE) {
        return false;
    }

    $readLength = min($SNIFF_MAX_BYTES, $fileSize);
    $head = @file_get_contents($tmpPath, false, null, 0, $readLength);
    if ($head === false) {
        return false;
    }

    return payload_contains_php_markers($head, detect_real_mime($tmpPath));
}
/**
 * Check a text fragment for signs of server-side PHP code.
 *
 * PHP open tags (anything starting with "<?" except "<?xml") and calls to
 * typical shell/obfuscation functions always count. A bare "eval(" counts only
 * when the content type does not mention xml or svg, because it is common in
 * JavaScript embedded in SVG documents.
 *
 * @param string $text        Fragment to inspect.
 * @param string $contentType Content type of the fragment; '' when unknown.
 * @return bool True when a marker is found.
 */
function payload_contains_php_markers(string $text, string $contentType = ''): bool
{
    // Explicit PHP tags or short-open tags (but not '<?xml')
    if (preg_match('/<\?php|<\?=|<\?(?!xml)/i', $text)) {
        return true;
    }

    // Server-side function calls are strong indicators for any content type
    if (preg_match('/base64_decode\s*\(|gzinflate\s*\(|shell_exec\s*\(|passthru\s*\(|system\s*\(|proc_open\s*\(|popen\s*\(|exec\s*\(/i', $text)) {
        return true;
    }

    // XML/SVG content is exempt from the ambiguous eval( rule
    $xmlLike = $contentType !== '' && (bool)preg_match('/xml|svg/i', $contentType);
    if ($xmlLike) {
        return false;
    }

    return (bool)preg_match('/\beval\s*\(/i', $text);
}
/**
 * Find a long base64 blob at the start of a request body and decode its head.
 *
 * Two shapes are recognised: a JSON attribute named file/data/payload/content
 * whose value is at least 200 base64 characters (optionally preceded by a
 * "data:...," prefix), and a body that itself begins with at least 400 base64
 * characters. Only the first 1024 base64 characters are decoded.
 *
 * @param string $head       Leading bytes of the request body.
 * @param int    $maxDecoded Maximum number of decoded bytes to return.
 * @return array Keys: found (bool), decoded_head (string|null),
 *               reason (string|null; 'base64_decode_failed' when a blob was
 *               found but strict decoding rejected it).
 */
function detect_json_base64_head(string $head, int $maxDecoded = 1024): array
{
    // Shared decoder for both blob shapes.
    $decodeBlob = static function (string $b64) use ($maxDecoded): array {
        $chunk = substr($b64, 0, 1024);

        // Pad up to a multiple of four so a short blob can still be strictly decoded.
        $remainder = strlen($chunk) % 4;
        if ($remainder !== 0) {
            $chunk .= str_repeat('=', 4 - $remainder);
        }

        $decoded = @base64_decode($chunk, true);
        if ($decoded === false) {
            return ['found' => true, 'decoded_head' => null, 'reason' => 'base64_decode_failed'];
        }

        return ['found' => true, 'decoded_head' => substr($decoded, 0, $maxDecoded), 'reason' => null];
    };

    $blobPatterns = [
        // JSON attribute carrying base64 content
        '/"(?:file|data|payload|content)"\s*:\s*"(?:data:[^,]+,)?([A-Za-z0-9+\/=]{200,})"/i',
        // Raw base64 body (no JSON wrapper)
        '/^\s*([A-Za-z0-9+\/=]{400,})/s',
    ];
    foreach ($blobPatterns as $pattern) {
        if (preg_match($pattern, $head, $match)) {
            return $decodeBlob($match[1]);
        }
    }

    return ['found' => false, 'decoded_head' => null, 'reason' => null];
}
/**
 * Determine the MIME type of an uploaded temp file from its content.
 *
 * @param string $tmpPath Path of the uploaded temp file.
 * @return string MIME type reported by fileinfo, or 'unknown' when the path
 *                is not a genuine upload, fileinfo is unavailable, or
 *                detection yields nothing.
 */
function detect_real_mime(string $tmpPath): string
{
    if (!is_uploaded_file($tmpPath) || !function_exists('finfo_open')) {
        return 'unknown';
    }

    $finfo = @finfo_open(FILEINFO_MIME_TYPE);
    if (!$finfo) {
        return 'unknown';
    }

    $mime = @finfo_file($finfo, $tmpPath);
    @finfo_close($finfo);

    return (is_string($mime) && $mime !== '') ? $mime : 'unknown';
}
/**
 * Detect files that are named like images but do not contain image data.
 *
 * @param string $name     File name (extension match is case-insensitive).
 * @param string $realMime Content-derived MIME type; it must start with the
 *                         lower-case prefix "image/" to count as an image
 *                         (image/svg+xml qualifies).
 * @return bool True when the name has an image extension and the MIME type
 *              is not image/*.
 */
function is_fake_image(string $name, string $realMime): bool
{
    $namedLikeImage = preg_match('/\.(png|jpe?g|gif|webp|bmp|ico|svg)$/i', $name);
    if (!$namedLikeImage) {
        return false;
    }

    return !preg_match('/^image\//', $realMime);
}
/**
 * Tell whether an upload looks like an archive, judged by either its file
 * extension or its content-derived MIME type.
 *
 * @param string $name     File name.
 * @param string $realMime Content-derived MIME type.
 * @return bool True when the extension or the MIME type indicates an archive.
 */
function is_archive(string $name, string $realMime): bool
{
    $archiveByName = (bool)preg_match('/\.(zip|rar|7z|tar|gz|tgz)$/i', $name);

    return $archiveByName
        || (bool)preg_match('/(zip|x-7z-compressed|x-rar|x-tar|gzip)/i', $realMime);
}
/**
 * Compute forensic digests of an uploaded temp file.
 *
 * @param string $tmpPath Path of the uploaded temp file.
 * @param int    $size    Reported file size in bytes.
 * @return array Digests keyed 'sha1' and 'md5' (only those that could be
 *               computed); empty when the path is not a genuine upload or
 *               the size is non-positive or above $HASH_MAX_FILESIZE.
 */
function compute_hashes(string $tmpPath, int $size): array
{
    global $HASH_MAX_FILESIZE;

    $eligible = is_uploaded_file($tmpPath) && $size > 0 && $size <= $HASH_MAX_FILESIZE;
    if (!$eligible) {
        return [];
    }

    $digests = [];
    foreach (['sha1', 'md5'] as $algorithm) {
        $digest = @hash_file($algorithm, $tmpPath);
        if (is_string($digest)) {
            $digests[$algorithm] = $digest;
        }
    }

    return $digests;
}
/**
 * Move an uploaded temp file into the quarantine directory.
 *
 * The stored name is the file's SHA-1 (when present in $hashes) or a random
 * token, followed by the original extension if that extension is 1-10
 * lower-case alphanumerics. The stored file is made owner read/write only.
 *
 * @param string $tmpPath  Path of the uploaded temp file.
 * @param string $origName Client-supplied file name (used for the extension only).
 * @param array  $hashes   Digests of the file; 'sha1' is used when set.
 * @return array ['ok' => bool, 'path' => string]. 'path' is '' when the move
 *               was not attempted and the intended destination otherwise.
 */
function quarantine_file(string $tmpPath, string $origName, array $hashes): array
{
    global $QUARANTINE_ENABLED, $QUARANTINE_DIR;

    if (!$QUARANTINE_ENABLED || !is_uploaded_file($tmpPath) || !is_dir($QUARANTINE_DIR)) {
        return ['ok' => false, 'path' => ''];
    }

    // Keep the extension only when it is short and strictly alphanumeric.
    $ext = strtolower((string)pathinfo($origName, PATHINFO_EXTENSION));
    if (!preg_match('/^[a-z0-9]{1,10}$/', $ext)) {
        $ext = '';
    }
    $suffix = $ext ? '.' . $ext : '';

    $stem = $hashes['sha1'] ?? '';
    if ($stem === '') {
        try {
            $stem = bin2hex(random_bytes(16));
        } catch (Throwable $e) {
            $stem = uniqid('q', true);
        }
    }

    $target = rtrim($QUARANTINE_DIR, '/\\') . '/' . $stem . $suffix;
    if (@move_uploaded_file($tmpPath, $target)) {
        @chmod($target, 0600);
        return ['ok' => true, 'path' => $target];
    }

    return ['ok' => false, 'path' => $target];
}
/**
 * Inspect an archive file in quarantine without extracting it.
 *
 * Supports ZIP via ZipArchive and TAR (.tar, .tar.gz, .tgz, .tar.bz2) via
 * PharData when those classes are available; the type is chosen from the
 * path's extension. At most $ARCHIVE_MAX_ENTRIES entries are examined, and
 * files larger than $ARCHIVE_MAX_INSPECT_SIZE are skipped entirely.
 *
 * An entry is suspicious when its name contains a traversal sequence or starts
 * with a path separator ('path_traversal'), or ends in a server-side script
 * extension ('ext'); both reasons may be combined as 'path_traversal,ext'.
 *
 * @param string $path Path of the archive to inspect.
 * @return array ['entries' => int (number of entries examined),
 *               'suspicious_entries' => list of ['name', 'suspicious', 'reason'],
 *               'unsupported' => bool], plus 'too_large' => true when the
 *               file was skipped because of its size.
 */
function inspect_archive_quarantine(string $path): array
{
    global $ARCHIVE_MAX_ENTRIES;
    global $ARCHIVE_MAX_INSPECT_SIZE;

    // Avoid inspecting extremely large archives
    $fsz = @filesize($path);
    if ($fsz !== false && $fsz > $ARCHIVE_MAX_INSPECT_SIZE) {
        return ['entries' => 0, 'suspicious_entries' => [], 'unsupported' => false, 'too_large' => true];
    }

    $out = ['entries' => 0, 'suspicious_entries' => [], 'unsupported' => false];
    if (!is_file($path)) {
        $out['unsupported'] = true;
        return $out;
    }

    // Shared entry check for both archive formats; returns null for benign names.
    $classify = static function (string $name): ?array {
        $reasons = [];
        $hasTraversal = strpos($name, '../') !== false
            || strpos($name, '..\\') !== false
            || strpos($name, '/') === 0
            || strpos($name, '\\') === 0;
        if ($hasTraversal) {
            $reasons[] = 'path_traversal';
        }
        if (preg_match('/\.(php|phtml|phar|php\d|pl|cgi|sh|asp|aspx|jsp)$/i', $name)) {
            $reasons[] = 'ext';
        }
        if ($reasons === []) {
            return null;
        }
        return ['name' => $name, 'suspicious' => true, 'reason' => implode(',', $reasons)];
    };

    $lower = strtolower($path);

    // ZIP
    if (class_exists('ZipArchive') && preg_match('/\.zip$/i', $lower)) {
        $za = new ZipArchive();
        if ($za->open($path) === true) {
            $limit = min($za->numFiles, $ARCHIVE_MAX_ENTRIES);
            $out['entries'] = $limit;
            for ($i = 0; $i < $limit; $i++) {
                $stat = $za->statIndex($i);
                if ($stat && isset($stat['name'])) {
                    $entry = $classify((string)$stat['name']);
                    if ($entry !== null) {
                        $out['suspicious_entries'][] = $entry;
                    }
                }
            }
            $za->close();
        } else {
            $out['unsupported'] = true;
        }
        return $out;
    }

    // TAR (including .tar.gz) via PharData if available
    if (class_exists('PharData') && preg_match('/\.(tar|tar\.gz|tgz|tar\.bz2)$/i', $lower)) {
        try {
            $ph = new PharData($path);
            $examined = 0;
            foreach (new RecursiveIteratorIterator($ph) as $file) {
                // Test the cap before counting so 'entries' never exceeds it.
                if ($examined >= $ARCHIVE_MAX_ENTRIES) {
                    break;
                }
                $examined++;
                $entry = $classify((string)$file);
                if ($entry !== null) {
                    $out['suspicious_entries'][] = $entry;
                }
            }
            $out['entries'] = $examined;
        } catch (Exception $e) {
            // Unreadable or corrupt archive: report as unsupported rather than fail the request.
            $out['unsupported'] = true;
        }
        return $out;
    }

    // unsupported archive type
    $out['unsupported'] = true;
    return $out;
}
/* ---------- Context ---------- */
// Snapshot the request attributes once; the variables below are reused by the
// raw-body and upload handling sections that follow.
[$ip, $uri, $method, $ctype, $clen, $ua, $te] = get_request_summary();
$userId = get_user_id();
$requestId = generate_request_id();
// log_event() reads $REQUEST_CTX through `global` and merges it into every
// entry written from this point on.
$REQUEST_CTX = [
'request_id' => $requestId,
'ip' => $ip,
'uri' => $uri,
'method' => $method,
'ctype' => $ctype,
'clen' => (int)$clen,
'user' => $userId,
'ua' => $ua,
'transfer_encoding' => $te,
];
// Only upload-capable methods
// (top-level return: none of the statements below run for any other method)
if (!in_array($method, ['POST', 'PUT', 'PATCH'], true)) {
return;
}
// Log suspicious raw-body uploads that bypass $_FILES
// (Do this early so we capture endpoints that stream content into file_put_contents)
if (empty($_FILES)) {
// Header-only heuristics: any one of them marks the request for a 'raw_body' entry.
$rawSuspicious = false;
if ($clen >= $RAW_BODY_MIN) $rawSuspicious = true;
if ($te !== '') $rawSuspicious = true;
if (stripos($ctype, 'application/octet-stream') !== false) $rawSuspicious = true;
// NOTE(review): every file-less JSON request is flagged by this line — confirm the log volume is acceptable.
if (stripos($ctype, 'application/json') !== false) $rawSuspicious = true;
// Guarded peek into php://input for JSON/base64 payload detection.
// Only perform when explicitly enabled and when CONTENT_LENGTH is small enough
// to avoid consuming large bodies or affecting application behavior.
// NOTE(review): this `global` statement sits at file level; it only has an effect
// if the file is ever included from inside a function — confirm whether that is intended.
global $PEEK_RAW_INPUT, $SNIFF_MAX_FILESIZE, $SNIFF_MAX_BYTES;
if ($PEEK_RAW_INPUT && $clen > 0 && $clen <= $SNIFF_MAX_FILESIZE) {
$peek = '';
$in = @fopen('php://input', 'r');
if ($in !== false) {
// read a small head only
$peek = @stream_get_contents($in, $SNIFF_MAX_BYTES);
@fclose($in);
}
if ($peek !== false && $peek !== '') {
// Detect JSON-embedded base64 and inspect decoded head
$b = detect_json_base64_head($peek, 1024);
if (!empty($b['found'])) {
// skip fingerprinting/inspection for allowlisted URIs/CTypes
if (base64_is_allowlisted($uri, $ctype)) {
log_event('raw_body_base64_ignored', ['uri' => $uri, 'ctype' => $ctype]);
// mark suspicious only if other raw indicators exist
// continue without further decoding/fingerprinting
// (the assignment below leaves $rawSuspicious unchanged)
$rawSuspicious = $rawSuspicious || false;
} else {
// log base64 blob detected; include fingerprint of decoded head when available
$fingerprints = [];
if (!empty($b['decoded_head'])) {
$decoded_head = $b['decoded_head'];
// Fingerprint only the first $BASE64_FINGERPRINT_BYTES decoded bytes.
$sample = substr($decoded_head, 0, $BASE64_FINGERPRINT_BYTES);
$fingerprints['sha1'] = @sha1($sample);
$fingerprints['md5'] = @md5($sample);
if (payload_contains_php_markers($decoded_head, $ctype)) {
$rawSuspicious = true;
log_event('raw_body_php_payload', [
'len' => (int)$clen,
'ctype' => $ctype,
'reason' => $b['reason'] ?? 'base64_embedded',
'fingerprints' => $fingerprints,
]);
} else {
log_event('raw_body_base64', [
'len' => (int)$clen,
'ctype' => $ctype,
'reason' => $b['reason'] ?? 'base64_embedded',
'fingerprints' => $fingerprints,
]);
}
} else {
// Blob found but not decodable: log it without fingerprints.
log_event('raw_body_base64', [
'len' => (int)$clen,
'ctype' => $ctype,
'reason' => $b['reason'] ?? 'base64_embedded',
]);
}
}
} else {
// Also scan the raw head itself for PHP markers (text/plain, octet-stream, etc.)
// NOTE(review): this scan is skipped whenever a base64 blob was found above,
// so plain-text markers next to a blob are not checked — confirm this is intended.
if (payload_contains_php_markers($peek, $ctype)) {
log_event('raw_body_php_payload', [
'len' => (int)$clen,
'ctype' => $ctype,
'reason' => 'head_php_markers',
]);
$rawSuspicious = true;
}
}
}
}
// Summary entry for any request flagged by the header heuristics or the peek.
if ($rawSuspicious) {
log_event('raw_body', [
'len' => (int)$clen,
'ctype' => $ctype,
]);
}
}
// multipart/form-data but no $_FILES
// A multipart/form-data request that produced no $_FILES entries is logged as
// 'multipart_no_files'; the entry carries only the shared request context.
if (
empty($_FILES) &&
$ctype &&
stripos($ctype, 'multipart/form-data') !== false
) {
log_event('multipart_no_files', []);
}
/* ---------- Upload Handling ---------- */
if (!empty($_FILES)) {
    // Per request flood check: count each file below too
    // (Optional: log the current counter at request-level)
    $reqCount = flood_check($ip);
    if ($reqCount > $GLOBALS['FLOOD_MAX_UPLOADS']) {
        log_event('flood_alert', ['count' => (int)$reqCount]);
        // Don't block purely on this here unless you want to
        // if ($BLOCK_SUSPICIOUS) { http_response_code(429); exit('Too many uploads'); }
    }

    // PHP mirrors the form field structure inside each $_FILES attribute, so
    // `name` may be a scalar (field), a list (field[]), a map (field[key]) or
    // nested deeper (field[a][b]). Walk that structure by key, recursively, so
    // no uploaded file escapes inspection because of how its field was named.
    $walkUploads = static function ($name, $type, $size, $tmp, $err) use (&$walkUploads, $ip, $uri, $userId, $ua): void {
        if (!is_array($name)) {
            handle_file_v3($ip, $uri, $userId, $ua, $name, $type, $size, $tmp, $err);
            return;
        }
        foreach ($name as $key => $childName) {
            $walkUploads(
                $childName,
                is_array($type) ? ($type[$key] ?? '') : '',
                is_array($size) ? ($size[$key] ?? 0) : 0,
                is_array($tmp) ? ($tmp[$key] ?? '') : '',
                is_array($err) ? ($err[$key] ?? UPLOAD_ERR_NO_FILE) : UPLOAD_ERR_NO_FILE
            );
        }
    };

    foreach ($_FILES as $file) {
        if (!is_array($file) || !isset($file['name'])) {
            continue;
        }
        $walkUploads(
            $file['name'],
            $file['type'] ?? '',
            $file['size'] ?? 0,
            $file['tmp_name'] ?? '',
            $file['error'] ?? UPLOAD_ERR_NO_FILE
        );
    }

    // This file shares the global scope with the application: do not leave the closure behind.
    unset($walkUploads);
}
/* ---------- Core ---------- */
/**
 * Inspect, log and (when flagged) quarantine a single uploaded file.
 *
 * For a successfully received file this runs the flood counter, the name,
 * MIME, size, archive and content checks, then writes an 'upload' entry. A file
 * flagged as suspicious additionally gets a 'suspicious' entry, is moved to
 * quarantine and, when $BLOCK_SUSPICIOUS is on, the request ends with HTTP 403.
 *
 * Archives are moved to quarantine whenever $QUARANTINE_ENABLED is on, even if
 * nothing else is wrong with them, because inspection works on the quarantined
 * copy. NOTE: a quarantined file is moved away from its temp path, so the
 * application will no longer find that upload.
 *
 * @param mixed $ip   Client address (key for the flood counter).
 * @param mixed $uri  Request URI (not used here; kept for call compatibility).
 * @param mixed $user User identifier (not used here; kept for call compatibility).
 * @param mixed $ua   User agent (not used here; kept for call compatibility).
 * @param mixed $name Client-supplied file name.
 * @param mixed $type Client-supplied MIME type.
 * @param mixed $size Reported size in bytes.
 * @param mixed $tmp  Path of the uploaded temp file.
 * @param mixed $err  Upload error code (UPLOAD_ERR_*).
 * @return void
 */
function handle_file_v3($ip, $uri, $user, $ua, $name, $type, $size, $tmp, $err): void
{
    global $BLOCK_SUSPICIOUS, $MAX_SIZE, $FLOOD_MAX_UPLOADS;
    // Without this import the three settings are undefined inside the function
    // and the whole archive quarantine/inspection branch silently never runs.
    global $QUARANTINE_ENABLED, $ARCHIVE_INSPECT, $ARCHIVE_BLOCK_ON_SUSPICIOUS;

    if ($err !== UPLOAD_ERR_OK) {
        // Log non-OK upload errors for forensics
        log_event('upload_error', [
            'name' => $name,
            'err' => (int)$err,
        ]);
        return;
    }

    $origName = (string)$name;
    $name = basename($origName);
    $type = (string)$type;
    $size = (int)$size;
    $tmp = (string)$tmp;

    // Flood count per file (stronger)
    $count = flood_check($ip);
    if ($count > $FLOOD_MAX_UPLOADS) {
        log_event('flood_alert', ['count' => (int)$count]);
        // Optional blocking:
        // if ($BLOCK_SUSPICIOUS) { http_response_code(429); exit('Too many uploads'); }
    }

    // Everything that has to read the temp file runs first: once the file has
    // been moved to quarantine these helpers can no longer see it.
    $real = detect_real_mime($tmp);
    $hashes = compute_hashes($tmp, $size);
    $hasPhpPayload = sniff_file_for_php_payload($tmp);

    /* Detection */
    $suspicious = false;
    $reasons = [];
    $quarantine = null;               // result of the (single) quarantine attempt
    $blockMessage = 'Upload blocked'; // response body used when the request is blocked

    // Path components or modified basename
    if ($origName !== $name || strpos($origName, '/') !== false || strpos($origName, '\\') !== false) {
        $suspicious = true;
        $reasons[] = 'bad_name';
    }

    // Dangerous / tricky filename
    if (is_suspicious_filename($name)) {
        $suspicious = true;
        $reasons[] = 'bad_name';
    }

    // Fake images (name says image, MIME isn't)
    if (is_fake_image($name, $real)) {
        $suspicious = true;
        $reasons[] = 'fake_image';
    }

    // Very large file (not automatically suspicious; depends on your app)
    if ($size > $MAX_SIZE) {
        log_event('big_upload', [
            'name' => $name,
            'size' => (int)$size,
        ]);
        $reasons[] = 'big_file';
    }

    // Archive uploads are higher risk (often used to smuggle payloads)
    if (is_archive($name, $real)) {
        $reasons[] = 'archive';
        log_event('archive_upload', [
            'name' => $name,
            'real_mime' => $real,
        ]);

        // Move to quarantine and inspect archive contents safely (no extraction)
        if ($QUARANTINE_ENABLED) {
            $quarantine = quarantine_file($tmp, $origName, $hashes);
            if ($quarantine['ok']) {
                $qpath = $quarantine['path'];
                log_event('archive_quarantined', ['path' => $qpath]);

                if ($ARCHIVE_INSPECT) {
                    $inspect = inspect_archive_quarantine($qpath);
                    log_event('archive_inspect', ['path' => $qpath, 'summary' => $inspect]);

                    if (!empty($inspect['suspicious_entries'])) {
                        $suspicious = true;
                        $reasons[] = 'archive_contains_suspicious';
                        // Blocking is deferred to the end of the function so the
                        // 'upload' and 'suspicious' entries are written first.
                        if ($ARCHIVE_BLOCK_ON_SUSPICIOUS) {
                            $blockMessage = 'Upload blocked - suspicious archive';
                        }
                    }
                }
            } else {
                log_event('archive_quarantine_failed', ['tmp' => $tmp, 'dest' => $quarantine['path']]);
            }
        }
    }

    // Content sniffing for PHP payload (fast head scan, only for small files)
    if ($hasPhpPayload) {
        $suspicious = true;
        $reasons[] = 'php_payload';
    }

    // The two name checks can both add 'bad_name'; report each reason once.
    $reasons = array_values(array_unique($reasons));

    /* Logging */
    log_event('upload', [
        'name' => $name,
        'orig_name' => $origName,
        'size' => (int)$size,
        'type' => $type,
        'real_mime' => $real,
        'tmp' => $tmp,
        'hashes' => $hashes,
        'flags' => $reasons,
    ]);

    /* Alert / Block */
    if ($suspicious) {
        // Reuse the archive quarantine result: the temp file can only be moved once.
        if ($quarantine === null) {
            $quarantine = quarantine_file($tmp, $origName, $hashes);
        }

        log_event('suspicious', [
            'name' => $name,
            'orig_name' => $origName,
            'real_mime' => $real,
            'reasons' => $reasons,
            'quarantine_ok' => $quarantine['ok'],
            'quarantine_path' => $quarantine['path'],
        ]);

        if ($BLOCK_SUSPICIOUS) {
            http_response_code(403);
            exit($blockMessage);
        }
    }
}