userAgent()); if ($userAgent === '') { return $this->bot('suspicious_bot', 'Empty UA'); } $normalized = strtolower($userAgent); if ($family = $this->matchFamily($normalized, [ 'curl' => ['curl'], 'wget' => ['wget'], 'python-requests' => ['python-requests'], 'libwww-perl' => ['libwww-perl'], 'Go-http-client' => ['go-http-client'], 'Java' => ['java/'], 'scrapy' => ['scrapy'], 'httpclient' => ['httpclient'], 'masscan' => ['masscan'], 'nikto' => ['nikto'], 'sqlmap' => ['sqlmap'], ])) { return $this->bot('suspicious_bot', $family); } if ($family = $this->matchFamily($normalized, [ 'Googlebot' => ['googlebot'], 'Bingbot' => ['bingbot'], 'DuckDuckBot' => ['duckduckbot'], 'YandexBot' => ['yandexbot'], 'Baiduspider' => ['baiduspider'], 'Applebot' => ['applebot'], 'Slurp' => ['slurp'], ])) { return $this->bot('search_bot', $family); } if ($family = $this->matchFamily($normalized, [ 'GPTBot' => ['gptbot'], 'ChatGPT-User' => ['chatgpt-user'], 'OAI-SearchBot' => ['oai-searchbot'], 'ClaudeBot' => ['claudebot'], 'PerplexityBot' => ['perplexitybot'], 'Bytespider' => ['bytespider'], 'CCBot' => ['ccbot'], 'Google-Extended' => ['google-extended'], 'anthropic-ai' => ['anthropic-ai'], 'cohere-ai' => ['cohere-ai'], ])) { return $this->bot('ai_bot', $family); } if ($family = $this->matchFamily($normalized, [ 'AhrefsBot' => ['ahrefsbot'], 'SemrushBot' => ['semrushbot'], 'MJ12bot' => ['mj12bot'], 'DotBot' => ['dotbot'], 'PetalBot' => ['petalbot'], 'DataForSeoBot' => ['dataforseobot'], 'BLEXBot' => ['blexbot'], 'MauiBot' => ['mauibot'], 'serpstatbot' => ['serpstatbot'], ])) { return $this->bot('seo_bot', $family); } if ($family = $this->matchFamily($normalized, [ 'facebookexternalhit' => ['facebookexternalhit'], 'Twitterbot' => ['twitterbot'], 'LinkedInBot' => ['linkedinbot'], 'Slackbot' => ['slackbot'], 'Discordbot' => ['discordbot'], 'TelegramBot' => ['telegrambot'], 'WhatsApp' => ['whatsapp'], 'Pinterestbot' => ['pinterestbot'], ])) { return $this->bot('social_bot', $family); } if 
($family = $this->matchFamily($normalized, [
            // Families reported with the 'monitoring_bot' type; the
            // 'Better Stack' family is recognised under two spellings.
            'UptimeRobot' => ['uptimerobot'],
            'Pingdom' => ['pingdom'],
            'StatusCake' => ['statuscake'],
            'Better Stack' => ['better stack', 'betterstack'],
            'BetterUptime' => ['betteruptime'],
        ])) {
            return $this->bot('monitoring_bot', $family);
        }

        // A user agent shorter than 8 bytes (strlen counts bytes, not
        // characters) that matched none of the named families is flagged
        // as suspicious.
        if (strlen($userAgent) < 8) {
            return $this->bot('suspicious_bot', 'Short UA');
        }

        // Catch-all: generic crawler vocabulary anywhere in the lowercased
        // user agent marks the request as an unidentified bot.
        if ($this->containsAny($normalized, ['bot', 'crawler', 'spider', 'crawl', 'preview'])) {
            return $this->bot('unknown_bot', 'Unknown crawler');
        }

        // No rule matched: report the request as not coming from a bot.
        return [
            'is_bot' => false,
            'type' => null,
            'family' => null,
        ];
    }

    /**
     * Finds the first family whose keyword list matches the user agent.
     *
     * Families are tested in array order and the first hit wins. Matching is
     * a case-sensitive substring test, so the user agent must already be
     * lowercased and the keywords written in lowercase.
     *
     * @param string $normalizedUserAgent Lowercased user-agent string.
     * @param array<string, list<string>> $families Map of family label to the
     *     substrings that identify that family.
     *
     * @return string|null The label (array key) of the first matching family,
     *     or null when no family matches.
     */
    private function matchFamily(string $normalizedUserAgent, array $families): ?string
    {
        foreach ($families as $family => $keywords) {
            if ($this->containsAny($normalizedUserAgent, $keywords)) {
                return $family;
            }
        }

        return null;
    }

    /**
     * Tells whether the haystack contains at least one of the keywords.
     *
     * The comparison is a case-sensitive substring search (str_contains).
     *
     * @param string $haystack String to search in.
     * @param list<string> $keywords Substrings to look for.
     *
     * @return bool True as soon as one non-empty keyword is found, false
     *     otherwise (including when the keyword list is empty).
     */
    private function containsAny(string $haystack, array $keywords): bool
    {
        foreach ($keywords as $keyword) {
            // Skip empty keywords: str_contains() reports every string as
            // containing '', which would make any haystack match.
            if ($keyword !== '' && str_contains($haystack, $keyword)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Builds the result array for a user agent identified as a bot.
     *
     * @param string $type Bot category, e.g. 'monitoring_bot' or 'suspicious_bot'.
     * @param string $family Family label or short reason, e.g. 'Pingdom' or 'Short UA'.
     *
     * @return array{is_bot: bool, type: string, family: string} Result with
     *     'is_bot' always set to true.
     */
    private function bot(string $type, string $family): array
    {
        return [
            'is_bot' => true,
            'type' => $type,
            'family' => $family,
        ];
    }
}