# robots.txt
#
# Format notes:
#   - One directive per line; a "#" starts a comment that runs to end of line.
#   - Each "User-agent" line opens a group; the Allow/Disallow lines that
#     follow belong to that group. A crawler obeys the group that matches its
#     own name and only falls back to the "*" group when no named group matches.

# Default group: any crawler not named below may fetch everything.
User-agent: *
Allow: /

# Disallow common AI/LLM training crawlers
User-agent: GPTBot
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: claude-web
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: Perplexity-User
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: OMGili
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: Image2dataset
Disallow: /

User-agent: ImageSiftBot
Disallow: /

# Disallow common SEO tools (if you don't use them or want to limit their access)
User-agent: AhrefsBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: Screaming Frog SEO Spider
Disallow: /

User-agent: Nutch
Disallow: /

User-agent: Rogerbot
Disallow: /

User-agent: Exabot
Disallow: /

User-agent: Swiftbot
Disallow: /

# Baidu is a major search engine but often acts aggressively outside China
User-agent: Baiduspider
Disallow: /

# Known to be aggressive and ignore robots.txt sometimes
User-agent: Sogou
Disallow: /

# Often reported for aggressive crawling
User-agent: PetalBot
Disallow: /

# Disallow other various bots and scrapers often considered less desirable
User-agent: Curl
Disallow: /

User-agent: Wget
Disallow: /

User-agent: Python-urllib
Disallow: /

User-agent: GuzzleHttp
Disallow: /

User-agent: ZGrab
Disallow: /

User-agent: masscan
Disallow: /

User-agent: scanbot
Disallow: /

User-agent: NetcraftSurveyAgent
Disallow: /

User-agent: LinkpadBot
Disallow: /

User-agent: SeznamBot
Disallow: /

User-agent: Mail.RU_Bot
Disallow: /

User-agent: Spip_Syndic
Disallow: /

# Catch-all for basic PHP-based scrapers
User-agent: PHP/
Disallow: /

# Catch-all for basic Go-based clients/scrapers
User-agent: Go-http-client
Disallow: /

# Allow legitimate search engine bots to crawl everything
# Each group below contains only an Allow rule and no Disallow, so paths
# outside /advent/ are not restricted for these crawlers either.
User-agent: Googlebot
Allow: /advent/

User-agent: Bingbot
Allow: /advent/

User-agent: DuckDuckBot
Allow: /advent/

User-agent: YandexBot
Allow: /advent/

User-agent: Applebot
Allow: /advent/

User-agent: Slurp
Allow: /advent/

# Specify your sitemap