# robots.txt - crawler access rules (Robots Exclusion Protocol).
# One directive per line; each "User-agent" line starts a new group and the
# "Disallow" lines that follow it apply to that group only.

# Default rules for every crawler that is not matched by a more specific
# group below.
User-agent: *
Disallow: /pics/
Disallow: /seti-at-home/
# NOTE(review): rules are prefix matches, so "/r" blocks every path that
# starts with "/r", not just a single "/r" page - confirm this is intended.
Disallow: /r
Disallow: /misc/
Disallow: /impressum/
Disallow: /guides/
Disallow: /old/
Disallow: /webalizer/
Disallow: /webalizer.old
Disallow: /forum/admin/
Disallow: /forum/db/
Disallow: /forum/images/
Disallow: /forum/includes/
Disallow: /forum/language/
Disallow: /forum/templates/
Disallow: /forum/common.php
Disallow: /forum/groupcp.php
Disallow: /forum/faq.php
Disallow: /forum/privmsg.php
Disallow: /forum/profile.php
Disallow: /forum/viewonline.php
Disallow: /forum/printview.php
Disallow: /forum/modcp.php
Disallow: /forum/login.php
Disallow: /wiki/admin/
Disallow: /forum/memberlist.php
Disallow: /forum/search.php
Disallow: /forum/ucp.php
Disallow: /forum/posting.php
Disallow: /forum/report.php
Disallow: /forum/download.php
Disallow: /w2/

# Crawlers below are blocked from the entire site.

User-agent: Fasterfox
Disallow: /

User-agent: AhrefsBot
Disallow: /

# The Common Crawl dataset. Original source for GPT and others.
User-agent: CCBot
Disallow: /

# The example for img2dataset, although the default is *None*
User-agent: img2dataset
Disallow: /

# GPTBot is OpenAI's web crawler
User-agent: GPTBot
Disallow: /

# ChatGPT-User takes direct actions on behalf of ChatGPT users
User-agent: ChatGPT-User
Disallow: /

# Google's Bard and Vertex AI generative APIs
User-agent: Google-Extended
Disallow: /

# Speculative blocks for Anthropic
User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

# webz.io - they sell data for training LLMs.
User-agent: Omgilibot
Disallow: /

User-agent: Omgili
Disallow: /

# Meta's bot that crawls public web pages to improve language models
User-agent: FacebookBot
Disallow: /

# ByteDance's bot used to gather data for their LLMs, including Doubao.
User-agent: Bytespider
Disallow: /

# Brandwatch - "AI to discover new trends"
User-agent: magpie-crawler
Disallow: /

# The token must use the plain ASCII hyphen-minus; a typographic hyphen
# would never match the crawler's real product token.
User-agent: Perplexity-User
Disallow: /

User-agent: PerplexityBot
Disallow: /