# ============================================
# ROBOTS.TXT - Tools House Online
# Domain: https://www.toolshouse.online
# Last Updated: 2026-06-13
# Purpose: Allow all search engines to index tools, block AI training bots
# ============================================
#
# How this file is organised:
#   * A crawler obeys ONLY the most specific group that names it; it does
#     not also inherit the rules of the "User-agent: *" group. Every
#     permissive group below therefore repeats the admin/system path and
#     sensitive-file-type blocks.
#   * Each user agent appears in exactly one group (product tokens are
#     matched case-insensitively, so "YandexBot" and "Yandexbot" are the
#     same crawler).
#   * Within a group, Disallow rules come first and "Allow: /" last, so that
#     parsers using first-match instead of longest-match still honour the
#     blocks.
#   * Crawl-delay is a non-standard extension; crawlers that do not support
#     it simply ignore the line.
#
# NOTE(review): robots.txt is publicly readable and is NOT access control.
# Listing paths such as /.env or /.git/ only asks polite crawlers to stay
# away; those paths must also be denied by the web server itself.

# ============================================
# SECTION 1: DEFAULT GROUP - ALL CRAWLERS NOT NAMED BELOW
# ============================================
# Major search engines (Googlebot, Googlebot-Image, Googlebot-Video,
# Bingbot, Slurp, DuckDuckBot, YandexBot) intentionally have no group of
# their own: they use this default group, which allows the whole site
# except the blocked paths, with a 1-second crawl delay.

User-agent: *
# --- Admin & system paths ---
Disallow: /admin/
Disallow: /cgi-bin/
Disallow: /tmp/
Disallow: /temp/
Disallow: /cache/
Disallow: /backup/
Disallow: /backups/
Disallow: /logs/
Disallow: /error_log
Disallow: /php.ini
Disallow: /.htaccess
Disallow: /.env
Disallow: /.git/
Disallow: /config/
Disallow: /database/
Disallow: /private/
Disallow: /secret/
Disallow: /vendor/
Disallow: /node_modules/
# --- Sensitive file types ---
Disallow: /*.tmp$
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.bak$
Disallow: /*.zip$
Disallow: /*.rar$
Disallow: /*.7z$
Disallow: /*.gz$
Disallow: /*.key$
Disallow: /*.pem$
Disallow: /*.cert$
# --- Tool pages (explicit) ---
# Only root-anchored patterns are listed. Leading-wildcard rules such as
# "Allow: /*.html" or "Allow: /*-calculator.html" were removed on purpose:
# under longest-match precedence they tie with or outrank the directory
# blocks above (e.g. "/*.html" is as long as "/admin/", and Allow wins a
# tie), which would re-open HTML pages inside blocked directories. All tool
# pages remain crawlable through "Allow: /".
Allow: /tools.html
Allow: /pdf-*.html
Allow: /image-*.html
Allow: /video-*.html
Allow: /audio-*.html
Allow: /character-counter.html
Allow: /case-converter.html
Allow: /remove-duplicate-lines.html
Allow: /claude-ai-token-counter.html
Allow: /Gemini-ai-token-counter.html
Allow: /chatgpt-ai-token-counter.html
Allow: /kimi-ai-token-counter.html
Allow: /color-picker.html
Allow: /text+developer.html
Allow: /youtube-tags-generator.html
Allow: /youtube-video-title-generator.html
Allow: /facebook-tags-generator.html
Allow: /meta-tag-generator.html
Allow: /ulid-generator.html
Allow: /rsa-key-pair-generator.html
Allow: /integer-base-converter.html
Allow: /keyword-density-checker.html
Allow: /backlink-checker.html
Allow: /domain-authority-checker.html
Allow: /plagiarism-checker.html
Allow: /favicon-generator.html
Allow: /xml-sitemap-generator.html
Allow: /md5-hash-generator.html
Allow: /
Crawl-delay: 1

# ============================================
# SECTION 2: SEARCH ENGINES WITH A SLOWER CRAWL RATE
# ============================================
# Same access as the default group, but with a 2-second crawl delay.

User-agent: Baiduspider
User-agent: Sogou
Disallow: /admin/
Disallow: /cgi-bin/
Disallow: /tmp/
Disallow: /temp/
Disallow: /cache/
Disallow: /backup/
Disallow: /backups/
Disallow: /logs/
Disallow: /error_log
Disallow: /php.ini
Disallow: /.htaccess
Disallow: /.env
Disallow: /.git/
Disallow: /config/
Disallow: /database/
Disallow: /private/
Disallow: /secret/
Disallow: /vendor/
Disallow: /node_modules/
Disallow: /*.tmp$
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.bak$
Disallow: /*.zip$
Disallow: /*.rar$
Disallow: /*.7z$
Disallow: /*.gz$
Disallow: /*.key$
Disallow: /*.pem$
Disallow: /*.cert$
Allow: /
Crawl-delay: 2

# ============================================
# SECTION 3: SOCIAL MEDIA / LINK PREVIEW BOTS
# ============================================
# Same access as the default group, with no crawl delay so link previews
# are fetched without throttling.

User-agent: Twitterbot
User-agent: facebookexternalhit
User-agent: LinkedInBot
User-agent: Pinterestbot
User-agent: Discordbot
User-agent: WhatsApp
User-agent: TelegramBot
User-agent: Slackbot
User-agent: Mediapartners-Google
User-agent: Redditbot
User-agent: Tumblr
Disallow: /admin/
Disallow: /cgi-bin/
Disallow: /tmp/
Disallow: /temp/
Disallow: /cache/
Disallow: /backup/
Disallow: /backups/
Disallow: /logs/
Disallow: /error_log
Disallow: /php.ini
Disallow: /.htaccess
Disallow: /.env
Disallow: /.git/
Disallow: /config/
Disallow: /database/
Disallow: /private/
Disallow: /secret/
Disallow: /vendor/
Disallow: /node_modules/
Disallow: /*.tmp$
Disallow: /*.log$
Disallow: /*.sql$
Disallow: /*.bak$
Disallow: /*.zip$
Disallow: /*.rar$
Disallow: /*.7z$
Disallow: /*.gz$
Disallow: /*.key$
Disallow: /*.pem$
Disallow: /*.cert$
Allow: /

# ============================================
# SECTION 4: BLOCK AI TRAINING BOTS (UPDATED)
# ============================================
# One shared group: every agent listed here is denied the whole site.

User-agent: Amazonbot
User-agent: Applebot-Extended
User-agent: Bytespider
User-agent: CCBot
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: Google-Extended
User-agent: meta-externalagent
User-agent: Omgilibot
User-agent: PerplexityBot
User-agent: anthropic-ai
User-agent: cohere-ai
User-agent: diffbot
Disallow: /

# ============================================
# SECTION 5: BLOCK MALICIOUS/BAD BOTS
# ============================================
# One shared group: every agent listed here is denied the whole site.

User-agent: MJ12bot
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: DotBot
User-agent: rogerbot
User-agent: Exabot
User-agent: spbot
User-agent: MauiBot
User-agent: DataForSeoBot
User-agent: ZoominfoBot
User-agent: SiteAuditBot
User-agent: SEOkicks
User-agent: Barkrowler
User-agent: BLEXBot
Disallow: /

# ============================================
# SECTION 6: SITEMAP LOCATION
# ============================================

Sitemap: https://www.toolshouse.online/sitemap.xml

# ============================================
# END OF ROBOTS.TXT
# ============================================