# Master WordPress robots.txt - High-Security LEMP Stack
# Version: 2.0 - Optimized for symlink deployment
# Updated: 2025-06-30
#
# Balances: Security | Performance | SEO | WordPress Protection
#
# Format notes for maintainers:
#   * One directive per line. A "#" starts a comment that runs to the end of
#     the line, so directives must never share a line with a preceding comment.
#   * A crawler obeys ONLY the most specific group whose User-agent matches it.
#     Groups are not cumulative: a bot named in a group below does not inherit
#     anything from the "User-agent: *" group at the bottom of this file.
#   * robots.txt is advisory and publicly readable. It is not access control;
#     anything listed here that must stay private also has to be denied by the
#     web server.

# ==============================================================================
# LEGITIMATE SEARCH ENGINES
# ==============================================================================

# Google - Full access with minimal restrictions
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-Video
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/cache/
Disallow: /trackback/
Disallow: /xmlrpc.php
Disallow: /*?replytocom
Disallow: /wp-json/wp/v2/users
Allow: /wp-admin/admin-ajax.php
Allow: /wp-content/uploads/
Allow: /wp-includes/js/
Allow: /wp-includes/css/
Allow: /wp-includes/images/
Crawl-delay: 0

# Bing/Microsoft
User-agent: Bingbot
User-agent: msnbot
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/cache/
Disallow: /trackback/
Disallow: /xmlrpc.php
Disallow: /*?replytocom
Allow: /wp-admin/admin-ajax.php
Allow: /wp-content/uploads/
Crawl-delay: 1

# Other legitimate search engines
User-agent: Yandex
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
User-agent: WhatsApp
User-agent: Slackbot
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/cache/
Allow: /wp-content/uploads/
Crawl-delay: 1

# ==============================================================================
# SEO TOOLS - Restricted Access
# ==============================================================================

# Ahrefs - Limited access
User-agent: AhrefsBot
User-agent: AhrefsSiteAudit
Disallow: /wp-*
Disallow: /author/
Disallow: /users/
Disallow: /?s=
Disallow: /search/
Crawl-delay: 10

# SEMrush - Limited access
User-agent: SemrushBot
User-agent: SemrushBot-SA
User-agent: SemrushBot-BA
User-agent: SemrushBot-BM
Disallow: /wp-*
Disallow: /author/
Disallow: /?s=
Disallow: /search/
Crawl-delay: 15

# Majestic - Heavily restricted
User-agent: MJ12bot
Disallow: /wp-*
Disallow: /author/
Disallow: /category/
Disallow: /tag/
Disallow: /archive/
Disallow: /?
Crawl-delay: 20

# Moz - Moderate restrictions
User-agent: DotBot
User-agent: rogerbot
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/
Crawl-delay: 10

# ==============================================================================
# AGGRESSIVE CRAWLERS - Significant Restrictions
# ==============================================================================

User-agent: AwarioBot
User-agent: BLEXBot
User-agent: DataForSeoBot
User-agent: DomainCrawler
User-agent: Bytespider
User-agent: AspiegelBot
Disallow: /
Crawl-delay: 30

# ==============================================================================
# AI/LLM CRAWLERS - Blocked by Default
# ==============================================================================

User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: CCBot
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: Google-Extended
User-agent: PerplexityBot
User-agent: YouBot
User-agent: Omgilibot
Disallow: /

# ==============================================================================
# SECURITY BLOCKS + DEFAULT RULES - All Other User Agents
# ==============================================================================
# This is the single catch-all group. It applies only to crawlers that are NOT
# named in any group above. The former separate "security" and "default"
# "User-agent: *" groups are merged here so that parsers which honour only the
# first matching group still see every rule.
# Sub-sections are separated by comment lines rather than blank lines because
# some legacy parsers treat a blank line as the end of a group.

User-agent: *
#
# --- Security: WordPress core files ---
Disallow: /wp-config.php
Disallow: /wp-config-sample.php
Disallow: /license.txt
Disallow: /readme.html
Disallow: /.htaccess
Disallow: /.user.ini
Disallow: /wp-settings.php
Disallow: /wp-load.php
Disallow: /wp-blog-header.php
Disallow: /wp-cron.php
Disallow: /wp-links-opml.php
Disallow: /wp-activate.php
Disallow: /xmlrpc.php
#
# --- Security: sensitive directories ---
Disallow: /.git/
Disallow: /.svn/
Disallow: /.hg/
Disallow: /backup*/
Disallow: /backups/
Disallow: /cache/
Disallow: /tmp/
Disallow: /temp/
Disallow: /logs/
Disallow: /log/
#
# --- Security: sensitive files by extension ---
# These patterns are unanchored (no trailing "$"), so each one matches any URL
# path that contains the given ".ext" substring anywhere after the first "/".
Disallow: /*.sql
Disallow: /*.sql.gz
Disallow: /*.log
Disallow: /*.ini
Disallow: /*.inc
Disallow: /*.bak
Disallow: /*.old
Disallow: /*.save
Disallow: /*.orig
Disallow: /*.config
Disallow: /*.conf
Disallow: /*.env
#
# --- Security: WordPress uploads ---
# These are longer (more specific) than "Allow: /wp-content/uploads/" below,
# so they take precedence over it under longest-match evaluation.
Disallow: /wp-content/uploads/*.php
Disallow: /wp-content/uploads/wpforms/
Disallow: /wp-content/uploads/gravity_forms/
Disallow: /wp-content/uploads/ninja-forms/
#
# --- Defaults: core WordPress directories ---
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-content/plugins/
Disallow: /wp-content/cache/
Disallow: /wp-content/themes/
Allow: /wp-admin/admin-ajax.php
Allow: /wp-content/uploads/
Allow: /wp-includes/js/
Allow: /wp-includes/css/
Allow: /wp-includes/images/
#
# --- Defaults: prevent duplicate content ---
# "/*?" already matches every URL that contains a query string; the narrower
# "?param=" rules after it are kept as explicit documentation of intent.
Disallow: /*?
Disallow: /*?s=
Disallow: /*?p=
Disallow: /*?page_id=
Disallow: /*?attachment_id=
Disallow: /*?replytocom=
Disallow: /trackback/
Disallow: /feed/
Disallow: /comments/
Disallow: /author/
Disallow: /category/*/page/
Disallow: /tag/*/page/
Disallow: /page/
#
# --- Defaults: clean tracking parameters ---
Disallow: /*utm_*=
Disallow: /*fbclid=
Disallow: /*gclid=
Disallow: /*msclkid=
#
# --- Defaults: crawl delay ---
Crawl-delay: 2

# Sitemap location (updated per site via symlink)
# Sitemap: https://example.com/sitemap.xml