# Robots.txt for Baselight Data Platform
# Generated for optimal SEO and crawling efficiency

User-agent: *
# Set crawl delay to be respectful (1 second)
Crawl-delay: 1

# Link sitemap
Sitemap: https://baselight.app/sitemap.xml

# Allow indexing of main public pages
Allow: /
Allow: /catalog
Allow: /catalog?*

# Allow indexing of all public user content and profiles
Allow: /u/*/

# Block private/authenticated areas
Disallow: /auth
Disallow: /register
Disallow: /refresh
Disallow: /logout
Disallow: /invite
Disallow: /settings/
Disallow: /my-*
Disallow: /query/new

# Block API endpoints and AJAX requests
Disallow: /ajax/
Disallow: /_actions/
Disallow: /api/

# Block export/download endpoints (avoid bandwidth waste)
Disallow: /*/results.*
Disallow: /*/export.*

# Block query edit pages (view pages remain crawlable)
Disallow: /*/edit
Disallow: /*/edit/

# Block error pages
Disallow: /403
Disallow: /404
Disallow: /500
Disallow: /error

# Block development/admin paths
Disallow: /admin

# Set a higher crawl delay for specific bots to prevent them from being too aggressive
User-agent: Bingbot
Crawl-delay: 2

User-agent: YandexBot
Crawl-delay: 2

# Stop aggressive crawlers from consuming resources by setting a high crawl delay
User-agent: AhrefsBot
Crawl-delay: 10

User-agent: SemrushBot
Crawl-delay: 10

User-agent: DotBot
Crawl-delay: 10

User-agent: MJ12bot
Crawl-delay: 10

User-agent: BLEXBot
Crawl-delay: 10

# Social media crawlers inherit the global rules (no special config needed)
# facebookexternalhit/1.1, Twitterbot, LinkedInBot use default rules
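
# Notes:
# - Googlebot ignores the Crawl-delay directive; its crawl rate is managed
#   via Google Search Console. The delays above apply to crawlers that honor
#   Crawl-delay (Bingbot, YandexBot, and most of the SEO bots listed).
# - Wildcard (*) path patterns are supported by the major crawlers (Google,
#   Bing, Yandex) and by RFC 9309, but very old crawlers may ignore them.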