# General configuration for all crawlers (global SEO).
# All generic rules are consolidated into this single "User-agent: *" group:
# repeating the group several times is unreliable, since many crawlers obey
# only the first group that matches them.
User-agent: *
# Essential files (crawlable by default; listed here for clarity)
Allow: /robots.txt
Allow: /sitemap.xml
# Image, CSS, and JS resources, so Google, Bing, and others can render pages
Allow: /assets/
Allow: /images/
Allow: /css/
Allow: /js/
Allow: /styles/
Allow: /scripts/
# Essential content, including paginated listings (helps deep crawling for SEO)
Allow: /faq.php
Allow: /page/
# Localized sections for country- and language-specific pages
Allow: /en/
Allow: /fr/
Allow: /de/
Allow: /es/
# Private, sensitive, or non-content directories
Disallow: /admin/
Disallow: /login/
Disallow: /private/
Disallow: /.PRIVATE/
Disallow: /backup/
Disallow: /cgi-bin/
Disallow: /tmp/
Disallow: /junk/
# Duplicate, transactional, and session-bound pages (prevents duplicate content)
Disallow: /duplicate-page/
Disallow: /sessionid/
Disallow: /search/
Disallow: /checkout/
Disallow: /order/
Disallow: /user-account/
Disallow: /profile/
Disallow: /cart/
# Dynamic URLs carrying query parameters (sessionid, faq, tracking).
# The wildcard rule /*?* matches every URL with a query string, so the
# narrower patterns /*?sessionid=, /*?faq=, and /*?tracking= are subsumed
# by it; the & variants below remain for parameters without a leading "?".
Disallow: /*?*
Disallow: /*&sessionid=
Disallow: /*&tracking=

# Googlebot-specific configuration.
# Googlebot ignores the "*" group once a Googlebot group exists, so every
# rule that should apply to Google is repeated here.
User-agent: Googlebot
# Block dynamic query-parameter URLs (faq, sessionid) to avoid duplicate indexing
Disallow: /*?*
# Block sensitive and transactional pages
Disallow: /admin/
Disallow: /login/
Disallow: /duplicate-page/
Disallow: /checkout/
Disallow: /user-account/
Disallow: /profile/
Disallow: /cart/
# Allow essential pages, plus CSS, JS, and images for page rendering and indexing
Allow: /faq.php
Allow: /sitemap.xml
Allow: /robots.txt
Allow: /css/
Allow: /js/
Allow: /images/

# Bingbot-specific configuration
User-agent: Bingbot
# Block sensitive and transactional pages
Disallow: /admin/
Disallow: /login/
Disallow: /duplicate-page/
Disallow: /checkout/
Disallow: /user-account/
Disallow: /profile/
Disallow: /cart/
# Allow the FAQ page explicitly
Allow: /faq.php
# Crawl delay (in seconds) to avoid overloading the server.
# Bing and Yandex honor Crawl-delay; Google ignores it.
Crawl-delay: 10

# Regional search engines (Baidu, Yandex, Naver), so localized versions can
# be crawled. Note the exact user-agent tokens: Baidu crawls as "Baiduspider"
# and Naver as "Yeti"; the bare names "Baidu" and "Naver" would not match.
User-agent: Baiduspider
Disallow: /admin/
Disallow: /login/
Disallow: /duplicate-page/
Disallow: /checkout/
Disallow: /user-account/
Disallow: /profile/
Disallow: /cart/
Allow: /sitemap.xml
Allow: /faq.php

User-agent: Yandex
Disallow: /admin/
Disallow: /login/
Disallow: /duplicate-page/
Disallow: /checkout/
Disallow: /user-account/
Disallow: /profile/
Disallow: /cart/
Allow: /sitemap.xml
Allow: /faq.php

# Naver (crawls as "Yeti")
User-agent: Yeti
Disallow: /admin/
Disallow: /login/
Disallow: /duplicate-page/
Disallow: /checkout/
Disallow: /user-account/
Disallow: /profile/
Disallow: /cart/
Allow: /faq.php

# Social media crawlers (helps these bots render link previews)
User-agent: facebookexternalhit
Allow: /images/
Allow: /js/
Allow: /css/
Disallow: /admin/
Disallow: /login/

User-agent: Twitterbot
Allow: /images/
Allow: /js/
Allow: /css/
Disallow: /admin/
Disallow: /login/
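# Note on rule precedence: Google and Bing resolve Allow/Disallow conflicts
# by the most specific (longest) matching rule, not by order in the file,
# and a tie goes to Allow. For example, /faq.php?id=1 matches both
# "Allow: /faq.php" and "Disallow: /*?*" in the Googlebot group; the Allow
# rule is longer, so Googlebot would likely still crawl that URL. If
# query-string versions of the FAQ must stay blocked, a more specific rule
# such as "Disallow: /faq.php?*" would be needed.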
# Sitemaps for all crawlers. Sitemap directives are global: every crawler
# reads them regardless of which group they sit next to, so each sitemap,
# including the hreflang-specific ones for international pages, is declared
# exactly once.
Sitemap: https://itcertrocket.com/sitemap.xml
Sitemap: https://itcertrocket.com/sitemap-en.xml  # English version
Sitemap: https://itcertrocket.com/sitemap-fr.xml  # French version
Sitemap: https://itcertrocket.com/sitemap-de.xml  # German version
Sitemap: https://itcertrocket.com/sitemap-es.xml  # Spanish version
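# Note: robots.txt controls crawling, not indexing. A disallowed URL can
# still be indexed if other sites link to it, and a "noindex" meta tag on a
# blocked page is never seen by the crawler. Pages that must stay out of
# search results (/admin/, /checkout/, /user-account/) should also be
# protected by authentication or served with a noindex response header.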
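# Optional sanity check (comments only; crawlers ignore these lines).
# A minimal sketch, assuming Python 3's standard urllib.robotparser, for
# testing the rules above. The example URLs are illustrative. Caveat:
# robotparser does not implement Google's wildcard or longest-match
# semantics, so results for the /*?* patterns are only approximate.
#
#     from urllib.robotparser import RobotFileParser
#
#     rp = RobotFileParser()
#     rp.set_url("https://itcertrocket.com/robots.txt")
#     rp.read()  # fetch and parse the live file
#     print(rp.can_fetch("Googlebot", "https://itcertrocket.com/faq.php"))  # expect True
#     print(rp.can_fetch("Googlebot", "https://itcertrocket.com/admin/x"))  # expect False
#     print(rp.can_fetch("Bingbot", "https://itcertrocket.com/cart/"))      # expect False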