# robots.txt -- crawler access rules for this site.
#
# Format reminder: one directive per line; a record (group) is one or more
# "User-agent:" lines followed by its rules; records are separated by blank
# lines. Everything after "#" on a line is a comment.

# Default rules for every crawler that has no more specific record below.
# Crawl-delay is a non-standard extension: some crawlers honour it
# (seconds between requests), others ignore it.
User-agent: *
Crawl-delay: 5
Disallow: /dbh/ViewUnit
Disallow: /dbh/SearchHousingSubmit.html
Disallow: /dbh/MFICalculator.html
Disallow: /WebFile
Disallow: /tenant/Search.html.ESP
Disallow: /tenant/index.html.ESP
Disallow: /alf/fl/ViewFacility.html
Disallow: /sw/

# We have nothing of interest for Yahoo China search here.
# Yahoo! Slurp China
User-agent: slurp china
Disallow: /

# We hate seeing psbot also.
User-agent: psbot
Disallow: /

# We hate seeing twiceler / Cuil also.
User-agent: twiceler
Disallow: /

# And 'Speedy Spider'.
User-agent: Speedy
Disallow: /

# And try to stop findlinks
# (http://wortschatz.uni-leipzig.de/nextlinks/findlinks_en.html),
# but it does not say what its User-agent is ...
User-agent: findlinks
Disallow: /

# http://irl.cs.tamu.edu/crawler/
User-agent: IRLbot
Disallow: /

# Not sure about the user agent token. The full string seen is
# "MQBOT/Nutch-0.9-dev (MQBOT Nutch Crawler; http://vwbot.cs.uiuc.edu; mqbot@cs.uiuc.edu)"
# so block both candidate names.
User-agent: MQBOT
Disallow: /

User-agent: Nutch
Disallow: /

# Shopping robot ... http://www.become.com/site_owners.html
User-agent: BecomeBot
Disallow: /

# http://www.worio.com/S
User-agent: woriobot
Disallow: /

# Gigabot. Possibly many names. One annoying spider.
# Standard-conforming parsers match the product token case-insensitively and
# without the version, so "gigabot" alone should suffice; the variants are
# kept deliberately for crawlers that compare the full string literally.
User-agent: gigabot
Disallow: /

User-agent: Gigabot
Disallow: /

User-agent: Gigabot/2.0
Disallow: /

User-agent: Gigabot/2.0att
Disallow: /

# Heritrix
# User agent string in logs reports as
# "Mozilla/5.0 (compatible; heritrix/1.12.0 +http://www.accelobot.com)"
# Seems to be an open-source bot: http://crawler.archive.org/
# But this instance is coming from c01.ba.accelovation.com -> c08.ba.accelovation.com,
# which are 72.20.99.41 -> 72.20.99.48. Perhaps we will just block at the
# firewall.
User-agent: archive.org_bot
Disallow: /

# But cast a wide net.
User-agent: heritrix
Disallow: /

User-agent: Heritrix
Disallow: /

# 'voyager/1.0' coming from crawl*.cosmixcorp.com, a lovely linkfarm.
# Case and version variants are listed for parsers that match literally.
User-agent: voyager/1.0
Disallow: /

User-agent: voyager
Disallow: /

User-agent: Voyager/1.0
Disallow: /

User-agent: Voyager
Disallow: /

# Why another robot?
User-agent: CazoodleBot
Disallow: /