""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://www.robotstxt.org/norobots-rfc.txt
"""

import collections
import time
import urllib.error
import urllib.parse
import urllib.request

__all__ = ["RobotFileParser"]

# Value of a "Request-rate: <requests>/<seconds>" directive.
RequestRate = collections.namedtuple("RequestRate", "requests seconds")


class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        # Entries for specifically named user agents, in file order.
        self.entries = []
        # The first entry naming "*"; consulted only after self.entries.
        self.default_entry = None
        # Set by read() from the HTTP status of the robots.txt request.
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        # 0 means "never parsed"; can_fetch() denies everything until set.
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urllib.parse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser.

        A 401/403 response disallows everything, any other 4xx response
        allows everything; other HTTP errors leave the parser untouched.
        """
        try:
            f = urllib.request.urlopen(self.url)
        except urllib.error.HTTPError as err:
            if err.code in (401, 403):
                self.disallow_all = True
            elif err.code >= 400 and err.code < 500:
                self.allow_all = True
        else:
            # Close the response as soon as the body has been read.
            with f:
                raw = f.read()
            self.parse(raw.decode("utf-8").splitlines())

    def _add_entry(self, entry):
        """Store a finished entry, routing "*" entries to default_entry."""
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.
        """
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        entry = Entry()

        self.modified()
        for line in lines:
            if not line:
                # A blank line terminates the current record.
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.parse.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        # A new record starts without a separating blank.
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
                elif line[0] == "crawl-delay":
                    if state != 0:
                        # before trying to convert to int we need to make
                        # sure that robots.txt has valid syntax otherwise
                        # it will crash
                        if line[1].strip().isdigit():
                            entry.delay = int(line[1])
                        state = 2
                elif line[0] == "request-rate":
                    if state != 0:
                        numbers = line[1].split('/')
                        # check if all values are sane
                        if (len(numbers) == 2 and numbers[0].strip().isdigit()
                                and numbers[1].strip().isdigit()):
                            entry.req_rate = RequestRate(int(numbers[0]),
                                                         int(numbers[1]))
                        state = 2
        if state == 2:
            self._add_entry(entry)

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # Until the robots.txt file has been read or found not
        # to exist, we must assume that no url is allowable.
        # This prevents false positives when a user erroneously
        # calls can_fetch() before calling read().
        if not self.last_checked:
            return False
        # search for given user agent matches
        # the first match counts
        parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
        # Drop scheme and host: rules are matched against the path part only.
        url = urllib.parse.urlunparse(('', '', parsed_url.path,
                                       parsed_url.params, parsed_url.query,
                                       parsed_url.fragment))
        url = urllib.parse.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True

    def crawl_delay(self, useragent):
        """Return the Crawl-delay value for useragent.

        Returns None when nothing has been parsed yet, when the matching
        entry has no valid Crawl-delay, or when no entry (including the
        default one) applies to useragent.
        """
        if not self.mtime():
            return None
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.delay
        # There may be no "User-agent: *" group at all.
        if self.default_entry:
            return self.default_entry.delay
        return None

    def request_rate(self, useragent):
        """Return the Request-rate for useragent as a RequestRate tuple.

        Returns None when nothing has been parsed yet, when the matching
        entry has no valid Request-rate, or when no entry (including the
        default one) applies to useragent.
        """
        if not self.mtime():
            return None
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.req_rate
        # There may be no "User-agent: *" group at all.
        if self.default_entry:
            return self.default_entry.req_rate
        return None

    def __str__(self):
        entries = self.entries
        if self.default_entry is not None:
            # Build a new list so self.entries is not mutated.
            entries = entries + [self.default_entry]
        return '\n'.join(map(str, entries)) + '\n'


class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""

    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
        self.path = urllib.parse.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        """Return True if this rule's path is "*" or a prefix of filename."""
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return ("Allow" if self.allowance else "Disallow") + ": " + self.path


class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""

    def __init__(self):
        self.useragents = []
        self.rulelines = []
        # Crawl-delay as an int, or None when absent/invalid.
        self.delay = None
        # Request-rate as a RequestRate, or None when absent/invalid.
        self.req_rate = None

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.append(f"User-agent: {agent}")
        if self.delay is not None:
            ret.append(f"Crawl-delay: {self.delay}")
        if self.req_rate is not None:
            rate = self.req_rate
            ret.append(f"Request-rate: {rate.requests}/{rate.seconds}")
        ret.extend(map(str, self.rulelines))
        ret.append('')  # for compatibility
        return '\n'.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        # The first rule whose path matches decides; no match means allowed.
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True

blog

Play at the Best Online Casinos in the UK 2025 Bonuses and Games.1813

June 7, 2025 blog 0

Play at the Best Online Casinos in the UK 2025 – Bonuses and Games ▶️ PLAY Содержимое Top 5 Online Casinos for UK Players 1. NetBet Casino 2. Slots Animal Casino 3. Trustly Casinos 4. Apple Pay Casinos 5. Mastercard Casinos What to Look for in an Online Casino Additional …

Best Online Casinos in the UK 2025 Trusted and Reliable Platforms.1765

June 7, 2025 blog 0

Best Online Casinos in the UK 2025 – Trusted and Reliable Platforms ▶️ PLAY Содержимое Top-Rated Online Casinos for UK Players Trustly Casinos: A Secure and Reliable Option Apple Pay Casino UK: A Convenient and Secure Option How to Choose the Best Online Casino for Your Needs Additional Tips to …

Nouveau casino en ligne en France Comment choisir le meilleur endroit pour jouer.667

June 7, 2025 blog 0

Содержимое Les avantages d’un casino en ligne français Les critères pour choisir le meilleur casino en ligne français Les conseils pour jouer responsablement au casino en ligne Contrôlez vos dépenses Prenez des pauses Choisissez le meilleur nouveau casino en ligne pour jouer en France Les critères à prendre en compte …

Gioco Plinko nei casinò online in Italia.1242

June 7, 2025 blog 0

Gioco Plinko nei casinò online in Italia ▶️ GIOCARE Содержимое La storia dietro il gioco Le origini del gioco La versione online Come funziona il gioco Plinko Le strategie per vincere al Plinko Le migliori piattaforme per giocare Plinko in Italia Consigli per giocatori esordienti Capisci il funzionamento del gioco …

казино – Официальный сайт Pin Up Casino вход на зеркало.858

June 7, 2025 blog 0

Пин Ап казино – Официальный сайт Pin Up Casino вход на зеркало ▶️ ИГРАТЬ Содержимое Pin Up Casino – Официальный сайт Вход на зеркало Преимущества использования зеркала В современном мире азартных игр, где каждый день появляются новые онлайн-казино, сложно найти надежный и проверенный игрок. Однако, pin up Casino – это …

казино – Официальный сайт Pin Up Casino вход на зеркало.71

June 7, 2025 blog 0

Содержимое Pin Up Casino – Официальный сайт Зарегистрироваться Играть Вход на зеркало Как работает зеркало Пин Ап казино – Официальный сайт Pin Up Casino вход на зеркало В современном мире азартных игр, где каждый день появляются новые онлайн-казино, сложно найти надежный и проверенный игрок. Однако, Pin Up Casino – это …

– Официальный Сайт Vavada Casino (2026).3962

June 7, 2025 blog 0

Вавада Казино – Официальный Сайт Vavada Casino (2025) ▶️ ИГРАТЬ Содержимое Преимущества и функции Vavada Casino Безопасность и конфиденциальность Виды игр и слотов на официальном сайте Vavada Casino Бонусы и акции Vavada Casino вавада Казино – это популярный онлайн-казино, которое предлагает игрокам широкий спектр игр и услуг. Вавада Казино – …

Pin Up Casino – Azərbaycanda onlayn kazino Pin-Up.10026

June 7, 2025 blog 0

Содержимое Pin Up Casino haqqında məlumatlar Pin Up Casino-dan giriş Pin Up Casino-da qeydiyyatdan keçmək Qeydiyyat prosesi Qeydiyyat prosesindən istifadə etmək Pin Up Casino-da oyun oynamaq Pin Up Casino-da xidmətlər və tələbə məlumatları Pin Up Casino – Azərbaycanda onlayn kazino Pin-Up Pin Up Casino Azərbaycanda populyarlaşan onlayn kazino platformasıdır. Pin …

казино – Официальный сайт Pin Up Casino вход на зеркало.70

June 7, 2025 blog 0

Пин Ап казино – Официальный сайт Pin Up Casino вход на зеркало ▶️ ИГРАТЬ Содержимое Пин Ап казино – Официальный сайт Преимущества официального сайта Pin Up Casino Вход на зеркало Преимущества и функции Pin Up Casino В современном мире азартных игр, где каждый день становится все более популярным, Pin Up …

казино – Официальный сайт Pin Up Casino вход на зеркало.939

June 7, 2025 blog 0

Пин Ап казино – Официальный сайт Pin Up Casino вход на зеркало ▶️ ИГРАТЬ Содержимое Пин Ап казино – Официальный сайт Вход на зеркало Преимущества использования зеркала Pin Up Casino Преимущества и функции Pin Up Casino В современном мире азартных игр, где каждый день появляются новые онлайн-казино, Pin Up Casino …

Page 16 of 17« First...10 «13 14 151617 »