Current File : //lib64/python2.7/robotparser.py
""" robotparser.py

    Copyright (C) 2000  Bastian Kleineidam

    You can choose between two licenses when using this package:
    1) GNU GPLv2
    2) PSF license for Python 2.2

    The robots.txt Exclusion Protocol is implemented as specified in
    http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""
import urlparse
import urllib

__all__ = ["RobotFileParser"]


class RobotFileParser:
    """ This class provides a set of methods to read, parse and answer
    questions about a single robots.txt file.

    """

    def __init__(self, url=''):
        self.entries = []
        self.default_entry = None
        self.disallow_all = False
        self.allow_all = False
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Returns the time the robots.txt file was last fetched.

        This is useful for long-running web spiders that need to
        check for new robots.txt files periodically.

        """
        return self.last_checked

    def modified(self):
        """Sets the time the robots.txt file was last fetched to the
        current time.

        """
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Sets the URL referring to a robots.txt file."""
        self.url = url
        self.host, self.path = urlparse.urlparse(url)[1:3]

    def read(self):
        """Reads the robots.txt URL and feeds it to the parser."""
        opener = URLopener()
        f = opener.open(self.url)
        lines = [line.strip() for line in f]
        f.close()
        self.errcode = opener.errcode
        if self.errcode in (401, 403):
            self.disallow_all = True
        elif self.errcode >= 400:
            self.allow_all = True
        elif self.errcode == 200 and lines:
            self.parse(lines)

    def _add_entry(self, entry):
        if "*" in entry.useragents:
            # the default entry is considered last
            if self.default_entry is None:
                # the first default entry wins
                self.default_entry = entry
        else:
            self.entries.append(entry)

    def parse(self, lines):
        """parse the input lines from a robots.txt file.
           We allow that a user-agent: line is not preceded by
           one or more blank lines."""
        # states:
        #   0: start state
        #   1: saw user-agent line
        #   2: saw an allow or disallow line
        state = 0
        linenumber = 0
        entry = Entry()

        for line in lines:
            linenumber += 1
            if not line:
                if state == 1:
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self._add_entry(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = urllib.unquote(line[1].strip())
                if line[0] == "user-agent":
                    if state == 2:
                        self._add_entry(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], False))
                        state = 2
                elif line[0] == "allow":
                    if state != 0:
                        entry.rulelines.append(RuleLine(line[1], True))
                        state = 2
        if state == 2:
            self._add_entry(entry)


    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        if self.disallow_all:
            return False
        if self.allow_all:
            return True
        # search for given user agent matches
        # the first match counts
        parsed_url = urlparse.urlparse(urllib.unquote(url))
        url = urlparse.urlunparse(('', '', parsed_url.path,
            parsed_url.params, parsed_url.query, parsed_url.fragment))
        url = urllib.quote(url)
        if not url:
            url = "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # try the default entry last
        if self.default_entry:
            return self.default_entry.allowance(url)
        # agent not found ==> access granted
        return True


    def __str__(self):
        return ''.join([str(entry) + "\n" for entry in self.entries])


class RuleLine:
    """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
       (allowance==False) followed by a path."""
    def __init__(self, path, allowance):
        if path == '' and not allowance:
            # an empty value means allow all
            allowance = True
        self.path = urllib.quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow") + ": " + self.path


class Entry:
    """An entry has one or more user-agents and zero or more rulelines"""
    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        ret = []
        for agent in self.useragents:
            ret.extend(["User-agent: ", agent, "\n"])
        for line in self.rulelines:
            ret.extend([str(line), "\n"])
        return ''.join(ret)

    def applies_to(self, useragent):
        """check if this entry applies to the specified agent"""
        # split the name token and make it lower case
        useragent = useragent.split("/")[0].lower()
        for agent in self.useragents:
            if agent == '*':
                # we have the catch-all agent
                return True
            agent = agent.lower()
            if agent in useragent:
                return True
        return False

    def allowance(self, filename):
        """Preconditions:
        - our agent applies to this entry
        - filename is URL decoded"""
        for line in self.rulelines:
            if line.applies_to(filename):
                return line.allowance
        return True

class URLopener(urllib.FancyURLopener):
    def __init__(self, *args):
        urllib.FancyURLopener.__init__(self, *args)
        self.errcode = 200

    def prompt_user_passwd(self, host, realm):
        ## If robots.txt file is accessible only with a password,
        ## we act as if the file wasn't there.
        return None, None

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        self.errcode = errcode
        return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
                                                        errmsg, headers)
blog

blog

1win — официальный сайт букмекерской конторы 1вин.1812

1win — официальный сайт букмекерской конторы 1вин ▶️ ИГРАТЬ Содержимое Официальный сайт букмекерской конторы 1вин Быстрый доступ к ставкам и линиям Преимущества быстрого доступа Преимущества и функции официального сайта 1вин Большой выбор ставок В мире ставок и азарта 1win – это имя, которое ассоциируется с надежностью, быстротой и комфортной игрой. …

Read More »

Mostbet onlayn kazino O‘zbekistonda xavfsizlik va litsenziya.121

Mostbet onlayn kazino O‘zbekistonda – xavfsizlik va litsenziya ▶️ O’YNANG Содержимое Mostbet onlayn kazinoning O‘zbekistondagi litsenziyasi Litsenziya haqida ma’lumot Xavfsizlik choralarining amalga oshirilishi Xavfsizlik choralarining turlari Xavfsizlik choralarini bajarishning afzalliklari Onlayn kazinoda moliyaviy operatsiyalar xavfsizligi Moliyaviy operatsiyalar xavfsizligi texnologiyalari Foydalanuvchilar uchun maslahatlar Mostbet onlayn kazinosining O‘zbekiston bozorida ishtiroki Mostbet onlayn …

Read More »

Mostbet onlayn kazino O‘zbekistonda mobil ilova.393

Mostbet onlayn kazino O‘zbekistonda – mobil ilova ▶️ O’YNANG Содержимое Mostbet mobil ilovasining afzalliklari Mostbet onlayn kazinoda o’yinlar va bonuslar Mostbet mobil ilovasini O‘zbekistonda yuklab olish va o’rnatish Mostbet uz kirish imkoniyati bilan, O‘zbekistonda yashovchi iste’molchilar endi o‘z sevimli kazino o‘yinlarini onlayn rejimda o‘ynashlari mumkin. Kazino online o‘yinlari dunyosi juda …

Read More »

8d25650162e5

Noxwin Gambling enterprise Canada ️ Rating C$a hundred Welcome Extra Blogs Safer, Prompt, and you will Legitimate Casino Financial Options for 2024 Exclusive Crypto Now offers A primary area of amount to your organization is the on the internet gaming world in the China and you can Europe. Video slots …

Read More »

Unique Casino (Avis 2025) Bonus 200% jusqu’à 500.1713

Unique Casino Avis 2025 Profitez d’un Bonus Exclusif de 200% Jusqu’à 500€ ▶️ JOUER Содержимое Unique Casino (Avis 2025) : Découvrez l’Expérience Ultime Bonus Exclusif : 200% Jusqu’à 500€ Pourquoi Choisir Unique Casino en 2025 ? Jeux de Casino Variés et Passionnants Sécurité et Fiabilité à Toute Épreuve Support Client …

Read More »

Los mejores casinos online de España.617

Содержимое ¿Qué es un casino online? ¿Cómo elegir el mejor casino online? Los mejores casinos online para jugadores españoles ¿Cómo elegir el mejor casino online para ti? Seguridad y responsabilidad en los casinos online Mejor casino online: ¿cómo elegir? Los mejores casinos online de España En la actualidad, el mundo …

Read More »

WinSpirit Online Casino Australia Real Money Play.659

WinSpirit Online Casino Australia Your Gateway to Real Money Gaming Excitement ▶️ PLAY Содержимое WinSpirit Online Casino Australia: Your Gateway to Real Money Play Why Choose WinSpirit Online Casino for Real Money Gaming? Explore the Best Casino Games at WinSpirit Australia Secure and Fast Real Money Transactions at WinSpirit Exclusive …

Read More »

1win — регистрация в букмекерской конторе 1вин.1299

Содержимое Шаги регистрации в 1win Как начать играть и получать бонусы в 1win 1win — регистрация в букмекерской конторе 1вин В мире ставок и азарта 1вин является одним из самых популярных букмекеров. Компания была основана в 2018 году и с тех пор стала одним из лидеров на рынке. 1вин предлагает …

Read More »

Casinos online populares en España.1533

Casinos online populares en España ▶️ JUGAR Содержимое Los mejores sitios de casino online en España ¿Qué son los casinos online? Características de los casinos online Tipos de casinos online Los mejores casinos online en España ¿Cómo elegir el mejor casino online para ti? Seguridad y responsabilidad en los casinos …

Read More »