Current File : //lib64/python2.7/markupbase.py
"""Shared support for scanning document type declarations in HTML and XHTML.

This module is used as a foundation for the HTMLParser and sgmllib
modules (indirectly, for htmllib as well).  It has no documented
public API and should not be used directly.

"""

import re

_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
_commentclose = re.compile(r'--\s*>')
_markedsectionclose = re.compile(r']\s*]\s*>')

# An analysis of the MS-Word extensions is available at
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf

_msmarkedsectionclose = re.compile(r']\s*>')

del re


class ParserBase:
    """Parser base class which provides some common support methods used
    by the SGML/HTML and XHTML parsers."""

    def __init__(self):
        if self.__class__ is ParserBase:
            raise RuntimeError(
                "markupbase.ParserBase must be subclassed")

    def error(self, message):
        raise NotImplementedError(
            "subclasses of ParserBase must override error()")

    def reset(self):
        self.lineno = 1
        self.offset = 0

    def getpos(self):
        """Return current line number and offset."""
        return self.lineno, self.offset

    # Internal -- update line number and offset.  This should be
    # called for each piece of data exactly once, in order -- in other
    # words the concatenation of all the input strings to this
    # function should be exactly the entire input.
    def updatepos(self, i, j):
        if i >= j:
            return j
        rawdata = self.rawdata
        nlines = rawdata.count("\n", i, j)
        if nlines:
            self.lineno = self.lineno + nlines
            pos = rawdata.rindex("\n", i, j) # Should not fail
            self.offset = j-(pos+1)
        else:
            self.offset = self.offset + j-i
        return j

    _decl_otherchars = ''

    # Internal -- parse declaration (for use by subclasses).
    def parse_declaration(self, i):
        # This is some sort of declaration; in "HTML as
        # deployed," this should only be the document type
        # declaration ("<!DOCTYPE html...>").
        # ISO 8879:1986, however, has more complex
        # declaration syntax for elements in <!...>, including:
        # --comment--
        # [marked section]
        # name in the following list: ENTITY, DOCTYPE, ELEMENT,
        # ATTLIST, NOTATION, SHORTREF, USEMAP,
        # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM
        rawdata = self.rawdata
        j = i + 2
        assert rawdata[i:j] == "<!", "unexpected call to parse_declaration"
        if rawdata[j:j+1] == ">":
            # the empty comment <!>
            return j + 1
        if rawdata[j:j+1] in ("-", ""):
            # Start of comment followed by buffer boundary,
            # or just a buffer boundary.
            return -1
        # A simple, practical version could look like: ((name|stringlit) S*) + '>'
        n = len(rawdata)
        if rawdata[j:j+2] == '--': #comment
            # Locate --.*-- as the body of the comment
            return self.parse_comment(i)
        elif rawdata[j] == '[': #marked section
            # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section
            # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA
            # Note that this is extended by Microsoft Office "Save as Web" function
            # to include [if...] and [endif].
            return self.parse_marked_section(i)
        else: #all other declaration elements
            decltype, j = self._scan_name(j, i)
        if j < 0:
            return j
        if decltype == "doctype":
            self._decl_otherchars = ''
        while j < n:
            c = rawdata[j]
            if c == ">":
                # end of declaration syntax
                data = rawdata[i+2:j]
                if decltype == "doctype":
                    self.handle_decl(data)
                else:
                    # According to the HTML5 specs sections "8.2.4.44 Bogus
                    # comment state" and "8.2.4.45 Markup declaration open
                    # state", a comment token should be emitted.
                    # Calling unknown_decl provides more flexibility though.
                    self.unknown_decl(data)
                return j + 1
            if c in "\"'":
                m = _declstringlit_match(rawdata, j)
                if not m:
                    return -1 # incomplete
                j = m.end()
            elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
                name, j = self._scan_name(j, i)
            elif c in self._decl_otherchars:
                j = j + 1
            elif c == "[":
                # this could be handled in a separate doctype parser
                if decltype == "doctype":
                    j = self._parse_doctype_subset(j + 1, i)
                elif decltype in ("attlist", "linktype", "link", "element"):
                    # must tolerate []'d groups in a content model in an element declaration
                    # also in data attribute specifications of attlist declaration
                    # also link type declaration subsets in linktype declarations
                    # also link attribute specification lists in link declarations
                    self.error("unsupported '[' char in %s declaration" % decltype)
                else:
                    self.error("unexpected '[' char in declaration")
            else:
                self.error(
                    "unexpected %r char in declaration" % rawdata[j])
            if j < 0:
                return j
        return -1 # incomplete

    # Internal -- parse a marked section
    # Override this to handle MS-word extension syntax <![if word]>content<![endif]>
    def parse_marked_section(self, i, report=1):
        rawdata= self.rawdata
        assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()"
        sectName, j = self._scan_name( i+3, i )
        if j < 0:
            return j
        if sectName in ("temp", "cdata", "ignore", "include", "rcdata"):
            # look for standard ]]> ending
            match= _markedsectionclose.search(rawdata, i+3)
        elif sectName in ("if", "else", "endif"):
            # look for MS Office ]> ending
            match= _msmarkedsectionclose.search(rawdata, i+3)
        else:
            self.error('unknown status keyword %r in marked section' % rawdata[i+3:j])
        if not match:
            return -1
        if report:
            j = match.start(0)
            self.unknown_decl(rawdata[i+3: j])
        return match.end(0)

    # Internal -- parse comment, return length or -1 if not terminated
    def parse_comment(self, i, report=1):
        rawdata = self.rawdata
        if rawdata[i:i+4] != '<!--':
            self.error('unexpected call to parse_comment()')
        match = _commentclose.search(rawdata, i+4)
        if not match:
            return -1
        if report:
            j = match.start(0)
            self.handle_comment(rawdata[i+4: j])
        return match.end(0)

    # Internal -- scan past the internal subset in a <!DOCTYPE declaration,
    # returning the index just past any whitespace following the trailing ']'.
    def _parse_doctype_subset(self, i, declstartpos):
        rawdata = self.rawdata
        n = len(rawdata)
        j = i
        while j < n:
            c = rawdata[j]
            if c == "<":
                s = rawdata[j:j+2]
                if s == "<":
                    # end of buffer; incomplete
                    return -1
                if s != "<!":
                    self.updatepos(declstartpos, j + 1)
                    self.error("unexpected char in internal subset (in %r)" % s)
                if (j + 2) == n:
                    # end of buffer; incomplete
                    return -1
                if (j + 4) > n:
                    # end of buffer; incomplete
                    return -1
                if rawdata[j:j+4] == "<!--":
                    j = self.parse_comment(j, report=0)
                    if j < 0:
                        return j
                    continue
                name, j = self._scan_name(j + 2, declstartpos)
                if j == -1:
                    return -1
                if name not in ("attlist", "element", "entity", "notation"):
                    self.updatepos(declstartpos, j + 2)
                    self.error(
                        "unknown declaration %r in internal subset" % name)
                # handle the individual names
                meth = getattr(self, "_parse_doctype_" + name)
                j = meth(j, declstartpos)
                if j < 0:
                    return j
            elif c == "%":
                # parameter entity reference
                if (j + 1) == n:
                    # end of buffer; incomplete
                    return -1
                s, j = self._scan_name(j + 1, declstartpos)
                if j < 0:
                    return j
                if rawdata[j] == ";":
                    j = j + 1
            elif c == "]":
                j = j + 1
                while j < n and rawdata[j].isspace():
                    j = j + 1
                if j < n:
                    if rawdata[j] == ">":
                        return j
                    self.updatepos(declstartpos, j)
                    self.error("unexpected char after internal subset")
                else:
                    return -1
            elif c.isspace():
                j = j + 1
            else:
                self.updatepos(declstartpos, j)
                self.error("unexpected char %r in internal subset" % c)
        # end of buffer reached
        return -1

    # Internal -- scan past <!ELEMENT declarations
    def _parse_doctype_element(self, i, declstartpos):
        name, j = self._scan_name(i, declstartpos)
        if j == -1:
            return -1
        # style content model; just skip until '>'
        rawdata = self.rawdata
        if '>' in rawdata[j:]:
            return rawdata.find(">", j) + 1
        return -1

    # Internal -- scan past <!ATTLIST declarations
    def _parse_doctype_attlist(self, i, declstartpos):
        rawdata = self.rawdata
        name, j = self._scan_name(i, declstartpos)
        c = rawdata[j:j+1]
        if c == "":
            return -1
        if c == ">":
            return j + 1
        while 1:
            # scan a series of attribute descriptions; simplified:
            #   name type [value] [#constraint]
            name, j = self._scan_name(j, declstartpos)
            if j < 0:
                return j
            c = rawdata[j:j+1]
            if c == "":
                return -1
            if c == "(":
                # an enumerated type; look for ')'
                if ")" in rawdata[j:]:
                    j = rawdata.find(")", j) + 1
                else:
                    return -1
                while rawdata[j:j+1].isspace():
                    j = j + 1
                if not rawdata[j:]:
                    # end of buffer, incomplete
                    return -1
            else:
                name, j = self._scan_name(j, declstartpos)
            c = rawdata[j:j+1]
            if not c:
                return -1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if m:
                    j = m.end()
                else:
                    return -1
                c = rawdata[j:j+1]
                if not c:
                    return -1
            if c == "#":
                if rawdata[j:] == "#":
                    # end of buffer
                    return -1
                name, j = self._scan_name(j + 1, declstartpos)
                if j < 0:
                    return j
                c = rawdata[j:j+1]
                if not c:
                    return -1
            if c == '>':
                # all done
                return j + 1

    # Internal -- scan past <!NOTATION declarations
    def _parse_doctype_notation(self, i, declstartpos):
        name, j = self._scan_name(i, declstartpos)
        if j < 0:
            return j
        rawdata = self.rawdata
        while 1:
            c = rawdata[j:j+1]
            if not c:
                # end of buffer; incomplete
                return -1
            if c == '>':
                return j + 1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if not m:
                    return -1
                j = m.end()
            else:
                name, j = self._scan_name(j, declstartpos)
                if j < 0:
                    return j

    # Internal -- scan past <!ENTITY declarations
    def _parse_doctype_entity(self, i, declstartpos):
        rawdata = self.rawdata
        if rawdata[i:i+1] == "%":
            j = i + 1
            while 1:
                c = rawdata[j:j+1]
                if not c:
                    return -1
                if c.isspace():
                    j = j + 1
                else:
                    break
        else:
            j = i
        name, j = self._scan_name(j, declstartpos)
        if j < 0:
            return j
        while 1:
            c = self.rawdata[j:j+1]
            if not c:
                return -1
            if c in "'\"":
                m = _declstringlit_match(rawdata, j)
                if m:
                    j = m.end()
                else:
                    return -1    # incomplete
            elif c == ">":
                return j + 1
            else:
                name, j = self._scan_name(j, declstartpos)
                if j < 0:
                    return j

    # Internal -- scan a name token and the new position and the token, or
    # return -1 if we've reached the end of the buffer.
    def _scan_name(self, i, declstartpos):
        rawdata = self.rawdata
        n = len(rawdata)
        if i == n:
            return None, -1
        m = _declname_match(rawdata, i)
        if m:
            s = m.group()
            name = s.strip()
            if (i + len(s)) == n:
                return None, -1  # end of buffer
            return name.lower(), m.end()
        else:
            self.updatepos(declstartpos, i)
            self.error("expected name token at %r"
                       % rawdata[declstartpos:declstartpos+20])

    # To be overridden -- handlers for unknown objects
    def unknown_decl(self, data):
        pass
BDM Cricket India: tips, teams, tournaments

Recent Posts

Laissez-vous ensorceler par lunivers fascinant de Win Unique Wiz !

Laissez-vous ensorceler par lunivers fascinant de Win Unique Wiz ! Plongée dans l’univers des jeux en ligne Les machines à sous révélatrices Les jeux de table : tradition et stratégie Les bonus et promotions La sécurité des joueurs L’impact des technologies modernes La mobile gaming L’importance du soutien client Les …

Read More »

Svět sázení na dosah ruky Stáhněte si aplikaci Mostbet ještě dnes!

Svět sázení na dosah ruky: Stáhněte si aplikaci Mostbet ještě dnes! Hlavní výhody používání aplikace Mostbet Jak stáhnout aplikaci Mostbet? Dostupné funkce v aplikaci Mostbet Podpora a zákaznický servis Bonusy a propagační akce Závěr a shrnutí Svět sázení na dosah ruky: Stáhněte si aplikaci Mostbet ještě dnes! Sázení je v …

Read More »

Les casinos en ligne en France une expérience à vivre

Les casinos en ligne en France : une expérience à vivre ? Le cadre légal des jeux en ligne en France Les types de jeux disponibles Les avantages des casinos en ligne Les inconvénients et risques associés Les tendances émergentes des casinos en ligne Les jeux mobiles et leur popularité …

Read More »

Step-by-step Guide: কিভাবে Mostbet থেকে টাকা তুলব Securely Locaminas Locação Na Relação À Veículos

“step-by-step Guide Book: কিভাবে Mostbet থেকে টাকা তুলব Securely Locaminas Locação Sobre Veículos Content What Will Be Definitely The Particular Regular Drawback Procedures On Mostbet? “payment Alternatives In Mostbet Software Throughout Bangladesh Federal Flood Commison Can I Move Away Cash Received From Mostbet Without Verifying My Personal Account? Step-by-step Guideline: …

Read More »

CASHlib Casinos in Deutschland – Was bieten sie?

CASHlib Casinos gewinnen in Deutschland immer mehr an Bedeutung. Diese Casinos ermöglichen es Spielern, anonym und sicher mit Prepaid-Guthaben zu bezahlen – ganz ohne Bankverbindung oder Kreditkarte. Besonders für Nutzer, die auf Datenschutz und schnelle Transaktionen Wert legen, sind sie eine interessante Alternative.

Was bieten CASHlib Casinos?

  • Schnelle und anonyme Einzahlungen ohne Registrierung bei Drittanbietern
  • Breites Spielangebot von Slots bis zu Live-Dealer-Spielen
  • Regelmäßige Aktionen wie Freispiele und Cashback
  • Attraktive Willkommensboni für neue Spieler
  • EU-lizenzierte Anbieter mit hohen Sicherheitsstandards

Ein großer Vorteil von CASHlib ist, dass keine sensiblen Bankdaten im Casino hinterlegt werden müssen. Die Gutscheine sind online oder in vielen Verkaufsstellen erhältlich und können sofort verwendet werden. Dadurch entfällt auch die Notwendigkeit, persönliche Daten bei Einzahlungen preiszugeben – ein echter Pluspunkt für sicherheitsbewusste Spieler.

Viele spielothekgermany.com/de/spielothek/cashlib-casinos/ bieten zudem mobile Kompatibilität, einfache Menüführung und professionellen Spielerschutz. Wer nach einem unkomplizierten Zahlungsweg mit solider Auswahl an Spielen und Bonusangeboten sucht, wird bei diesen Plattformen fündig. Die Kombination aus Bequemlichkeit, Sicherheit und einem attraktiven Bonusangebot macht CASHlib Casinos zu einer beliebten Wahl für deutsche Nutzer.

slot 7