"""Shared support for scanning document type declarations in HTML and XHTML. This module is used as a foundation for the HTMLParser and sgmllib modules (indirectly, for htmllib as well). It has no documented public API and should not be used directly. """ import re _declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match _declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match _commentclose = re.compile(r'--\s*>') _markedsectionclose = re.compile(r']\s*]\s*>') # An analysis of the MS-Word extensions is available at # http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf _msmarkedsectionclose = re.compile(r']\s*>') del re class ParserBase: """Parser base class which provides some common support methods used by the SGML/HTML and XHTML parsers.""" def __init__(self): if self.__class__ is ParserBase: raise RuntimeError( "markupbase.ParserBase must be subclassed") def error(self, message): raise NotImplementedError( "subclasses of ParserBase must override error()") def reset(self): self.lineno = 1 self.offset = 0 def getpos(self): """Return current line number and offset.""" return self.lineno, self.offset # Internal -- update line number and offset. This should be # called for each piece of data exactly once, in order -- in other # words the concatenation of all the input strings to this # function should be exactly the entire input. def updatepos(self, i, j): if i >= j: return j rawdata = self.rawdata nlines = rawdata.count("\n", i, j) if nlines: self.lineno = self.lineno + nlines pos = rawdata.rindex("\n", i, j) # Should not fail self.offset = j-(pos+1) else: self.offset = self.offset + j-i return j _decl_otherchars = '' # Internal -- parse declaration (for use by subclasses). def parse_declaration(self, i): # This is some sort of declaration; in "HTML as # deployed," this should only be the document type # declaration ("<!DOCTYPE html...>"). # ISO 8879:1986, however, has more complex # declaration syntax for elements in <!...>, including: # --comment-- # [marked section] # name in the following list: ENTITY, DOCTYPE, ELEMENT, # ATTLIST, NOTATION, SHORTREF, USEMAP, # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM rawdata = self.rawdata j = i + 2 assert rawdata[i:j] == "<!", "unexpected call to parse_declaration" if rawdata[j:j+1] == ">": # the empty comment <!> return j + 1 if rawdata[j:j+1] in ("-", ""): # Start of comment followed by buffer boundary, # or just a buffer boundary. return -1 # A simple, practical version could look like: ((name|stringlit) S*) + '>' n = len(rawdata) if rawdata[j:j+2] == '--': #comment # Locate --.*-- as the body of the comment return self.parse_comment(i) elif rawdata[j] == '[': #marked section # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA # Note that this is extended by Microsoft Office "Save as Web" function # to include [if...] and [endif]. return self.parse_marked_section(i) else: #all other declaration elements decltype, j = self._scan_name(j, i) if j < 0: return j if decltype == "doctype": self._decl_otherchars = '' while j < n: c = rawdata[j] if c == ">": # end of declaration syntax data = rawdata[i+2:j] if decltype == "doctype": self.handle_decl(data) else: # According to the HTML5 specs sections "8.2.4.44 Bogus # comment state" and "8.2.4.45 Markup declaration open # state", a comment token should be emitted. # Calling unknown_decl provides more flexibility though. self.unknown_decl(data) return j + 1 if c in "\"'": m = _declstringlit_match(rawdata, j) if not m: return -1 # incomplete j = m.end() elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": name, j = self._scan_name(j, i) elif c in self._decl_otherchars: j = j + 1 elif c == "[": # this could be handled in a separate doctype parser if decltype == "doctype": j = self._parse_doctype_subset(j + 1, i) elif decltype in ("attlist", "linktype", "link", "element"): # must tolerate []'d groups in a content model in an element declaration # also in data attribute specifications of attlist declaration # also link type declaration subsets in linktype declarations # also link attribute specification lists in link declarations self.error("unsupported '[' char in %s declaration" % decltype) else: self.error("unexpected '[' char in declaration") else: self.error( "unexpected %r char in declaration" % rawdata[j]) if j < 0: return j return -1 # incomplete # Internal -- parse a marked section # Override this to handle MS-word extension syntax <![if word]>content<![endif]> def parse_marked_section(self, i, report=1): rawdata= self.rawdata assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()" sectName, j = self._scan_name( i+3, i ) if j < 0: return j if sectName in ("temp", "cdata", "ignore", "include", "rcdata"): # look for standard ]]> ending match= _markedsectionclose.search(rawdata, i+3) elif sectName in ("if", "else", "endif"): # look for MS Office ]> ending match= _msmarkedsectionclose.search(rawdata, i+3) else: self.error('unknown status keyword %r in marked section' % rawdata[i+3:j]) if not match: return -1 if report: j = match.start(0) self.unknown_decl(rawdata[i+3: j]) return match.end(0) # Internal -- parse comment, return length or -1 if not terminated def parse_comment(self, i, report=1): rawdata = self.rawdata if rawdata[i:i+4] != '<!--': self.error('unexpected call to parse_comment()') match = _commentclose.search(rawdata, i+4) if not match: return -1 if report: j = match.start(0) self.handle_comment(rawdata[i+4: j]) return match.end(0) # Internal -- scan past the internal subset in a <!DOCTYPE declaration, # returning the index just past any whitespace following the trailing ']'. def _parse_doctype_subset(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) j = i while j < n: c = rawdata[j] if c == "<": s = rawdata[j:j+2] if s == "<": # end of buffer; incomplete return -1 if s != "<!": self.updatepos(declstartpos, j + 1) self.error("unexpected char in internal subset (in %r)" % s) if (j + 2) == n: # end of buffer; incomplete return -1 if (j + 4) > n: # end of buffer; incomplete return -1 if rawdata[j:j+4] == "<!--": j = self.parse_comment(j, report=0) if j < 0: return j continue name, j = self._scan_name(j + 2, declstartpos) if j == -1: return -1 if name not in ("attlist", "element", "entity", "notation"): self.updatepos(declstartpos, j + 2) self.error( "unknown declaration %r in internal subset" % name) # handle the individual names meth = getattr(self, "_parse_doctype_" + name) j = meth(j, declstartpos) if j < 0: return j elif c == "%": # parameter entity reference if (j + 1) == n: # end of buffer; incomplete return -1 s, j = self._scan_name(j + 1, declstartpos) if j < 0: return j if rawdata[j] == ";": j = j + 1 elif c == "]": j = j + 1 while j < n and rawdata[j].isspace(): j = j + 1 if j < n: if rawdata[j] == ">": return j self.updatepos(declstartpos, j) self.error("unexpected char after internal subset") else: return -1 elif c.isspace(): j = j + 1 else: self.updatepos(declstartpos, j) self.error("unexpected char %r in internal subset" % c) # end of buffer reached return -1 # Internal -- scan past <!ELEMENT declarations def _parse_doctype_element(self, i, declstartpos): name, j = self._scan_name(i, declstartpos) if j == -1: return -1 # style content model; just skip until '>' rawdata = self.rawdata if '>' in rawdata[j:]: return rawdata.find(">", j) + 1 return -1 # Internal -- scan past <!ATTLIST declarations def _parse_doctype_attlist(self, i, declstartpos): rawdata = self.rawdata name, j = self._scan_name(i, declstartpos) c = rawdata[j:j+1] if c == "": return -1 if c == ">": return j + 1 while 1: # scan a series of attribute descriptions; simplified: # name type [value] [#constraint] name, j = self._scan_name(j, declstartpos) if j < 0: return j c = rawdata[j:j+1] if c == "": return -1 if c == "(": # an enumerated type; look for ')' if ")" in rawdata[j:]: j = rawdata.find(")", j) + 1 else: return -1 while rawdata[j:j+1].isspace(): j = j + 1 if not rawdata[j:]: # end of buffer, incomplete return -1 else: name, j = self._scan_name(j, declstartpos) c = rawdata[j:j+1] if not c: return -1 if c in "'\"": m = _declstringlit_match(rawdata, j) if m: j = m.end() else: return -1 c = rawdata[j:j+1] if not c: return -1 if c == "#": if rawdata[j:] == "#": # end of buffer return -1 name, j = self._scan_name(j + 1, declstartpos) if j < 0: return j c = rawdata[j:j+1] if not c: return -1 if c == '>': # all done return j + 1 # Internal -- scan past <!NOTATION declarations def _parse_doctype_notation(self, i, declstartpos): name, j = self._scan_name(i, declstartpos) if j < 0: return j rawdata = self.rawdata while 1: c = rawdata[j:j+1] if not c: # end of buffer; incomplete return -1 if c == '>': return j + 1 if c in "'\"": m = _declstringlit_match(rawdata, j) if not m: return -1 j = m.end() else: name, j = self._scan_name(j, declstartpos) if j < 0: return j # Internal -- scan past <!ENTITY declarations def _parse_doctype_entity(self, i, declstartpos): rawdata = self.rawdata if rawdata[i:i+1] == "%": j = i + 1 while 1: c = rawdata[j:j+1] if not c: return -1 if c.isspace(): j = j + 1 else: break else: j = i name, j = self._scan_name(j, declstartpos) if j < 0: return j while 1: c = self.rawdata[j:j+1] if not c: return -1 if c in "'\"": m = _declstringlit_match(rawdata, j) if m: j = m.end() else: return -1 # incomplete elif c == ">": return j + 1 else: name, j = self._scan_name(j, declstartpos) if j < 0: return j # Internal -- scan a name token and the new position and the token, or # return -1 if we've reached the end of the buffer. def _scan_name(self, i, declstartpos): rawdata = self.rawdata n = len(rawdata) if i == n: return None, -1 m = _declname_match(rawdata, i) if m: s = m.group() name = s.strip() if (i + len(s)) == n: return None, -1 # end of buffer return name.lower(), m.end() else: self.updatepos(declstartpos, i) self.error("expected name token at %r" % rawdata[declstartpos:declstartpos+20]) # To be overridden -- handlers for unknown objects def unknown_decl(self, data): pass

blog

казино – Официальный сайт Pin up играть онлайн Зеркало и вход.494

June 25, 2025 blog 0

Пин Ап казино – Официальный сайт Pin up играть онлайн | Зеркало и вход ▶️ ИГРАТЬ Содержимое Официальный сайт Pin Up Казино: Как играть онлайн, зеркало и вход Pin Up Casino – Официальный сайт Описание и функции Pin Up Казино Бонусы и акции Как играть онлайн и вход в Pin …

1Win Official Site for Sports Betting and Casino – Bonus Up to 100000.4289 (2)

June 25, 2025 blog 0

1Win – Official Site for Sports Betting and Casino – Bonus Up to ₹100,000 ▶️ PLAY Содержимое 1Win: A Leading Online Gaming Platform What is 1Win? Key Features of 1Win Why Choose 1Win? Why 1Win App is the Best Choice Why Choose 1Win for Your Betting Needs 1Win Bonuses and …

Beste Online Casinos in Deutschland.64

June 25, 2025 blog 0

Beste Online Casinos in Deutschland ▶️ SPIELEN Содержимое Regulierung und Sicherheit Top Casino-Anbieter 1. Bwin 2. LeoVegas 3. Betway 4. 888 Casino 5. Casino.de Bonusangebote und Promotions No-Deposit-Bonus Einwilligungsbasierte Bonus Freispins Wiederholungsbasierte Bonus Spiele und Anpassung an Spieler Vielfältige Spielangebote Anpassungsfunktionen für Spieler Wenn es um das Vergnügen und die …

91 Club Online Casino in India Demo Mode and Practice.211

June 25, 2025 blog 0

91 Club Online Casino in India – Demo Mode and Practice ▶️ PLAY Содержимое 91 Club Online Casino in India: A Comprehensive Guide What is 91 Club Online Casino? Games and Features Unlock the Fun with Demo Mode and Practice Start Your Journey with a Bang: Exclusive Offers and Promotions …

Best Online Casinos in Canada.947

June 25, 2025 blog 0

Best Online Casinos in Canada ▶️ PLAY Содержимое Top-Rated Online Casinos in Canada Best Online Casinos with Free Bonus How to Choose the Best Online Casino for You Consider Your Budget Canada is known for its rich history of gaming and entertainment, and the online casino industry is no exception. …

91 Club Online Casino in India Demo Mode and Practice.1118

June 25, 2025 blog 0

91 Club Online Casino in India – Demo Mode and Practice ▶️ PLAY Содержимое 91 Club Online Casino in India: A Comprehensive Guide Discover the Thrill of Online Casino Gaming with 91 Club Why Choose 91 Club? Practice Your Skills in Demo Mode and Get Ready to Win Big In …

– Официальный сайт Pinco Casino.5445

June 25, 2025 blog 0

Пинко Казино – Официальный сайт Pinco Casino ▶️ ИГРАТЬ Содержимое Преимущества игры на официальном сайте Pinco Casino Как начать играть на официальном сайте Pinco Casino Конечно, безопасность и конфиденциальность игроков В наше время интернета и онлайн-игр, казино стали одним из самых популярных развлечений для многих людей. И среди них есть …

1Win Azerbaijan – İdman Mərcləri və Casino saytı.3459

June 25, 2025 blog 0

1Win Azerbaijan – İdman Mərcləri və Casino saytı ▶️ OYNA Содержимое 1Win Azerbaijan haqqında məlumatlar 1Win Azerbaijanın məqsədi Idman mərcələrindən istifadə edən məsləhətlər 1Win Casino saytı haqqında məlumatlar Idman mərcələrindən və casino saytı ilə bağlı məlumatlar Idman mərcəzlərindən məlumatlar Casino saytı ilə bağlı məlumatlar 1Win Azerbaycan – bu idman mərcəzi …

Plinko – Online Casino Spel.921

June 25, 2025 blog 0

Plinko – Online Casino Spel ▶️ SPELEN Содержимое Welkom bij het spel Winnen met de Plinko-regels Spelen met de online casino-opties Als je op zoek bent naar een online casino spel dat een beetje extra heeft, dan is Plinko zeker een spel waard om te proberen. Dit klassieke spel is …

1Win Official Site for Sports Betting and Casino – Bonus Up to 100000.5457

June 25, 2025 blog 0

1Win – Official Site for Sports Betting and Casino – Bonus Up to ₹100,000 ▶️ PLAY Содержимое 1Win: A Leading Online Gaming Platform What is 1Win? Why Choose 1Win? 1Win Sports Betting: A Wide Range of Options Why Choose 1win for Sports Betting? How to Get Started with 1win Sports …

Page 5 of 16« First...«3 456 7 » 10...Last »