blog

"""Parse (absolute and relative) URLs. urlparse module is based upon the following RFC specifications. RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding and L. Masinter, January 2005. RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter and L.Masinter, December 1999. RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. Berners-Lee, R. Fielding, and L. Masinter, August 1998. RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zwinski, July 1998. RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June 1995. RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. McCahill, December 1994 RFC 3986 is considered the current standard and any future changes to urlparse module should conform with it. The urlparse module is currently not entirely compliant with this RFC due to defacto scenarios for parsing, and for backward compatibility purposes, some parsing quirks from older RFCs are retained. The testcases in test_urlparse.py provides a good indicator of parsing behavior. """ import re __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", "urlsplit", "urlunsplit", "parse_qs", "parse_qsl"] # A classification of schemes ('' means apply by default) uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file', 'https', 'shttp', 'mms', 'prospero', 'rtsp', 'rtspu', '', 'sftp', 'svn', 'svn+ssh'] uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file', 'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', 'svn', 'svn+ssh', 'sftp','nfs','git', 'git+ssh'] uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', 'mms', '', 'sftp', 'tel'] # These are not actually used anymore, but should stay for backwards # compatibility. (They are undocumented, but have a public-looking name.) non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'https', 'shttp', 'snews', 'file', 'prospero', ''] # Characters valid in scheme names scheme_chars = ('abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' '0123456789' '+-.') MAX_CACHE_SIZE = 20 _parse_cache = {} def clear_cache(): """Clear the parse cache.""" _parse_cache.clear() class ResultMixin(object): """Shared methods for the parsed result objects.""" @property def username(self): netloc = self.netloc if "@" in netloc: userinfo = netloc.rsplit("@", 1)[0] if ":" in userinfo: userinfo = userinfo.split(":", 1)[0] return userinfo return None @property def password(self): netloc = self.netloc if "@" in netloc: userinfo = netloc.rsplit("@", 1)[0] if ":" in userinfo: return userinfo.split(":", 1)[1] return None @property def hostname(self): netloc = self.netloc.split('@')[-1] if '[' in netloc and ']' in netloc: return netloc.split(']')[0][1:].lower() elif ':' in netloc: return netloc.split(':')[0].lower() elif netloc == '': return None else: return netloc.lower() @property def port(self): netloc = self.netloc.split('@')[-1].split(']')[-1] if ':' in netloc: port = netloc.split(':')[1] port = int(port, 10) # verify legal port if (0 <= port <= 65535): return port return None from collections import namedtuple class SplitResult(namedtuple('SplitResult', 'scheme netloc path query fragment'), ResultMixin): __slots__ = () def geturl(self): return urlunsplit(self) class ParseResult(namedtuple('ParseResult', 'scheme netloc path params query fragment'), ResultMixin): __slots__ = () def geturl(self): return urlunparse(self) def urlparse(url, scheme='', allow_fragments=True): """Parse a URL into 6 components: <scheme>://<netloc>/<path>;<params>?<query>#<fragment> Return a 6-tuple: (scheme, netloc, path, params, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" tuple = urlsplit(url, scheme, allow_fragments) scheme, netloc, url, query, fragment = tuple if scheme in uses_params and ';' in url: url, params = _splitparams(url) else: params = '' return ParseResult(scheme, netloc, url, params, query, fragment) def _splitparams(url): if '/' in url: i = url.find(';', url.rfind('/')) if i < 0: return url, '' else: i = url.find(';') return url[:i], url[i+1:] def _splitnetloc(url, start=0): delim = len(url) # position of end of domain part of url, default is end for c in '/?#': # look for delimiters; the order is NOT important wdelim = url.find(c, start) # find first of this delim if wdelim >= 0: # if found delim = min(delim, wdelim) # use earliest delim position return url[start:delim], url[delim:] # return (domain, rest) def _checknetloc(netloc): if not netloc or not isinstance(netloc, unicode): return # looking for characters like \u2100 that expand to 'a/c' # IDNA uses NFKC equivalence, so normalize for this check import unicodedata n = netloc.replace(u'@', u'') # ignore characters already included n = n.replace(u':', u'') # but not the surrounding text n = n.replace(u'#', u'') n = n.replace(u'?', u'') netloc2 = unicodedata.normalize('NFKC', n) if n == netloc2: return for c in '/?#@:': if c in netloc2: raise ValueError("netloc %r contains invalid characters " "under NFKC normalization" % netloc) def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> Return a 5-tuple: (scheme, netloc, path, query, fragment). Note that we don't break the components up in smaller bits (e.g. netloc is a single string) and we don't expand % escapes.""" allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) if cached: return cached if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth clear_cache() netloc = query = fragment = '' i = url.find(':') if i > 0: if url[:i] == 'http': # optimize the common case scheme = url[:i].lower() url = url[i+1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) _checknetloc(netloc) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v return v for c in url[:i]: if c not in scheme_chars: break else: # make sure "url" is not actually a port number (in which case # "scheme" is really part of the path) rest = url[i+1:] if not rest or any(c not in '0123456789' for c in rest): # not a port number scheme, url = url[:i].lower(), rest if url[:2] == '//': netloc, url = _splitnetloc(url, 2) if (('[' in netloc and ']' not in netloc) or (']' in netloc and '[' not in netloc)): raise ValueError("Invalid IPv6 URL") if allow_fragments and '#' in url: url, fragment = url.split('#', 1) if '?' in url: url, query = url.split('?', 1) _checknetloc(netloc) v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v return v def urlunparse(data): """Put a parsed URL back together again. This may result in a slightly different, but equivalent URL, if the URL that was parsed originally had redundant delimiters, e.g. a ? with an empty query (the draft states that these are equivalent).""" scheme, netloc, url, params, query, fragment = data if params: url = "%s;%s" % (url, params) return urlunsplit((scheme, netloc, url, query, fragment)) def urlunsplit(data): """Combine the elements of a tuple as returned by urlsplit() into a complete URL as a string. The data argument can be any five-item iterable. This may result in a slightly different, but equivalent URL, if the URL that was parsed originally had unnecessary delimiters (for example, a ? with an empty query; the RFC states that these are equivalent).""" scheme, netloc, url, query, fragment = data if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): if url and url[:1] != '/': url = '/' + url url = '//' + (netloc or '') + url if scheme: url = scheme + ':' + url if query: url = url + '?' + query if fragment: url = url + '#' + fragment return url def urljoin(base, url, allow_fragments=True): """Join a base URL and a possibly relative URL to form an absolute interpretation of the latter.""" if not base: return url if not url: return base bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ urlparse(base, '', allow_fragments) scheme, netloc, path, params, query, fragment = \ urlparse(url, bscheme, allow_fragments) if scheme != bscheme or scheme not in uses_relative: return url if scheme in uses_netloc: if netloc: return urlunparse((scheme, netloc, path, params, query, fragment)) netloc = bnetloc if path[:1] == '/': return urlunparse((scheme, netloc, path, params, query, fragment)) if not path and not params: path = bpath params = bparams if not query: query = bquery return urlunparse((scheme, netloc, path, params, query, fragment)) segments = bpath.split('/')[:-1] + path.split('/') # XXX The stuff below is bogus in various ways... if segments[-1] == '.': segments[-1] = '' while '.' in segments: segments.remove('.') while 1: i = 1 n = len(segments) - 1 while i < n: if (segments[i] == '..' and segments[i-1] not in ('', '..')): del segments[i-1:i+1] break i = i+1 else: break if segments == ['', '..']: segments[-1] = '' elif len(segments) >= 2 and segments[-1] == '..': segments[-2:] = [''] return urlunparse((scheme, netloc, '/'.join(segments), params, query, fragment)) def urldefrag(url): """Removes any existing fragment from URL. Returns a tuple of the defragmented URL and the fragment. If the URL contained no fragments, the second element is the empty string. """ if '#' in url: s, n, p, a, q, frag = urlparse(url) defrag = urlunparse((s, n, p, a, q, '')) return defrag, frag else: return url, '' try: unicode except NameError: def _is_unicode(x): return 0 else: def _is_unicode(x): return isinstance(x, unicode) # unquote method for parse_qs and parse_qsl # Cannot use directly from urllib as it would create a circular reference # because urllib uses urlparse methods (urljoin). If you update this function, # update it also in urllib. This code duplication does not existin in Python3. _hexdig = '0123456789ABCDEFabcdef' _hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig) _asciire = re.compile('([\x00-\x7f]+)') def unquote(s): """unquote('abc%20def') -> 'abc def'.""" if _is_unicode(s): if '%' not in s: return s bits = _asciire.split(s) res = [bits[0]] append = res.append for i in range(1, len(bits), 2): append(unquote(str(bits[i])).decode('latin1')) append(bits[i + 1]) return ''.join(res) bits = s.split('%') # fastpath if len(bits) == 1: return s res = [bits[0]] append = res.append for item in bits[1:]: try: append(_hextochr[item[:2]]) append(item[2:]) except KeyError: append('%') append(item) return ''.join(res) def parse_qs(qs, keep_blank_values=0, strict_parsing=0): """Parse a query given as a string argument. Arguments: qs: percent-encoded query string to be parsed keep_blank_values: flag indicating whether blank values in percent-encoded queries should be treated as blank strings. A true value indicates that blanks should be retained as blank strings. The default false value indicates that blank values are to be ignored and treated as if they were not included. strict_parsing: flag indicating what to do with parsing errors. If false (the default), errors are silently ignored. If true, errors raise a ValueError exception. """ dict = {} for name, value in parse_qsl(qs, keep_blank_values, strict_parsing): if name in dict: dict[name].append(value) else: dict[name] = [value] return dict def parse_qsl(qs, keep_blank_values=0, strict_parsing=0): """Parse a query given as a string argument. Arguments: qs: percent-encoded query string to be parsed keep_blank_values: flag indicating whether blank values in percent-encoded queries should be treated as blank strings. A true value indicates that blanks should be retained as blank strings. The default false value indicates that blank values are to be ignored and treated as if they were not included. strict_parsing: flag indicating what to do with parsing errors. If false (the default), errors are silently ignored. If true, errors raise a ValueError exception. Returns a list, as G-d intended. """ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] r = [] for name_value in pairs: if not name_value and not strict_parsing: continue nv = name_value.split('=', 1) if len(nv) != 2: if strict_parsing: raise ValueError, "bad query field: %r" % (name_value,) # Handle case of a control-name with no equal sign if keep_blank_values: nv.append('') else: continue if len(nv[1]) or keep_blank_values: name = unquote(nv[0].replace('+', ' ')) value = unquote(nv[1].replace('+', ' ')) r.append((name, value)) return r

Meilleur Casino en Ligne 2025 – Sites Fiables.6959

June 8, 2025 blog 0

Содержимое Les Meilleurs Casinos en Ligne pour les Joueurs Français Les Meilleurs Casinos en Ligne Légaux pour les Joueurs Français Les Meilleurs Casinos en Ligne Fiables pour les Joueurs Français Les Meilleurs Casinos en Ligne Gratuits pour les Joueurs Français Comment Choisir un Casino en Ligne Fiable et Sécurisé Meilleur …

Best UK Casino Sites 2025 Trusted Reviews and Top Picks.1075

June 8, 2025 blog 0

Best UK Casino Sites 2025 – Trusted Reviews and Top Picks ▶️ PLAY Содержимое Top 5 Online Casinos for UK Players How to Choose the Best UK Online Casino Game Selection Customer Support UK Online Casino Bonuses and Promotions Secure and Reliable UK Online Casinos In the ever-evolving world of …

Best UK Casino Sites 2025 Trusted Reviews and Top Picks.299

June 8, 2025 blog 0

Содержимое Top 5 Online Casinos in the UK Mastercard Casinos: A Secure and Convenient Option Apple Pay Casino: A Convenient and Secure Option Animal Slots: A Fun and Exciting Option Conclusion How to Choose the Best Online Casino for You UK Online Casino Regulations and Licenses Popular Payment Methods in …

Los casinos online más populares de España.1496

June 8, 2025 blog 0

Los casinos online más populares de España ▶️ JUGAR Содержимое Los casinos online más populares de España Casino online con bono sin depósito Casino online confiable La lista de los mejores casinos online de España Características clave para elegir el mejor casino online En la actualidad, los casinos online han …

Meilleur Casino en ligne 2025 – Classement complet.6745

June 8, 2025 blog 0

Содержимое Les meilleurs casinos en ligne pour jouer en 2025 Les casinos en ligne français légaux Les nouveaux casinos en ligne Comment choisir le meilleur casino en ligne pour vous Meilleur Casino en ligne 2025 – Classement complet Les casinos en ligne sont devenus très populaires ces dernières années, offrant …

Neue Online Casinos in Deutschland.346

June 8, 2025 blog 0

Neue Online Casinos in Deutschland ▶️ SPIELEN Содержимое Die Top 5 Neuen Online Casinos in Deutschland Wie funktionieren Online Casinos in Deutschland? Das Online Casino Testbericht Die Vorteile von Online Casinos in Deutschland Mobile Online Casinos in Deutschland Die Online-Casinos-Szene in Deutschland ist ständig in Bewegung. Neue Anbieter kommen auf …

Erfahrungen mit Online Casinos in Deutschland.21

June 8, 2025 blog 0

Erfahrungen mit Online Casinos in Deutschland ▶️ SPIELEN Содержимое Die Vorteile von Online-Casinos Flexibilität Wahlmöglichkeiten Sicherheit Die Bedenken und Risiken Die Risiken von Online-Casinos Wie wählt man das richtige Online-Casino aus? Top Online Casinos in Deutschland Online Casinos in Deutschland – Ein Überblick Die wichtigsten Regeln und Vorschriften für Online …

Safe Online Casinos in the UK 2025 Licensed and Regulated Sites.1506

June 8, 2025 blog 0

Safe Online Casinos in the UK 2025 – Licensed and Regulated Sites ▶️ PLAY Содержимое Top 5 Online Casinos in the UK 2025: Secure and Reliable Options How to Choose the Best Online Casino in the UK 2025: A Guide to Licensed and Regulated Sites What to Look for in …

Most Reliable Online Casinos in the UK 2025 Secure and Fun.1457

June 8, 2025 blog 0

Most Reliable Online Casinos in the UK 2025 – Secure and Fun ▶️ PLAY Содержимое Top-Rated Online Casinos for UK Players Apple Pay Casinos: A New Era in Online Gaming Animal Slots: A Fun and Exciting Way to Play How to Choose the Best Online Casino for Your Needs Additional …

Meilleur Casino en ligne 2025 – Classement complet.4306

June 8, 2025 blog 0

Содержимое Les meilleurs casinos en ligne pour jouer en 2025 Comment choisir le meilleur casino en ligne pour vous Les critères pour choisir le meilleur casino en ligne Meilleur Casino en ligne 2025 – Classement complet Les casinos en ligne sont devenus très populaires ces dernières années, offrant une expérience …