123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 |
- """
- General utilities.
- MIT license.
- Copyright (c) 2017 Isaac Muse <isaacmuse@gmail.com>
- """
- from __future__ import unicode_literals
- import sys
- import copy
- import re
# Interpreter feature flags used to pick the right standard-library names.
PY3 = sys.version_info >= (3, 0)
PY34 = sys.version_info >= (3, 4)

if not PY3:
    # Python 2: URL helpers live in `urllib`/`urlparse`, and the HTML entity
    # decoder is only reachable through an `HTMLParser` instance.
    from urllib import pathname2url, url2pathname, quote  # noqa
    from urlparse import urlparse, urlunparse  # noqa
    from HTMLParser import HTMLParser  # noqa

    uchr = unichr  # noqa
    html_unescape = HTMLParser().unescape  # noqa
else:
    # Python 3: the same helpers are split across `urllib.request`,
    # `urllib.parse` and `html.parser`.
    from urllib.request import pathname2url, url2pathname  # noqa
    from urllib.parse import urlparse, urlunparse, quote  # noqa
    from html.parser import HTMLParser  # noqa

    uchr = chr  # noqa
    if not PY34:  # pragma: no cover
        # Before 3.4 there is no `html.unescape`; use the parser's method.
        html_unescape = HTMLParser().unescape  # noqa
    else:
        import html  # noqa
        html_unescape = html.unescape  # noqa

# A lone ASCII letter, e.g. the `c` that `urlparse` reports as the scheme of `c:/path`.
RE_WIN_DRIVE_LETTER = re.compile(r"^[A-Za-z]$")
# A drive letter plus colon, optionally followed by a backslash and anything else.
RE_WIN_DRIVE_PATH = re.compile(r"^[A-Za-z]:(?:\\.*)?$")
# Schemes that are treated as "clearly a URL" (applied with `match`, so prefix based).
RE_URL = re.compile('(http|ftp)s?|data|mailto|tel|news')

# True when the interpreter's largest code point is 0xFFFF (a "narrow" build).
IS_NARROW = sys.maxunicode == 0xFFFF
if IS_NARROW:
    # On a narrow build characters above 0xFFFF show up in a string as two
    # items (a surrogate pair), so the helpers below stitch them together.

    def get_code_points(s):
        """
        Get the Unicode code points.

        Returns a list of strings, one per code point. A high surrogate
        (0xD800-0xDBFF) immediately followed by a low surrogate
        (0xDC00-0xDFFF) is returned as a single two-item string.
        """

        points = []
        # The most recent high surrogate that is still waiting for its low half.
        high = None
        for c in s:
            v = ord(c)
            if 0xD800 <= v <= 0xDBFF:
                # Hold the high surrogate back until we see what follows.
                # An unpaired high surrogate is never emitted.
                high = c
            elif high is not None and 0xDC00 <= v <= 0xDFFF:
                # Completed pair. Reset the pending state so that a stray low
                # surrogate right after this pair is not glued onto it.
                points.append(high + c)
                high = None
            else:
                # Ordinary character (or a low surrogate with nothing pending).
                high = None
                points.append(c)
        return points

    def get_ord(c):
        """
        Get Unicode ord.

        `c` is either a single character or a two-item surrogate pair as
        produced by `get_code_points`.
        """

        if len(c) == 2:
            high, low = [ord(p) for p in c]
            return (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
        return ord(c)

    def get_char(value):
        """
        Get the Unicode char.

        Values above 0xFFFF are returned as a surrogate pair.
        """

        if value > 0xFFFF:
            # Split the offset above 0x10000 into its upper and lower 10 bits.
            high, low = divmod(value - 0x10000, 0x400)
            return uchr(high + 0xD800) + uchr(low + 0xDC00)
        return uchr(value)

else:
    # Every item of a string is already a full code point.

    def get_code_points(s):
        """Get the Unicode code points."""

        return list(s)

    def get_ord(c):
        """Get Unicode ord."""

        return ord(c)

    def get_char(value):
        """Get the Unicode char."""

        return uchr(value)
def escape_chars(md, echrs):
    """
    Register additional escapable characters on a Markdown instance.

    `md.ESCAPED_CHARS` is never mutated in place: a copy is made, every
    character from `echrs` that is not already present is appended to the
    copy, and the copy is then assigned back to `md.ESCAPED_CHARS`. This
    keeps the change local to `md` instead of altering a shared list.
    """

    chars = copy.copy(md.ESCAPED_CHARS)
    for candidate in echrs:
        if candidate in chars:
            # Already escapable (or repeated within `echrs`).
            continue
        chars.append(candidate)
    md.ESCAPED_CHARS = chars
def parse_url(url):
    """
    Split a link into its parts and classify it as a URL or a file path.

    The input is HTML-unescaped and run through `urlparse`. Anything that is
    not recognized as a file path is called a URL. File paths are returned
    slightly modified, with the path parts combined into `path`.

    Something like `c:/` is assumed to be a Windows path. No check is made
    that this **is** a Windows system, but 'nix users really shouldn't be
    creating weird names like `c:` for their folder.

    Returns the tuple
    `(scheme, netloc, path, params, query, fragment, is_url, is_absolute)`.
    """

    scheme, netloc, path, params, query, fragment = urlparse(html_unescape(url))
    is_url = is_absolute = False

    if RE_URL.match(scheme) or (scheme == '' and netloc == '' and path == ''):
        # Either clearly a url, or maybe just a url fragment
        is_url = True
    elif scheme == 'file':
        # Every `file` form is absolute; only some need their path rebuilt.
        is_absolute = True
        if RE_WIN_DRIVE_PATH.match(netloc):
            # file://c:/path or file://c:\path
            path = '/' + (netloc + path).replace('\\', '/')
            netloc = ''
        elif netloc.startswith('\\'):
            # file://\c:\path or file://\\path
            path = (netloc + path).replace('\\', '/')
            netloc = ''
        # Otherwise: file:///path, nothing to adjust
    elif RE_WIN_DRIVE_LETTER.match(scheme):
        # c:/path (the drive letter was parsed as the scheme)
        path = '/%s:%s' % (scheme, path.replace('\\', '/'))
        scheme = 'file'
        netloc = ''
        is_absolute = True
    elif scheme == '' and netloc != '' and url.startswith('//'):
        # //file/path
        path = '//' + netloc + path
        scheme = 'file'
        netloc = ''
        is_absolute = True
    elif scheme != '' and netloc != '':
        # A non-filepath or strange url
        is_url = True
    elif path.startswith(('/', '\\')):
        # /root path
        is_absolute = True

    return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)
class PymdownxDeprecationWarning(UserWarning):  # pragma: no cover
    """
    Deprecation warning for Pymdownx that is not hidden.

    It subclasses `UserWarning` and adds no behavior of its own.
    """
|