util.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. """
  2. General utilities.
  3. MIT license.
  4. Copyright (c) 2017 Isaac Muse <isaacmuse@gmail.com>
  5. """
  6. from __future__ import unicode_literals
  7. import sys
  8. import copy
  9. import re
  10. PY3 = sys.version_info >= (3, 0)
  11. PY34 = sys.version_info >= (3, 4)
  12. if PY3:
  13. uchr = chr # noqa
  14. from urllib.request import pathname2url, url2pathname # noqa
  15. from urllib.parse import urlparse, urlunparse, quote # noqa
  16. from html.parser import HTMLParser # noqa
  17. if PY34:
  18. import html # noqa
  19. html_unescape = html.unescape # noqa
  20. else: # pragma: no cover
  21. html_unescape = HTMLParser().unescape # noqa
  22. else:
  23. uchr = unichr # noqa
  24. from urllib import pathname2url, url2pathname, quote # noqa
  25. from urlparse import urlparse, urlunparse # noqa
  26. from HTMLParser import HTMLParser # noqa
  27. html_unescape = HTMLParser().unescape # noqa
  28. RE_WIN_DRIVE_LETTER = re.compile(r"^[A-Za-z]$")
  29. RE_WIN_DRIVE_PATH = re.compile(r"^[A-Za-z]:(?:\\.*)?$")
  30. RE_URL = re.compile('(http|ftp)s?|data|mailto|tel|news')
  31. IS_NARROW = sys.maxunicode == 0xFFFF
  32. if IS_NARROW:
  33. def get_code_points(s):
  34. """Get the Unicode code points."""
  35. pt = []
  36. def is_full_point(p, point):
  37. """
  38. Check if we have a full code point.
  39. Surrogates are stored in point.
  40. """
  41. v = ord(p)
  42. if 0xD800 <= v <= 0xDBFF:
  43. del point[:]
  44. point.append(p)
  45. return False
  46. if point and 0xDC00 <= v <= 0xDFFF:
  47. point.append(p)
  48. return True
  49. del point[:]
  50. return True
  51. return [(''.join(pt) if pt else c) for c in s if is_full_point(c, pt)]
  52. def get_ord(c):
  53. """Get Unicode ord."""
  54. if len(c) == 2:
  55. high, low = [ord(p) for p in c]
  56. ordinal = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
  57. else:
  58. ordinal = ord(c)
  59. return ordinal
  60. def get_char(value):
  61. """Get the Unicode char."""
  62. if value > 0xFFFF:
  63. c = ''.join(
  64. [
  65. uchr(int((value - 0x10000) / (0x400)) + 0xD800),
  66. uchr((value - 0x10000) % 0x400 + 0xDC00)
  67. ]
  68. )
  69. else:
  70. c = uchr(value)
  71. return c
  72. else:
  73. def get_code_points(s):
  74. """Get the Unicode code points."""
  75. return [c for c in s]
  76. def get_ord(c):
  77. """Get Unicode ord."""
  78. return ord(c)
  79. def get_char(value):
  80. """Get the Unicode char."""
  81. return uchr(value)
  82. def escape_chars(md, echrs):
  83. """
  84. Add chars to the escape list.
  85. Don't just append as it modifies the global list permanently.
  86. Make a copy and extend **that** copy so that only this Markdown
  87. instance gets modified.
  88. """
  89. escaped = copy.copy(md.ESCAPED_CHARS)
  90. for ec in echrs:
  91. if ec not in escaped:
  92. escaped.append(ec)
  93. md.ESCAPED_CHARS = escaped
  94. def parse_url(url):
  95. """
  96. Parse the URL.
  97. Try to determine if the following is a file path or
  98. (as we will call anything else) a URL.
  99. We return it slightly modified and combine the path parts.
  100. We also assume if we see something like c:/ it is a Windows path.
  101. We don't bother checking if this **is** a Windows system, but
  102. 'nix users really shouldn't be creating weird names like c: for their folder.
  103. """
  104. is_url = False
  105. is_absolute = False
  106. scheme, netloc, path, params, query, fragment = urlparse(html_unescape(url))
  107. if RE_URL.match(scheme):
  108. # Clearly a url
  109. is_url = True
  110. elif scheme == '' and netloc == '' and path == '':
  111. # Maybe just a url fragment
  112. is_url = True
  113. elif scheme == 'file' and (RE_WIN_DRIVE_PATH.match(netloc)):
  114. # file://c:/path or file://c:\path
  115. path = '/' + (netloc + path).replace('\\', '/')
  116. netloc = ''
  117. is_absolute = True
  118. elif scheme == 'file' and netloc.startswith('\\'):
  119. # file://\c:\path or file://\\path
  120. path = (netloc + path).replace('\\', '/')
  121. netloc = ''
  122. is_absolute = True
  123. elif scheme == 'file':
  124. # file:///path
  125. is_absolute = True
  126. elif RE_WIN_DRIVE_LETTER.match(scheme):
  127. # c:/path
  128. path = '/%s:%s' % (scheme, path.replace('\\', '/'))
  129. scheme = 'file'
  130. netloc = ''
  131. is_absolute = True
  132. elif scheme == '' and netloc != '' and url.startswith('//'):
  133. # //file/path
  134. path = '//' + netloc + path
  135. scheme = 'file'
  136. netloc = ''
  137. is_absolute = True
  138. elif scheme != '' and netloc != '':
  139. # A non-filepath or strange url
  140. is_url = True
  141. elif path.startswith(('/', '\\')):
  142. # /root path
  143. is_absolute = True
  144. return (scheme, netloc, path, params, query, fragment, is_url, is_absolute)
  145. class PymdownxDeprecationWarning(UserWarning): # pragma: no cover
  146. """Deprecation warning for Pymdownx that is not hidden."""