opt
/
hc_python
/
lib
/
python3.12
/
site-packages
/
lxml
/
Go to Home Directory
+
Upload
Create File
root@0UT1S:~$
Execute
By Order of Mr.0UT1S
[DIR] ..
N/A
[DIR] __pycache__
N/A
[DIR] html
N/A
[DIR] includes
N/A
[DIR] isoschematron
N/A
ElementInclude.py
8.36 KB
Rename
Delete
__init__.py
574 bytes
Rename
Delete
_elementpath.cpython-312-x86_64-linux-gnu.so
200.19 KB
Rename
Delete
_elementpath.py
10.63 KB
Rename
Delete
apihelpers.pxi
62.12 KB
Rename
Delete
builder.cpython-312-x86_64-linux-gnu.so
113.80 KB
Rename
Delete
builder.py
7.91 KB
Rename
Delete
classlookup.pxi
21.91 KB
Rename
Delete
cleanup.pxi
8.26 KB
Rename
Delete
cssselect.py
3.23 KB
Rename
Delete
debug.pxi
3.21 KB
Rename
Delete
docloader.pxi
5.64 KB
Rename
Delete
doctestcompare.py
17.32 KB
Rename
Delete
dtd.pxi
14.92 KB
Rename
Delete
etree.cpython-312-x86_64-linux-gnu.so
5.11 MB
Rename
Delete
etree.h
9.68 KB
Rename
Delete
etree.pyx
131.36 KB
Rename
Delete
etree_api.h
16.66 KB
Rename
Delete
extensions.pxi
31.34 KB
Rename
Delete
iterparse.pxi
16.13 KB
Rename
Delete
lxml.etree.h
9.68 KB
Rename
Delete
lxml.etree_api.h
16.67 KB
Rename
Delete
nsclasses.pxi
8.92 KB
Rename
Delete
objectify.cpython-312-x86_64-linux-gnu.so
2.94 MB
Rename
Delete
objectify.pyx
73.96 KB
Rename
Delete
objectpath.pxi
11.18 KB
Rename
Delete
parser.pxi
80.12 KB
Rename
Delete
parsertarget.pxi
6.18 KB
Rename
Delete
proxy.pxi
23.14 KB
Rename
Delete
public-api.pxi
6.51 KB
Rename
Delete
pyclasslookup.py
92 bytes
Rename
Delete
readonlytree.pxi
18.53 KB
Rename
Delete
relaxng.pxi
6.19 KB
Rename
Delete
sax.cpython-312-x86_64-linux-gnu.so
181.68 KB
Rename
Delete
sax.py
9.08 KB
Rename
Delete
saxparser.pxi
32.54 KB
Rename
Delete
schematron.pxi
5.77 KB
Rename
Delete
serializer.pxi
64.21 KB
Rename
Delete
usedoctest.py
230 bytes
Rename
Delete
xinclude.pxi
2.40 KB
Rename
Delete
xmlerror.pxi
48.69 KB
Rename
Delete
xmlid.pxi
5.93 KB
Rename
Delete
xmlschema.pxi
8.29 KB
Rename
Delete
xpath.pxi
18.68 KB
Rename
Delete
xslt.pxi
35.18 KB
Rename
Delete
xsltext.pxi
10.83 KB
Rename
Delete
""" lxml-based doctest output comparison. Note: normally, you should just import the `lxml.usedoctest` and `lxml.html.usedoctest` modules from within a doctest, instead of this one:: >>> import lxml.usedoctest # for XML output >>> import lxml.html.usedoctest # for HTML output To use this module directly, you must call ``lxmldoctest.install()``, which will cause doctest to use this in all subsequent calls. This changes the way output is checked and comparisons are made for XML or HTML-like content. XML or HTML content is noticed because the example starts with ``<`` (it's HTML if it starts with ``<html``). You can also use the ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing. Some rough wildcard-like things are allowed. Whitespace is generally ignored (except in attributes). In text (attributes and text in the body) you can use ``...`` as a wildcard. In an example it also matches any trailing tags in the element, though it does not match leading tags. You may create a tag ``<any>`` or include an ``any`` attribute in the tag. An ``any`` tag matches any tag, while the attribute matches any and all attributes. When a match fails, the reformatted example and gotten text is displayed (indented), and a rough diff-like output is given. Anything marked with ``+`` is in the output but wasn't supposed to be, and similarly ``-`` means its in the example but wasn't in the output. You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP`` """ from lxml import etree import sys import re import doctest try: from html import escape as html_escape except ImportError: from cgi import escape as html_escape __all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker', 'LHTMLOutputChecker', 'install', 'temp_install'] PARSE_HTML = doctest.register_optionflag('PARSE_HTML') PARSE_XML = doctest.register_optionflag('PARSE_XML') NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP') OutputChecker = doctest.OutputChecker def strip(v): if v is None: return None else: return v.strip() def norm_whitespace(v): return _norm_whitespace_re.sub(' ', v) _html_parser = etree.HTMLParser(recover=False, remove_blank_text=True) def html_fromstring(html): return etree.fromstring(html, _html_parser) # We use this to distinguish repr()s from elements: _repr_re = re.compile(r'^<[^>]+ (at|object) ') _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+') class LXMLOutputChecker(OutputChecker): empty_tags = ( 'param', 'img', 'area', 'br', 'basefont', 'input', 'base', 'meta', 'link', 'col') def get_default_parser(self): return etree.XML def check_output(self, want, got, optionflags): alt_self = getattr(self, '_temp_override_self', None) if alt_self is not None: super_method = self._temp_call_super_check_output self = alt_self else: super_method = OutputChecker.check_output parser = self.get_parser(want, got, optionflags) if not parser: return super_method( self, want, got, optionflags) try: want_doc = parser(want) except etree.XMLSyntaxError: return False try: got_doc = parser(got) except etree.XMLSyntaxError: return False return self.compare_docs(want_doc, got_doc) def get_parser(self, want, got, optionflags): parser = None if NOPARSE_MARKUP & optionflags: return None if PARSE_HTML & optionflags: parser = html_fromstring elif PARSE_XML & optionflags: parser = etree.XML elif (want.strip().lower().startswith('<html') and got.strip().startswith('<html')): parser = html_fromstring elif (self._looks_like_markup(want) and self._looks_like_markup(got)): parser = self.get_default_parser() return parser def _looks_like_markup(self, s): s = s.strip() return (s.startswith('<') and not _repr_re.search(s)) def compare_docs(self, want, got): if not self.tag_compare(want.tag, got.tag): return False if not self.text_compare(want.text, got.text, True): return False if not self.text_compare(want.tail, got.tail, True): return False if 'any' not in want.attrib: want_keys = sorted(want.attrib.keys()) got_keys = sorted(got.attrib.keys()) if want_keys != got_keys: return False for key in want_keys: if not self.text_compare(want.attrib[key], got.attrib[key], False): return False if want.text != '...' or len(want): want_children = list(want) got_children = list(got) while want_children or got_children: if not want_children or not got_children: return False want_first = want_children.pop(0) got_first = got_children.pop(0) if not self.compare_docs(want_first, got_first): return False if not got_children and want_first.tail == '...': break return True def text_compare(self, want, got, strip): want = want or '' got = got or '' if strip: want = norm_whitespace(want).strip() got = norm_whitespace(got).strip() want = '^%s$' % re.escape(want) want = want.replace(r'\.\.\.', '.*') if re.search(want, got): return True else: return False def tag_compare(self, want, got): if want == 'any': return True if (not isinstance(want, (str, bytes)) or not isinstance(got, (str, bytes))): return want == got want = want or '' got = got or '' if want.startswith('{...}'): # Ellipsis on the namespace return want.split('}')[-1] == got.split('}')[-1] else: return want == got def output_difference(self, example, got, optionflags): want = example.want parser = self.get_parser(want, got, optionflags) errors = [] if parser is not None: try: want_doc = parser(want) except etree.XMLSyntaxError: e = sys.exc_info()[1] errors.append('In example: %s' % e) try: got_doc = parser(got) except etree.XMLSyntaxError: e = sys.exc_info()[1] errors.append('In actual output: %s' % e) if parser is None or errors: value = OutputChecker.output_difference( self, example, got, optionflags) if errors: errors.append(value) return '\n'.join(errors) else: return value html = parser is html_fromstring diff_parts = ['Expected:', self.format_doc(want_doc, html, 2), 'Got:', self.format_doc(got_doc, html, 2), 'Diff:', self.collect_diff(want_doc, got_doc, html, 2)] return '\n'.join(diff_parts) def html_empty_tag(self, el, html=True): if not html: return False if el.tag not in self.empty_tags: return False if el.text or len(el): # This shouldn't happen (contents in an empty tag) return False return True def format_doc(self, doc, html, indent, prefix=''): parts = [] if not len(doc): # No children... parts.append(' '*indent) parts.append(prefix) parts.append(self.format_tag(doc)) if not self.html_empty_tag(doc, html): if strip(doc.text): parts.append(self.format_text(doc.text)) parts.append(self.format_end_tag(doc)) if strip(doc.tail): parts.append(self.format_text(doc.tail)) parts.append('\n') return ''.join(parts) parts.append(' '*indent) parts.append(prefix) parts.append(self.format_tag(doc)) if not self.html_empty_tag(doc, html): parts.append('\n') if strip(doc.text): parts.append(' '*indent) parts.append(self.format_text(doc.text)) parts.append('\n') for el in doc: parts.append(self.format_doc(el, html, indent+2)) parts.append(' '*indent) parts.append(self.format_end_tag(doc)) parts.append('\n') if strip(doc.tail): parts.append(' '*indent) parts.append(self.format_text(doc.tail)) parts.append('\n') return ''.join(parts) def format_text(self, text, strip=True): if text is None: return '' if strip: text = text.strip() return html_escape(text, 1) def format_tag(self, el): attrs = [] if isinstance(el, etree.CommentBase): # FIXME: probably PIs should be handled specially too? return '<!--' for name, value in sorted(el.attrib.items()): attrs.append('%s="%s"' % (name, self.format_text(value, False))) if not attrs: return '<%s>' % el.tag return '<%s %s>' % (el.tag, ' '.join(attrs)) def format_end_tag(self, el): if isinstance(el, etree.CommentBase): # FIXME: probably PIs should be handled specially too? return '-->' return '</%s>' % el.tag def collect_diff(self, want, got, html, indent): parts = [] if not len(want) and not len(got): parts.append(' '*indent) parts.append(self.collect_diff_tag(want, got)) if not self.html_empty_tag(got, html): parts.append(self.collect_diff_text(want.text, got.text)) parts.append(self.collect_diff_end_tag(want, got)) parts.append(self.collect_diff_text(want.tail, got.tail)) parts.append('\n') return ''.join(parts) parts.append(' '*indent) parts.append(self.collect_diff_tag(want, got)) parts.append('\n') if strip(want.text) or strip(got.text): parts.append(' '*indent) parts.append(self.collect_diff_text(want.text, got.text)) parts.append('\n') want_children = list(want) got_children = list(got) while want_children or got_children: if not want_children: parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+')) continue if not got_children: parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-')) continue parts.append(self.collect_diff( want_children.pop(0), got_children.pop(0), html, indent+2)) parts.append(' '*indent) parts.append(self.collect_diff_end_tag(want, got)) parts.append('\n') if strip(want.tail) or strip(got.tail): parts.append(' '*indent) parts.append(self.collect_diff_text(want.tail, got.tail)) parts.append('\n') return ''.join(parts) def collect_diff_tag(self, want, got): if not self.tag_compare(want.tag, got.tag): tag = '%s (got: %s)' % (want.tag, got.tag) else: tag = got.tag attrs = [] any = want.tag == 'any' or 'any' in want.attrib for name, value in sorted(got.attrib.items()): if name not in want.attrib and not any: attrs.append('+%s="%s"' % (name, self.format_text(value, False))) else: if name in want.attrib: text = self.collect_diff_text(want.attrib[name], value, False) else: text = self.format_text(value, False) attrs.append('%s="%s"' % (name, text)) if not any: for name, value in sorted(want.attrib.items()): if name in got.attrib: continue attrs.append('-%s="%s"' % (name, self.format_text(value, False))) if attrs: tag = '<%s %s>' % (tag, ' '.join(attrs)) else: tag = '<%s>' % tag return tag def collect_diff_end_tag(self, want, got): if want.tag != got.tag: tag = '%s (got: %s)' % (want.tag, got.tag) else: tag = got.tag return '</%s>' % tag def collect_diff_text(self, want, got, strip=True): if self.text_compare(want, got, strip): if not got: return '' return self.format_text(got, strip) text = '%s (got: %s)' % (want, got) return self.format_text(text, strip) class LHTMLOutputChecker(LXMLOutputChecker): def get_default_parser(self): return html_fromstring def install(html=False): """ Install doctestcompare for all future doctests. If html is true, then by default the HTML parser will be used; otherwise the XML parser is used. """ if html: doctest.OutputChecker = LHTMLOutputChecker else: doctest.OutputChecker = LXMLOutputChecker def temp_install(html=False, del_module=None): """ Use this *inside* a doctest to enable this checker for this doctest only. If html is true, then by default the HTML parser will be used; otherwise the XML parser is used. """ if html: Checker = LHTMLOutputChecker else: Checker = LXMLOutputChecker frame = _find_doctest_frame() dt_self = frame.f_locals['self'] checker = Checker() old_checker = dt_self._checker dt_self._checker = checker # The unfortunate thing is that there is a local variable 'check' # in the function that runs the doctests, that is a bound method # into the output checker. We have to update that. We can't # modify the frame, so we have to modify the object in place. The # only way to do this is to actually change the func_code # attribute of the method. We change it, and then wait for # __record_outcome to be run, which signals the end of the __run # method, at which point we restore the previous check_output # implementation. check_func = frame.f_locals['check'].__func__ checker_check_func = checker.check_output.__func__ # Because we can't patch up func_globals, this is the only global # in check_output that we care about: doctest.etree = etree _RestoreChecker(dt_self, old_checker, checker, check_func, checker_check_func, del_module) class _RestoreChecker: def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func, del_module): self.dt_self = dt_self self.checker = old_checker self.checker._temp_call_super_check_output = self.call_super self.checker._temp_override_self = new_checker self.check_func = check_func self.clone_func = clone_func self.del_module = del_module self.install_clone() self.install_dt_self() def install_clone(self): self.func_code = self.check_func.__code__ self.func_globals = self.check_func.__globals__ self.check_func.__code__ = self.clone_func.__code__ def uninstall_clone(self): self.check_func.__code__ = self.func_code def install_dt_self(self): self.prev_func = self.dt_self._DocTestRunner__record_outcome self.dt_self._DocTestRunner__record_outcome = self def uninstall_dt_self(self): self.dt_self._DocTestRunner__record_outcome = self.prev_func def uninstall_module(self): if self.del_module: import sys del sys.modules[self.del_module] if '.' in self.del_module: package, module = self.del_module.rsplit('.', 1) package_mod = sys.modules[package] delattr(package_mod, module) def __call__(self, *args, **kw): self.uninstall_clone() self.uninstall_dt_self() del self.checker._temp_override_self del self.checker._temp_call_super_check_output result = self.prev_func(*args, **kw) self.uninstall_module() return result def call_super(self, *args, **kw): self.uninstall_clone() try: return self.check_func(*args, **kw) finally: self.install_clone() def _find_doctest_frame(): import sys frame = sys._getframe(1) while frame: l = frame.f_locals if 'BOOM' in l: # Sign of doctest return frame frame = frame.f_back raise LookupError( "Could not find doctest (only use this function *inside* a doctest)") __test__ = { 'basic': ''' >>> temp_install() >>> print """<xml a="1" b="2">stuff</xml>""" <xml b="2" a="1">...</xml> >>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>""" <xml xmlns="..."> <tag attr="..." /> </xml> >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS <xml>...foo /></xml> '''} if __name__ == '__main__': import doctest doctest.testmod()
Save