lib64
/
python3.8
/
Go to Home Directory
+
Upload
Create File
root@0UT1S:~$
Execute
By Order of Mr.0UT1S
[DIR] ..
N/A
[DIR] __pycache__
N/A
[DIR] asyncio
N/A
[DIR] collections
N/A
[DIR] concurrent
N/A
[DIR] config-3.8-x86_64-linux-gnu
N/A
[DIR] ctypes
N/A
[DIR] curses
N/A
[DIR] dbm
N/A
[DIR] distutils
N/A
[DIR] email
N/A
[DIR] encodings
N/A
[DIR] ensurepip
N/A
[DIR] html
N/A
[DIR] http
N/A
[DIR] importlib
N/A
[DIR] json
N/A
[DIR] lib-dynload
N/A
[DIR] lib2to3
N/A
[DIR] logging
N/A
[DIR] multiprocessing
N/A
[DIR] pydoc_data
N/A
[DIR] site-packages
N/A
[DIR] sqlite3
N/A
[DIR] tkinter
N/A
[DIR] turtledemo
N/A
[DIR] unittest
N/A
[DIR] urllib
N/A
[DIR] venv
N/A
[DIR] wsgiref
N/A
[DIR] xml
N/A
[DIR] xmlrpc
N/A
LICENSE.txt
13.61 KB
Rename
Delete
__future__.py
5.03 KB
Rename
Delete
__phello__.foo.py
64 bytes
Rename
Delete
_bootlocale.py
1.76 KB
Rename
Delete
_collections_abc.py
25.49 KB
Rename
Delete
_compat_pickle.py
8.54 KB
Rename
Delete
_compression.py
5.21 KB
Rename
Delete
_dummy_thread.py
5.89 KB
Rename
Delete
_markupbase.py
14.26 KB
Rename
Delete
_osx_support.py
21.26 KB
Rename
Delete
_py_abc.py
6.04 KB
Rename
Delete
_pydecimal.py
223.31 KB
Rename
Delete
_pyio.py
90.99 KB
Rename
Delete
_sitebuiltins.py
3.04 KB
Rename
Delete
_strptime.py
24.68 KB
Rename
Delete
_sysconfigdata__linux_x86_64-linux-gnu.py
37.61 KB
Rename
Delete
_sysconfigdata_d_linux_x86_64-linux-gnu.py
37.34 KB
Rename
Delete
_threading_local.py
7.05 KB
Rename
Delete
_weakrefset.py
5.60 KB
Rename
Delete
abc.py
4.38 KB
Rename
Delete
aifc.py
32.04 KB
Rename
Delete
antigravity.py
477 bytes
Rename
Delete
argparse.py
93.76 KB
Rename
Delete
ast.py
18.78 KB
Rename
Delete
asynchat.py
11.06 KB
Rename
Delete
asyncore.py
19.62 KB
Rename
Delete
base64.py
19.90 KB
Rename
Delete
bdb.py
31.30 KB
Rename
Delete
binhex.py
13.63 KB
Rename
Delete
bisect.py
2.16 KB
Rename
Delete
bz2.py
12.26 KB
Rename
Delete
cProfile.py
6.85 KB
Rename
Delete
calendar.py
24.25 KB
Rename
Delete
cgi.py
33.14 KB
Rename
Delete
cgitb.py
11.81 KB
Rename
Delete
chunk.py
5.31 KB
Rename
Delete
cmd.py
14.51 KB
Rename
Delete
code.py
10.37 KB
Rename
Delete
codecs.py
35.81 KB
Rename
Delete
codeop.py
6.18 KB
Rename
Delete
colorsys.py
3.97 KB
Rename
Delete
compileall.py
13.36 KB
Rename
Delete
configparser.py
53.10 KB
Rename
Delete
contextlib.py
24.41 KB
Rename
Delete
contextvars.py
129 bytes
Rename
Delete
copy.py
8.46 KB
Rename
Delete
copyreg.py
6.97 KB
Rename
Delete
crypt.py
3.53 KB
Rename
Delete
csv.py
15.77 KB
Rename
Delete
dataclasses.py
48.80 KB
Rename
Delete
datetime.py
86.22 KB
Rename
Delete
decimal.py
320 bytes
Rename
Delete
difflib.py
82.09 KB
Rename
Delete
dis.py
20.09 KB
Rename
Delete
doctest.py
102.09 KB
Rename
Delete
dummy_threading.py
2.75 KB
Rename
Delete
enum.py
37.24 KB
Rename
Delete
filecmp.py
9.60 KB
Rename
Delete
fileinput.py
14.36 KB
Rename
Delete
fnmatch.py
3.98 KB
Rename
Delete
formatter.py
14.79 KB
Rename
Delete
fractions.py
23.76 KB
Rename
Delete
ftplib.py
34.31 KB
Rename
Delete
functools.py
36.53 KB
Rename
Delete
genericpath.py
4.86 KB
Rename
Delete
getopt.py
7.31 KB
Rename
Delete
getpass.py
5.85 KB
Rename
Delete
gettext.py
26.50 KB
Rename
Delete
glob.py
5.56 KB
Rename
Delete
gzip.py
20.91 KB
Rename
Delete
hashlib.py
8.14 KB
Rename
Delete
heapq.py
22.34 KB
Rename
Delete
hmac.py
7.67 KB
Rename
Delete
imaplib.py
52.35 KB
Rename
Delete
imghdr.py
3.72 KB
Rename
Delete
imp.py
10.29 KB
Rename
Delete
inspect.py
115.77 KB
Rename
Delete
io.py
3.46 KB
Rename
Delete
ipaddress.py
69.96 KB
Rename
Delete
keyword.py
945 bytes
Rename
Delete
linecache.py
5.21 KB
Rename
Delete
locale.py
76.36 KB
Rename
Delete
lzma.py
12.68 KB
Rename
Delete
mailbox.py
76.82 KB
Rename
Delete
mailcap.py
8.85 KB
Rename
Delete
mimetypes.py
21.16 KB
Rename
Delete
modulefinder.py
23.86 KB
Rename
Delete
netrc.py
5.44 KB
Rename
Delete
nntplib.py
42.25 KB
Rename
Delete
ntpath.py
27.08 KB
Rename
Delete
nturl2path.py
2.82 KB
Rename
Delete
numbers.py
10.00 KB
Rename
Delete
opcode.py
5.67 KB
Rename
Delete
operator.py
10.46 KB
Rename
Delete
optparse.py
58.95 KB
Rename
Delete
os.py
38.08 KB
Rename
Delete
pathlib.py
51.38 KB
Rename
Delete
pdb.py
61.27 KB
Rename
Delete
pickle.py
62.96 KB
Rename
Delete
pickletools.py
91.29 KB
Rename
Delete
pipes.py
8.71 KB
Rename
Delete
pkgutil.py
21.00 KB
Rename
Delete
platform.py
39.48 KB
Rename
Delete
plistlib.py
31.46 KB
Rename
Delete
poplib.py
14.72 KB
Rename
Delete
posixpath.py
15.26 KB
Rename
Delete
pprint.py
20.98 KB
Rename
Delete
profile.py
22.99 KB
Rename
Delete
pstats.py
26.70 KB
Rename
Delete
pty.py
4.69 KB
Rename
Delete
py_compile.py
8.01 KB
Rename
Delete
pyclbr.py
14.90 KB
Rename
Delete
pydoc.py
104.20 KB
Rename
Delete
queue.py
11.09 KB
Rename
Delete
quopri.py
7.08 KB
Rename
Delete
random.py
28.13 KB
Rename
Delete
re.py
15.49 KB
Rename
Delete
reprlib.py
5.14 KB
Rename
Delete
rlcompleter.py
6.93 KB
Rename
Delete
runpy.py
11.77 KB
Rename
Delete
sched.py
6.29 KB
Rename
Delete
secrets.py
1.99 KB
Rename
Delete
selectors.py
18.13 KB
Rename
Delete
shelve.py
8.33 KB
Rename
Delete
shlex.py
13.01 KB
Rename
Delete
shutil.py
50.55 KB
Rename
Delete
signal.py
2.22 KB
Rename
Delete
site.py
21.33 KB
Rename
Delete
smtpd.py
33.90 KB
Rename
Delete
smtplib.py
43.95 KB
Rename
Delete
sndhdr.py
6.93 KB
Rename
Delete
socket.py
34.42 KB
Rename
Delete
socketserver.py
26.66 KB
Rename
Delete
sre_compile.py
26.07 KB
Rename
Delete
sre_constants.py
6.99 KB
Rename
Delete
sre_parse.py
39.29 KB
Rename
Delete
ssl.py
49.57 KB
Rename
Delete
stat.py
5.36 KB
Rename
Delete
statistics.py
38.76 KB
Rename
Delete
string.py
10.29 KB
Rename
Delete
stringprep.py
12.61 KB
Rename
Delete
struct.py
257 bytes
Rename
Delete
subprocess.py
76.42 KB
Rename
Delete
sunau.py
17.94 KB
Rename
Delete
symbol.py
2.06 KB
Rename
Delete
symtable.py
7.83 KB
Rename
Delete
sysconfig.py
24.31 KB
Rename
Delete
tabnanny.py
11.14 KB
Rename
Delete
tarfile.py
103.98 KB
Rename
Delete
telnetlib.py
22.71 KB
Rename
Delete
tempfile.py
26.89 KB
Rename
Delete
textwrap.py
18.95 KB
Rename
Delete
this.py
1003 bytes
Rename
Delete
threading.py
49.63 KB
Rename
Delete
timeit.py
13.16 KB
Rename
Delete
token.py
2.31 KB
Rename
Delete
tokenize.py
25.24 KB
Rename
Delete
trace.py
29.17 KB
Rename
Delete
traceback.py
23.06 KB
Rename
Delete
tracemalloc.py
16.68 KB
Rename
Delete
tty.py
879 bytes
Rename
Delete
turtle.py
140.35 KB
Rename
Delete
types.py
9.49 KB
Rename
Delete
typing.py
67.35 KB
Rename
Delete
uu.py
7.11 KB
Rename
Delete
uuid.py
29.80 KB
Rename
Delete
warnings.py
19.23 KB
Rename
Delete
wave.py
17.80 KB
Rename
Delete
weakref.py
20.89 KB
Rename
Delete
webbrowser.py
23.52 KB
Rename
Delete
xdrlib.py
5.77 KB
Rename
Delete
zipapp.py
7.36 KB
Rename
Delete
zipfile.py
85.67 KB
Rename
Delete
zipimport.py
30.04 KB
Rename
Delete
# # Secret Labs' Regular Expression Engine # # convert re-style regular expression to sre pattern # # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. # # See the sre.py file for information on usage and redistribution. # """Internal support module for sre""" # XXX: show string offset and offending character for all errors from sre_constants import * SPECIAL_CHARS = ".\\[{()*+?^$|" REPEAT_CHARS = "*+?{" DIGITS = frozenset("0123456789") OCTDIGITS = frozenset("01234567") HEXDIGITS = frozenset("0123456789abcdefABCDEF") ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") WHITESPACE = frozenset(" \t\n\r\v\f") _REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT}) _UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY}) ESCAPES = { r"\a": (LITERAL, ord("\a")), r"\b": (LITERAL, ord("\b")), r"\f": (LITERAL, ord("\f")), r"\n": (LITERAL, ord("\n")), r"\r": (LITERAL, ord("\r")), r"\t": (LITERAL, ord("\t")), r"\v": (LITERAL, ord("\v")), r"\\": (LITERAL, ord("\\")) } CATEGORIES = { r"\A": (AT, AT_BEGINNING_STRING), # start of string r"\b": (AT, AT_BOUNDARY), r"\B": (AT, AT_NON_BOUNDARY), r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), r"\Z": (AT, AT_END_STRING), # end of string } FLAGS = { # standard flags "i": SRE_FLAG_IGNORECASE, "L": SRE_FLAG_LOCALE, "m": SRE_FLAG_MULTILINE, "s": SRE_FLAG_DOTALL, "x": SRE_FLAG_VERBOSE, # extensions "a": SRE_FLAG_ASCII, "t": SRE_FLAG_TEMPLATE, "u": SRE_FLAG_UNICODE, } TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE GLOBAL_FLAGS = SRE_FLAG_DEBUG | SRE_FLAG_TEMPLATE class Verbose(Exception): pass class State: # keeps track of state for parsing def __init__(self): self.flags = 0 self.groupdict = {} self.groupwidths = [None] # group 0 self.lookbehindgroups = None @property def groups(self): return len(self.groupwidths) def opengroup(self, name=None): gid = self.groups self.groupwidths.append(None) if self.groups > MAXGROUPS: raise error("too many groups") if name is not None: ogid = self.groupdict.get(name, None) if ogid is not None: raise error("redefinition of group name %r as group %d; " "was group %d" % (name, gid, ogid)) self.groupdict[name] = gid return gid def closegroup(self, gid, p): self.groupwidths[gid] = p.getwidth() def checkgroup(self, gid): return gid < self.groups and self.groupwidths[gid] is not None def checklookbehindgroup(self, gid, source): if self.lookbehindgroups is not None: if not self.checkgroup(gid): raise source.error('cannot refer to an open group') if gid >= self.lookbehindgroups: raise source.error('cannot refer to group defined in the same ' 'lookbehind subpattern') class SubPattern: # a subpattern, in intermediate form def __init__(self, state, data=None): self.state = state if data is None: data = [] self.data = data self.width = None def dump(self, level=0): nl = True seqtypes = (tuple, list) for op, av in self.data: print(level*" " + str(op), end='') if op is IN: # member sublanguage print() for op, a in av: print((level+1)*" " + str(op), a) elif op is BRANCH: print() for i, a in enumerate(av[1]): if i: print(level*" " + "OR") a.dump(level+1) elif op is GROUPREF_EXISTS: condgroup, item_yes, item_no = av print('', condgroup) item_yes.dump(level+1) if item_no: print(level*" " + "ELSE") item_no.dump(level+1) elif isinstance(av, seqtypes): nl = False for a in av: if isinstance(a, SubPattern): if not nl: print() a.dump(level+1) nl = True else: if not nl: print(' ', end='') print(a, end='') nl = False if not nl: print() else: print('', av) def __repr__(self): return repr(self.data) def __len__(self): return len(self.data) def __delitem__(self, index): del self.data[index] def __getitem__(self, index): if isinstance(index, slice): return SubPattern(self.state, self.data[index]) return self.data[index] def __setitem__(self, index, code): self.data[index] = code def insert(self, index, code): self.data.insert(index, code) def append(self, code): self.data.append(code) def getwidth(self): # determine the width (min, max) for this subpattern if self.width is not None: return self.width lo = hi = 0 for op, av in self.data: if op is BRANCH: i = MAXREPEAT - 1 j = 0 for av in av[1]: l, h = av.getwidth() i = min(i, l) j = max(j, h) lo = lo + i hi = hi + j elif op is CALL: i, j = av.getwidth() lo = lo + i hi = hi + j elif op is SUBPATTERN: i, j = av[-1].getwidth() lo = lo + i hi = hi + j elif op in _REPEATCODES: i, j = av[2].getwidth() lo = lo + i * av[0] hi = hi + j * av[1] elif op in _UNITCODES: lo = lo + 1 hi = hi + 1 elif op is GROUPREF: i, j = self.state.groupwidths[av] lo = lo + i hi = hi + j elif op is GROUPREF_EXISTS: i, j = av[1].getwidth() if av[2] is not None: l, h = av[2].getwidth() i = min(i, l) j = max(j, h) else: i = 0 lo = lo + i hi = hi + j elif op is SUCCESS: break self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT) return self.width class Tokenizer: def __init__(self, string): self.istext = isinstance(string, str) self.string = string if not self.istext: string = str(string, 'latin1') self.decoded_string = string self.index = 0 self.next = None self.__next() def __next(self): index = self.index try: char = self.decoded_string[index] except IndexError: self.next = None return if char == "\\": index += 1 try: char += self.decoded_string[index] except IndexError: raise error("bad escape (end of pattern)", self.string, len(self.string) - 1) from None self.index = index + 1 self.next = char def match(self, char): if char == self.next: self.__next() return True return False def get(self): this = self.next self.__next() return this def getwhile(self, n, charset): result = '' for _ in range(n): c = self.next if c not in charset: break result += c self.__next() return result def getuntil(self, terminator, name): result = '' while True: c = self.next self.__next() if c is None: if not result: raise self.error("missing " + name) raise self.error("missing %s, unterminated name" % terminator, len(result)) if c == terminator: if not result: raise self.error("missing " + name, 1) break result += c return result @property def pos(self): return self.index - len(self.next or '') def tell(self): return self.index - len(self.next or '') def seek(self, index): self.index = index self.__next() def error(self, msg, offset=0): return error(msg, self.string, self.tell() - offset) def _class_escape(source, escape): # handle escape code inside character class code = ESCAPES.get(escape) if code: return code code = CATEGORIES.get(escape) if code and code[0] is IN: return code try: c = escape[1:2] if c == "x": # hexadecimal escape (exactly two digits) escape += source.getwhile(2, HEXDIGITS) if len(escape) != 4: raise source.error("incomplete escape %s" % escape, len(escape)) return LITERAL, int(escape[2:], 16) elif c == "u" and source.istext: # unicode escape (exactly four digits) escape += source.getwhile(4, HEXDIGITS) if len(escape) != 6: raise source.error("incomplete escape %s" % escape, len(escape)) return LITERAL, int(escape[2:], 16) elif c == "U" and source.istext: # unicode escape (exactly eight digits) escape += source.getwhile(8, HEXDIGITS) if len(escape) != 10: raise source.error("incomplete escape %s" % escape, len(escape)) c = int(escape[2:], 16) chr(c) # raise ValueError for invalid code return LITERAL, c elif c == "N" and source.istext: import unicodedata # named unicode escape e.g. \N{EM DASH} if not source.match('{'): raise source.error("missing {") charname = source.getuntil('}', 'character name') try: c = ord(unicodedata.lookup(charname)) except KeyError: raise source.error("undefined character name %r" % charname, len(charname) + len(r'\N{}')) return LITERAL, c elif c in OCTDIGITS: # octal escape (up to three digits) escape += source.getwhile(2, OCTDIGITS) c = int(escape[1:], 8) if c > 0o377: raise source.error('octal escape value %s outside of ' 'range 0-0o377' % escape, len(escape)) return LITERAL, c elif c in DIGITS: raise ValueError if len(escape) == 2: if c in ASCIILETTERS: raise source.error('bad escape %s' % escape, len(escape)) return LITERAL, ord(escape[1]) except ValueError: pass raise source.error("bad escape %s" % escape, len(escape)) def _escape(source, escape, state): # handle escape code in expression code = CATEGORIES.get(escape) if code: return code code = ESCAPES.get(escape) if code: return code try: c = escape[1:2] if c == "x": # hexadecimal escape escape += source.getwhile(2, HEXDIGITS) if len(escape) != 4: raise source.error("incomplete escape %s" % escape, len(escape)) return LITERAL, int(escape[2:], 16) elif c == "u" and source.istext: # unicode escape (exactly four digits) escape += source.getwhile(4, HEXDIGITS) if len(escape) != 6: raise source.error("incomplete escape %s" % escape, len(escape)) return LITERAL, int(escape[2:], 16) elif c == "U" and source.istext: # unicode escape (exactly eight digits) escape += source.getwhile(8, HEXDIGITS) if len(escape) != 10: raise source.error("incomplete escape %s" % escape, len(escape)) c = int(escape[2:], 16) chr(c) # raise ValueError for invalid code return LITERAL, c elif c == "N" and source.istext: import unicodedata # named unicode escape e.g. \N{EM DASH} if not source.match('{'): raise source.error("missing {") charname = source.getuntil('}', 'character name') try: c = ord(unicodedata.lookup(charname)) except KeyError: raise source.error("undefined character name %r" % charname, len(charname) + len(r'\N{}')) return LITERAL, c elif c == "0": # octal escape escape += source.getwhile(2, OCTDIGITS) return LITERAL, int(escape[1:], 8) elif c in DIGITS: # octal escape *or* decimal group reference (sigh) if source.next in DIGITS: escape += source.get() if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and source.next in OCTDIGITS): # got three octal digits; this is an octal escape escape += source.get() c = int(escape[1:], 8) if c > 0o377: raise source.error('octal escape value %s outside of ' 'range 0-0o377' % escape, len(escape)) return LITERAL, c # not an octal escape, so this is a group reference group = int(escape[1:]) if group < state.groups: if not state.checkgroup(group): raise source.error("cannot refer to an open group", len(escape)) state.checklookbehindgroup(group, source) return GROUPREF, group raise source.error("invalid group reference %d" % group, len(escape) - 1) if len(escape) == 2: if c in ASCIILETTERS: raise source.error("bad escape %s" % escape, len(escape)) return LITERAL, ord(escape[1]) except ValueError: pass raise source.error("bad escape %s" % escape, len(escape)) def _uniq(items): return list(dict.fromkeys(items)) def _parse_sub(source, state, verbose, nested): # parse an alternation: a|b|c items = [] itemsappend = items.append sourcematch = source.match start = source.tell() while True: itemsappend(_parse(source, state, verbose, nested + 1, not nested and not items)) if not sourcematch("|"): break if len(items) == 1: return items[0] subpattern = SubPattern(state) # check if all items share a common prefix while True: prefix = None for item in items: if not item: break if prefix is None: prefix = item[0] elif item[0] != prefix: break else: # all subitems start with a common "prefix". # move it out of the branch for item in items: del item[0] subpattern.append(prefix) continue # check next one break # check if the branch can be replaced by a character set set = [] for item in items: if len(item) != 1: break op, av = item[0] if op is LITERAL: set.append((op, av)) elif op is IN and av[0][0] is not NEGATE: set.extend(av) else: break else: # we can store this as a character set instead of a # branch (the compiler may optimize this even more) subpattern.append((IN, _uniq(set))) return subpattern subpattern.append((BRANCH, (None, items))) return subpattern def _parse(source, state, verbose, nested, first=False): # parse a simple pattern subpattern = SubPattern(state) # precompute constants into local variables subpatternappend = subpattern.append sourceget = source.get sourcematch = source.match _len = len _ord = ord while True: this = source.next if this is None: break # end of pattern if this in "|)": break # end of subpattern sourceget() if verbose: # skip whitespace and comments if this in WHITESPACE: continue if this == "#": while True: this = sourceget() if this is None or this == "\n": break continue if this[0] == "\\": code = _escape(source, this, state) subpatternappend(code) elif this not in SPECIAL_CHARS: subpatternappend((LITERAL, _ord(this))) elif this == "[": here = source.tell() - 1 # character set set = [] setappend = set.append ## if sourcematch(":"): ## pass # handle character classes if source.next == '[': import warnings warnings.warn( 'Possible nested set at position %d' % source.tell(), FutureWarning, stacklevel=nested + 6 ) negate = sourcematch("^") # check remaining characters while True: this = sourceget() if this is None: raise source.error("unterminated character set", source.tell() - here) if this == "]" and set: break elif this[0] == "\\": code1 = _class_escape(source, this) else: if set and this in '-&~|' and source.next == this: import warnings warnings.warn( 'Possible set %s at position %d' % ( 'difference' if this == '-' else 'intersection' if this == '&' else 'symmetric difference' if this == '~' else 'union', source.tell() - 1), FutureWarning, stacklevel=nested + 6 ) code1 = LITERAL, _ord(this) if sourcematch("-"): # potential range that = sourceget() if that is None: raise source.error("unterminated character set", source.tell() - here) if that == "]": if code1[0] is IN: code1 = code1[1][0] setappend(code1) setappend((LITERAL, _ord("-"))) break if that[0] == "\\": code2 = _class_escape(source, that) else: if that == '-': import warnings warnings.warn( 'Possible set difference at position %d' % ( source.tell() - 2), FutureWarning, stacklevel=nested + 6 ) code2 = LITERAL, _ord(that) if code1[0] != LITERAL or code2[0] != LITERAL: msg = "bad character range %s-%s" % (this, that) raise source.error(msg, len(this) + 1 + len(that)) lo = code1[1] hi = code2[1] if hi < lo: msg = "bad character range %s-%s" % (this, that) raise source.error(msg, len(this) + 1 + len(that)) setappend((RANGE, (lo, hi))) else: if code1[0] is IN: code1 = code1[1][0] setappend(code1) set = _uniq(set) # XXX: <fl> should move set optimization to compiler! if _len(set) == 1 and set[0][0] is LITERAL: # optimization if negate: subpatternappend((NOT_LITERAL, set[0][1])) else: subpatternappend(set[0]) else: if negate: set.insert(0, (NEGATE, None)) # charmap optimization can't be added here because # global flags still are not known subpatternappend((IN, set)) elif this in REPEAT_CHARS: # repeat previous item here = source.tell() if this == "?": min, max = 0, 1 elif this == "*": min, max = 0, MAXREPEAT elif this == "+": min, max = 1, MAXREPEAT elif this == "{": if source.next == "}": subpatternappend((LITERAL, _ord(this))) continue min, max = 0, MAXREPEAT lo = hi = "" while source.next in DIGITS: lo += sourceget() if sourcematch(","): while source.next in DIGITS: hi += sourceget() else: hi = lo if not sourcematch("}"): subpatternappend((LITERAL, _ord(this))) source.seek(here) continue if lo: min = int(lo) if min >= MAXREPEAT: raise OverflowError("the repetition number is too large") if hi: max = int(hi) if max >= MAXREPEAT: raise OverflowError("the repetition number is too large") if max < min: raise source.error("min repeat greater than max repeat", source.tell() - here) else: raise AssertionError("unsupported quantifier %r" % (char,)) # figure out which item to repeat if subpattern: item = subpattern[-1:] else: item = None if not item or item[0][0] is AT: raise source.error("nothing to repeat", source.tell() - here + len(this)) if item[0][0] in _REPEATCODES: raise source.error("multiple repeat", source.tell() - here + len(this)) if item[0][0] is SUBPATTERN: group, add_flags, del_flags, p = item[0][1] if group is None and not add_flags and not del_flags: item = p if sourcematch("?"): subpattern[-1] = (MIN_REPEAT, (min, max, item)) else: subpattern[-1] = (MAX_REPEAT, (min, max, item)) elif this == ".": subpatternappend((ANY, None)) elif this == "(": start = source.tell() - 1 group = True name = None add_flags = 0 del_flags = 0 if sourcematch("?"): # options char = sourceget() if char is None: raise source.error("unexpected end of pattern") if char == "P": # python extensions if sourcematch("<"): # named group: skip forward to end of name name = source.getuntil(">", "group name") if not name.isidentifier(): msg = "bad character in group name %r" % name raise source.error(msg, len(name) + 1) elif sourcematch("="): # named backreference name = source.getuntil(")", "group name") if not name.isidentifier(): msg = "bad character in group name %r" % name raise source.error(msg, len(name) + 1) gid = state.groupdict.get(name) if gid is None: msg = "unknown group name %r" % name raise source.error(msg, len(name) + 1) if not state.checkgroup(gid): raise source.error("cannot refer to an open group", len(name) + 1) state.checklookbehindgroup(gid, source) subpatternappend((GROUPREF, gid)) continue else: char = sourceget() if char is None: raise source.error("unexpected end of pattern") raise source.error("unknown extension ?P" + char, len(char) + 2) elif char == ":": # non-capturing group group = None elif char == "#": # comment while True: if source.next is None: raise source.error("missing ), unterminated comment", source.tell() - start) if sourceget() == ")": break continue elif char in "=!<": # lookahead assertions dir = 1 if char == "<": char = sourceget() if char is None: raise source.error("unexpected end of pattern") if char not in "=!": raise source.error("unknown extension ?<" + char, len(char) + 2) dir = -1 # lookbehind lookbehindgroups = state.lookbehindgroups if lookbehindgroups is None: state.lookbehindgroups = state.groups p = _parse_sub(source, state, verbose, nested + 1) if dir < 0: if lookbehindgroups is None: state.lookbehindgroups = None if not sourcematch(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) if char == "=": subpatternappend((ASSERT, (dir, p))) else: subpatternappend((ASSERT_NOT, (dir, p))) continue elif char == "(": # conditional backreference group condname = source.getuntil(")", "group name") if condname.isidentifier(): condgroup = state.groupdict.get(condname) if condgroup is None: msg = "unknown group name %r" % condname raise source.error(msg, len(condname) + 1) else: try: condgroup = int(condname) if condgroup < 0: raise ValueError except ValueError: msg = "bad character in group name %r" % condname raise source.error(msg, len(condname) + 1) from None if not condgroup: raise source.error("bad group number", len(condname) + 1) if condgroup >= MAXGROUPS: msg = "invalid group reference %d" % condgroup raise source.error(msg, len(condname) + 1) state.checklookbehindgroup(condgroup, source) item_yes = _parse(source, state, verbose, nested + 1) if source.match("|"): item_no = _parse(source, state, verbose, nested + 1) if source.next == "|": raise source.error("conditional backref with more than two branches") else: item_no = None if not source.match(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) subpatternappend((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) continue elif char in FLAGS or char == "-": # flags flags = _parse_flags(source, state, char) if flags is None: # global flags if not first or subpattern: import warnings warnings.warn( 'Flags not at the start of the expression %r%s' % ( source.string[:20], # truncate long regexes ' (truncated)' if len(source.string) > 20 else '', ), DeprecationWarning, stacklevel=nested + 6 ) if (state.flags & SRE_FLAG_VERBOSE) and not verbose: raise Verbose continue add_flags, del_flags = flags group = None else: raise source.error("unknown extension ?" + char, len(char) + 1) # parse group contents if group is not None: try: group = state.opengroup(name) except error as err: raise source.error(err.msg, len(name) + 1) from None sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and not (del_flags & SRE_FLAG_VERBOSE)) p = _parse_sub(source, state, sub_verbose, nested + 1) if not source.match(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) if group is not None: state.closegroup(group, p) subpatternappend((SUBPATTERN, (group, add_flags, del_flags, p))) elif this == "^": subpatternappend((AT, AT_BEGINNING)) elif this == "$": subpatternappend((AT, AT_END)) else: raise AssertionError("unsupported special character %r" % (char,)) # unpack non-capturing groups for i in range(len(subpattern))[::-1]: op, av = subpattern[i] if op is SUBPATTERN: group, add_flags, del_flags, p = av if group is None and not add_flags and not del_flags: subpattern[i: i+1] = p return subpattern def _parse_flags(source, state, char): sourceget = source.get add_flags = 0 del_flags = 0 if char != "-": while True: flag = FLAGS[char] if source.istext: if char == 'L': msg = "bad inline flags: cannot use 'L' flag with a str pattern" raise source.error(msg) else: if char == 'u': msg = "bad inline flags: cannot use 'u' flag with a bytes pattern" raise source.error(msg) add_flags |= flag if (flag & TYPE_FLAGS) and (add_flags & TYPE_FLAGS) != flag: msg = "bad inline flags: flags 'a', 'u' and 'L' are incompatible" raise source.error(msg) char = sourceget() if char is None: raise source.error("missing -, : or )") if char in ")-:": break if char not in FLAGS: msg = "unknown flag" if char.isalpha() else "missing -, : or )" raise source.error(msg, len(char)) if char == ")": state.flags |= add_flags return None if add_flags & GLOBAL_FLAGS: raise source.error("bad inline flags: cannot turn on global flag", 1) if char == "-": char = sourceget() if char is None: raise source.error("missing flag") if char not in FLAGS: msg = "unknown flag" if char.isalpha() else "missing flag" raise source.error(msg, len(char)) while True: flag = FLAGS[char] if flag & TYPE_FLAGS: msg = "bad inline flags: cannot turn off flags 'a', 'u' and 'L'" raise source.error(msg) del_flags |= flag char = sourceget() if char is None: raise source.error("missing :") if char == ":": break if char not in FLAGS: msg = "unknown flag" if char.isalpha() else "missing :" raise source.error(msg, len(char)) assert char == ":" if del_flags & GLOBAL_FLAGS: raise source.error("bad inline flags: cannot turn off global flag", 1) if add_flags & del_flags: raise source.error("bad inline flags: flag turned on and off", 1) return add_flags, del_flags def fix_flags(src, flags): # Check and fix flags according to the type of pattern (str or bytes) if isinstance(src, str): if flags & SRE_FLAG_LOCALE: raise ValueError("cannot use LOCALE flag with a str pattern") if not flags & SRE_FLAG_ASCII: flags |= SRE_FLAG_UNICODE elif flags & SRE_FLAG_UNICODE: raise ValueError("ASCII and UNICODE flags are incompatible") else: if flags & SRE_FLAG_UNICODE: raise ValueError("cannot use UNICODE flag with a bytes pattern") if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII: raise ValueError("ASCII and LOCALE flags are incompatible") return flags def parse(str, flags=0, state=None): # parse 're' pattern into list of (opcode, argument) tuples source = Tokenizer(str) if state is None: state = State() state.flags = flags state.str = str try: p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0) except Verbose: # the VERBOSE flag was switched on inside the pattern. to be # on the safe side, we'll parse the whole thing again... state = State() state.flags = flags | SRE_FLAG_VERBOSE state.str = str source.seek(0) p = _parse_sub(source, state, True, 0) p.state.flags = fix_flags(str, p.state.flags) if source.next is not None: assert source.next == ")" raise source.error("unbalanced parenthesis") if flags & SRE_FLAG_DEBUG: p.dump() return p def parse_template(source, state): # parse 're' replacement string into list of literals and # group references s = Tokenizer(source) sget = s.get groups = [] literals = [] literal = [] lappend = literal.append def addgroup(index, pos): if index > state.groups: raise s.error("invalid group reference %d" % index, pos) if literal: literals.append(''.join(literal)) del literal[:] groups.append((len(literals), index)) literals.append(None) groupindex = state.groupindex while True: this = sget() if this is None: break # end of replacement string if this[0] == "\\": # group c = this[1] if c == "g": name = "" if not s.match("<"): raise s.error("missing <") name = s.getuntil(">", "group name") if name.isidentifier(): try: index = groupindex[name] except KeyError: raise IndexError("unknown group name %r" % name) else: try: index = int(name) if index < 0: raise ValueError except ValueError: raise s.error("bad character in group name %r" % name, len(name) + 1) from None if index >= MAXGROUPS: raise s.error("invalid group reference %d" % index, len(name) + 1) addgroup(index, len(name) + 1) elif c == "0": if s.next in OCTDIGITS: this += sget() if s.next in OCTDIGITS: this += sget() lappend(chr(int(this[1:], 8) & 0xff)) elif c in DIGITS: isoctal = False if s.next in DIGITS: this += sget() if (c in OCTDIGITS and this[2] in OCTDIGITS and s.next in OCTDIGITS): this += sget() isoctal = True c = int(this[1:], 8) if c > 0o377: raise s.error('octal escape value %s outside of ' 'range 0-0o377' % this, len(this)) lappend(chr(c)) if not isoctal: addgroup(int(this[1:]), len(this) - 1) else: try: this = chr(ESCAPES[this][1]) except KeyError: if c in ASCIILETTERS: raise s.error('bad escape %s' % this, len(this)) lappend(this) else: lappend(this) if literal: literals.append(''.join(literal)) if not isinstance(source, str): # The tokenizer implicitly decodes bytes objects as latin-1, we must # therefore re-encode the final representation. literals = [None if s is None else s.encode('latin-1') for s in literals] return groups, literals def expand_template(template, match): g = match.group empty = match.string[:0] groups, literals = template literals = literals[:] try: for index, group in groups: literals[index] = g(group) or empty except IndexError: raise error("invalid group reference %d" % index) return empty.join(literals)
Save