opt
/
hc_python
/
lib
/
python3.12
/
site-packages
/
charset_normalizer
/
Go to Home Directory
+
Upload
Create File
root@0UT1S:~$
Execute
By Order of Mr.0UT1S
[DIR] ..
N/A
[DIR] __pycache__
N/A
[DIR] cli
N/A
__init__.py
1.55 KB
Rename
Delete
__main__.py
109 bytes
Rename
Delete
api.py
22.09 KB
Rename
Delete
cd.py
12.23 KB
Rename
Delete
constant.py
39.53 KB
Rename
Delete
legacy.py
2.27 KB
Rename
Delete
md.cpython-312-x86_64-linux-gnu.so
15.69 KB
Rename
Delete
md.py
19.57 KB
Rename
Delete
md__mypyc.cpython-312-x86_64-linux-gnu.so
274.32 KB
Rename
Delete
models.py
12.10 KB
Rename
Delete
py.typed
0 bytes
Rename
Delete
utils.py
11.72 KB
Rename
Delete
version.py
115 bytes
Rename
Delete
from __future__ import annotations

from typing import TYPE_CHECKING, Any
from warnings import warn

from .api import from_bytes
from .constant import CHARDET_CORRESPONDENCE

# TODO: remove this check when dropping Python 3.7 support
if TYPE_CHECKING:
    from typing_extensions import TypedDict

    class ResultDict(TypedDict):
        # Shape of the mapping returned by detect().
        encoding: str | None
        language: str
        confidence: float | None


def detect(
    byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
) -> ResultDict:
    """
    chardet-style legacy entry point.

    Guess the encoding of a byte string and report it as a chardet-like
    mapping with the keys ``encoding``, ``language`` and ``confidence``.
    The encoding name follows chardet's own spelling whenever a
    correspondence is known for it.

    This function is deprecated; it exists to ease migrating a project away
    from chardet (consult the documentation for further information). It is
    not planned for removal.

    When no match is found, ``encoding`` and ``confidence`` are ``None`` and
    ``language`` is the empty string. Otherwise ``confidence`` is
    ``1.0 - chaos`` of the best match, and a language reported as
    ``"Unknown"`` is returned as the empty string.

    :param byte_str: The byte sequence to examine. A ``bytearray`` is
        accepted and converted to ``bytes`` before detection.
    :param should_rename_legacy: Should we rename legacy encodings to their
        more modern equivalents? While this is ``False`` (the default), the
        detected name is translated through ``CHARDET_CORRESPONDENCE``
        whenever it has an entry there; otherwise the name is left as is.
    :param kwargs: Any extra keyword arguments are ignored; a warning naming
        them is emitted.
    :raises TypeError: If ``byte_str`` is neither ``bytes`` nor ``bytearray``.
    """
    if kwargs:
        ignored_names = ",".join(kwargs)
        warn(
            f"charset-normalizer disregard arguments '{ignored_names}' in legacy function detect()"
        )

    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            "Expected object of type bytes or bytearray, got: "
            "{}".format(type(byte_str))
        )

    if isinstance(byte_str, bytearray):
        byte_str = bytes(byte_str)

    best_match = from_bytes(byte_str).best()

    if best_match is None:
        # Nothing plausible was found: report an empty result.
        encoding = None
        language = ""
        confidence = None
    else:
        encoding = best_match.encoding
        language = best_match.language if best_match.language != "Unknown" else ""
        confidence = 1.0 - best_match.chaos

        # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get
        # stripped in the detection/normalization process, but chardet does
        # return 'utf-8-sig' and it is a valid codec name.
        if encoding == "utf_8" and best_match.bom:
            encoding += "_sig"

    # Identity check on purpose: only a literal False triggers the lookup.
    if should_rename_legacy is False:
        if encoding in CHARDET_CORRESPONDENCE:
            encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": encoding,
        "language": language,
        "confidence": confidence,
    }
Save