Path: blob/main/test/lib/python3.9/site-packages/pip/_internal/utils/encoding.py
4804 views
import codecs
import locale
import re
import sys
from typing import List, Tuple

# Byte-order marks paired with the codec used to decode the bytes that follow.
#
# Order matters: auto_decode() returns on the first BOM that prefixes the
# data, and the UTF-16-LE BOM (FF FE) is itself a prefix of the UTF-32-LE BOM
# (FF FE 00 00). The 4-byte UTF-32 marks are therefore listed before the
# 2-byte UTF-16 marks so that UTF-32 input is not misdetected as UTF-16.
BOMS: List[Tuple[bytes, str]] = [
    (codecs.BOM_UTF8, "utf-8"),
    (codecs.BOM_UTF32, "utf-32"),
    (codecs.BOM_UTF32_BE, "utf-32-be"),
    (codecs.BOM_UTF32_LE, "utf-32-le"),
    (codecs.BOM_UTF16, "utf-16"),
    (codecs.BOM_UTF16_BE, "utf-16-be"),
    (codecs.BOM_UTF16_LE, "utf-16-le"),
]

# Matches a PEP 263 style encoding declaration, e.g. "coding: latin-1" or
# "coding=utf-8", and captures the encoding name.
ENCODING_RE = re.compile(rb"coding[:=]\s*([-\w.]+)")


def auto_decode(data: bytes) -> str:
    """Decode *data* to text, detecting its encoding where possible.

    Detection is attempted in this order:

    1. A leading byte-order mark listed in ``BOMS``; the BOM is stripped
       and the remainder decoded with the matching codec.
    2. A PEP 263 style ``coding[:=] <name>`` declaration on one of the first
       two lines, provided that line starts with ``#``.
    3. ``locale.getpreferredencoding(False)``, falling back to
       ``sys.getdefaultencoding()`` if that returns an empty value.

    :param data: The raw bytes to decode.
    :return: The decoded text.
    :raises UnicodeDecodeError: If *data* is not valid in the chosen encoding.
    :raises LookupError: If a coding declaration names an unknown encoding.
    """
    for bom, encoding in BOMS:
        if data.startswith(bom):
            return data[len(bom):].decode(encoding)

    # Only the first two lines may carry an encoding declaration (PEP 263).
    for line in data.split(b"\n")[:2]:
        if line[0:1] != b"#":
            continue
        match = ENCODING_RE.search(line)
        if match is not None:
            # The pattern only captures ASCII word characters, "-" and ".",
            # so decoding the captured name as ASCII cannot fail.
            declared = match.group(1).decode("ascii")
            return data.decode(declared)

    return data.decode(
        locale.getpreferredencoding(False) or sys.getdefaultencoding(),
    )