Path: blob/master/venv/Lib/site-packages/chardet/cli/chardetect.py
811 views
#!/usr/bin/env python1"""2Script which takes one or more file paths and reports on their detected3encodings45Example::67% chardetect somefile someotherfile8somefile: windows-1252 with confidence 0.59someotherfile: ascii with confidence 1.01011If no paths are provided, it takes its input from stdin.1213"""1415from __future__ import absolute_import, print_function, unicode_literals1617import argparse18import sys1920from chardet import __version__21from chardet.compat import PY222from chardet.universaldetector import UniversalDetector232425def description_of(lines, name='stdin'):26"""27Return a string describing the probable encoding of a file or28list of strings.2930:param lines: The lines to get the encoding of.31:type lines: Iterable of bytes32:param name: Name of file or collection of lines33:type name: str34"""35u = UniversalDetector()36for line in lines:37line = bytearray(line)38u.feed(line)39# shortcut out of the loop to save reading further - particularly useful if we read a BOM.40if u.done:41break42u.close()43result = u.result44if PY2:45name = name.decode(sys.getfilesystemencoding(), 'ignore')46if result['encoding']:47return '{0}: {1} with confidence {2}'.format(name, result['encoding'],48result['confidence'])49else:50return '{0}: no result'.format(name)515253def main(argv=None):54"""55Handles command line arguments and gets things started.5657:param argv: List of arguments, as if specified on the command-line.58If None, ``sys.argv[1:]`` is used instead.59:type argv: list of str60"""61# Get command line arguments62parser = argparse.ArgumentParser(63description="Takes one or more file paths and reports their detected \64encodings")65parser.add_argument('input',66help='File whose encoding we would like to determine. \67(default: stdin)',68type=argparse.FileType('rb'), nargs='*',69default=[sys.stdin if PY2 else sys.stdin.buffer])70parser.add_argument('--version', action='version',71version='%(prog)s {0}'.format(__version__))72args = parser.parse_args(argv)7374for f in args.input:75if f.isatty():76print("You are running chardetect interactively. Press " +77"CTRL-D twice at the start of a blank line to signal the " +78"end of your input. If you want help, run chardetect " +79"--help\n", file=sys.stderr)80print(description_of(f, f.name))818283if __name__ == '__main__':84main()858687