Path: blob/master/ invest-robot-contest_TinkoffBotTwitch-main/venv/lib/python3.8/site-packages/aiohttp/http_parser.py
7730 views
"""HTTP protocol parsers: request/response lines, headers, and payloads.

Reconstructed from a line-number-mangled paste: the interleaved original
line numbers were removed and conventional indentation restored.  Logic,
identifiers, and all runtime strings are unchanged.
"""
import abc
import asyncio
import collections
import re
import string
import zlib
from contextlib import suppress
from enum import IntEnum
from typing import (
    Any,
    Generic,
    List,
    NamedTuple,
    Optional,
    Pattern,
    Set,
    Tuple,
    Type,
    TypeVar,
    Union,
    cast,
)

from multidict import CIMultiDict, CIMultiDictProxy, istr
from yarl import URL

from . import hdrs
from .base_protocol import BaseProtocol
from .helpers import NO_EXTENSIONS, BaseTimerContext
from .http_exceptions import (
    BadHttpMessage,
    BadStatusLine,
    ContentEncodingError,
    ContentLengthError,
    InvalidHeader,
    LineTooLong,
    TransferEncodingError,
)
from .http_writer import HttpVersion, HttpVersion10
from .log import internal_logger
from .streams import EMPTY_PAYLOAD, StreamReader
from .typedefs import Final, RawHeaders

try:
    import brotli

    HAS_BROTLI = True
except ImportError:  # pragma: no cover
    HAS_BROTLI = False


__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

ASCIISET: Final[Set[str]] = set(string.printable)

# See https://tools.ietf.org/html/rfc7230#section-3.1.1
# and https://tools.ietf.org/html/rfc7230#appendix-B
#
#     method = token
#     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
#             "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
#     token = 1*tchar
METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)")
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")


class RawRequestMessage(NamedTuple):
    method: str
    path: str
    version: HttpVersion
    headers: "CIMultiDictProxy[str]"
    raw_headers: RawHeaders
    should_close: bool
    compression: Optional[str]
    upgrade: bool
    chunked: bool
    url: URL


RawResponseMessage = collections.namedtuple(
    "RawResponseMessage",
    [
        "version",
        "code",
        "reason",
        "headers",
        "raw_headers",
        "should_close",
        "compression",
        "upgrade",
        "chunked",
    ],
)


_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)


class ParseState(IntEnum):

    PARSE_NONE = 0
    PARSE_LENGTH = 1
    PARSE_CHUNKED = 2
    PARSE_UNTIL_EOF = 3


class ChunkState(IntEnum):
    PARSE_CHUNKED_SIZE = 0
    PARSE_CHUNKED_CHUNK = 1
    PARSE_CHUNKED_CHUNK_EOF = 2
    PARSE_MAYBE_TRAILERS = 3
    PARSE_TRAILERS = 4


class HeadersParser:
    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
    ) -> None:
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        headers = CIMultiDict()  # type: CIMultiDict[str]
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            bname = bname.strip(b" \t")
            bvalue = bvalue.lstrip()
            if HDRRE.search(bname):
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "xmlcharrefreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "xmlcharrefreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "xmlcharrefreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip()
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))


class HttpParser(abc.ABC, Generic[_MsgT]):
    def __init__(
        self,
        protocol: Optional[BaseProtocol] = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        limit: int = 2 ** 16,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines = []  # type: List[bytes]
        self._tail = b""
        self._upgraded = False
        self._payload = None
        self._payload_parser = None  # type: Optional[HttpPayloadParser]
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_headers, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        pass

    def feed_eof(self) -> Optional[_MsgT]:
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # NOTE(review): bytes-vs-str comparison is always True here,
                # so b"" is unconditionally appended — looks intentional
                # upstream but worth confirming.
                if self._lines[-1] != "\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: bytes = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:

        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:

            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + 2
                    continue

                if pos >= start_pos:
                    # line found
                    self._lines.append(data[start_pos:pos])
                    start_pos = pos + 2

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            try:
                                length = int(length_hdr)
                            except ValueError:
                                raise InvalidHeader(CONTENT_LENGTH)

                            if length < 0:
                                raise InvalidHeader(CONTENT_LENGTH)

                            return length

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        method = getattr(msg, "method", self.method)

                        assert self.protocol is not None
                        # calculate payload
                        if (
                            (length is not None and length > 0)
                            or msg.chunked
                            and not msg.upgrade
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                            )
                        else:
                            if (
                                getattr(msg, "code", 100) >= 199
                                and length is None
                                and self.read_until_eof
                            ):
                                payload = StreamReader(
                                    self.protocol,
                                    timer=self.timer,
                                    loop=loop,
                                    limit=self._limit,
                                )
                                payload_parser = HttpPayloadParser(
                                    payload,
                                    length=length,
                                    chunked=msg.chunked,
                                    method=method,
                                    compression=msg.compression,
                                    code=self.code,
                                    readall=True,
                                    response_with_body=self.response_with_body,
                                    auto_decompress=self._auto_decompress,
                                )
                                if not payload_parser.done:
                                    self._payload_parser = payload_parser
                            else:
                                payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:])
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
            else:
                break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            elif v == "upgrade":
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Content-Length can't be present with Transfer-Encoding",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val


class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(None, 2)
        except ValueError:
            raise BadStatusLine(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        path_part, _hash_separator, url_fragment = path.partition("#")
        path_part, _question_mark_separator, qs_part = path_part.partition("?")

        # method
        if not METHRE.match(method):
            raise BadStatusLine(method)

        # version
        try:
            if version.startswith("HTTP/"):
                n1, n2 = version[5:].split(".", 1)
                version_o = HttpVersion(int(n1), int(n2))
            else:
                raise BadStatusLine(version)
        except Exception:
            raise BadStatusLine(version)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            ),
        )


class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(None, 1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(None, 1)
        except ValueError:
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.match(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit number
        try:
            status_i = int(status)
        except ValueError:
            raise BadStatusLine(line) from None

        if status_i > 999:
            raise BadStatusLine(line)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            close = version_o <= HttpVersion10

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )


class HttpPayloadParser:
    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
    ) -> None:
        self._length = 0
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0
        self._chunk_tail = b""
        self._auto_decompress = auto_decompress
        self.done = False

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload = DeflateBuffer(
                payload, compression
            )  # type: Union[StreamReader, DeflateBuffer]
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: bytes = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        try:
                            size = int(bytes(size_b), 16)
                        except ValueError:
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc from None

                        chunk = chunk[pos + 2 :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[:2] == SEP:
                        chunk = chunk[2:]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[:2]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[2:]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + 2 :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""


class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    decompressor: Any

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out
        self.size = 0
        self.encoding = encoding
        self._started_decoding = False

        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )

            class BrotliDecoder:
                # Supports both 'brotlipy' and 'Brotli' packages
                # since they share an import name. The top branches
                # are for 'brotlipy' and bottom branches for 'Brotli'
                def __init__(self) -> None:
                    self._obj = brotli.Decompressor()

                def decompress(self, data: bytes) -> bytes:
                    if hasattr(self._obj, "decompress"):
                        return cast(bytes, self._obj.decompress(data))
                    return cast(bytes, self._obj.process(data))

                def flush(self) -> bytes:
                    if hasattr(self._obj, "flush"):
                        return cast(bytes, self._obj.flush())
                    return b""

            self.decompressor = BrotliDecoder()
        else:
            zlib_mode = 16 + zlib.MAX_WBITS if encoding == "gzip" else zlib.MAX_WBITS
            self.decompressor = zlib.decompressobj(wbits=zlib_mode)

    def set_exception(self, exc: BaseException) -> None:
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = zlib.decompressobj(wbits=-zlib.MAX_WBITS)

        try:
            chunk = self.decompressor.decompress(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            if self.encoding == "deflate" and not self.decompressor.eof:
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()


HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        from ._http_parser import (  # type: ignore[import,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    pass