Use utils.deadline_process() in utils.http._request() so background threads can call _request()
This commit is contained in:
jesopo 2019-09-17 13:41:11 +01:00
parent fa95eaa9eb
commit 94c3ff962b

View file

@ -116,10 +116,11 @@ class Request(object):
class Response(object): class Response(object):
def __init__(self, code: int, data: typing.Any, def __init__(self, code: int, data: typing.Any,
headers: typing.Dict[str, str]): headers: typing.Dict[str, str], encoding: str):
self.code = code self.code = code
self.data = data self.data = data
self.headers = headers self.headers = headers
self.encoding = encoding
def _meta_content(s: str) -> typing.Dict[str, str]: def _meta_content(s: str) -> typing.Dict[str, str]:
out = {} out = {}
@ -154,59 +155,65 @@ def request(request_obj: typing.Union[str, Request], **kwargs) -> Response:
def _request(request_obj: Request) -> Response: def _request(request_obj: Request) -> Response:
headers = request_obj.get_headers() headers = request_obj.get_headers()
with utils.deadline(seconds=5): def _wrap():
try: response = requests.request(
response = requests.request( request_obj.method,
request_obj.method, request_obj.url,
request_obj.url, headers=headers,
headers=headers, params=request_obj.get_params,
params=request_obj.get_params, data=request_obj.get_body(),
data=request_obj.get_body(), allow_redirects=request_obj.allow_redirects,
allow_redirects=request_obj.allow_redirects, stream=True
stream=True )
) response_content = response.raw.read(RESPONSE_MAX,
response_content = response.raw.read(RESPONSE_MAX, decode_content=True)
decode_content=True) if not response_content or not response.raw.read(1) == b"":
if not response_content or not response.raw.read(1) == b"": raise ValueError("Response too large")
# response too large!
pass our_response = Response(response.status_code, response_content,
except utils.DeadlineExceededException: headers=utils.CaseInsensitiveDict(dict(response.headers)),
raise HTTPTimeoutException() encoding=response.encoding)
return our_response
try:
response = utils.deadline_process(_wrap)
except utils.DeadlineExceededException:
raise HTTPTimeoutException()
response_headers = utils.CaseInsensitiveDict(dict(response.headers))
content_type = response.headers.get("Content-Type", "").split(";", 1)[0] content_type = response.headers.get("Content-Type", "").split(";", 1)[0]
encoding = response.encoding or request_obj.fallback_encoding encoding = response.encoding or request_obj.fallback_encoding
if (request_obj.detect_encoding and if (request_obj.detect_encoding and
content_type and content_type in SOUP_CONTENT_TYPES): content_type and content_type in SOUP_CONTENT_TYPES):
souped = bs4.BeautifulSoup(response_content, request_obj.parser) souped = bs4.BeautifulSoup(response.data, request_obj.parser)
encoding = _find_encoding(souped) or encoding encoding = _find_encoding(souped) or encoding
def _decode_data(): def _decode_data():
return response_content.decode(encoding) return response.data.decode(encoding)
if request_obj.parse: if request_obj.parse:
if (not request_obj.check_content_type or if (not request_obj.check_content_type or
content_type in SOUP_CONTENT_TYPES): content_type in SOUP_CONTENT_TYPES):
souped = bs4.BeautifulSoup(_decode_data(), request_obj.parser) souped = bs4.BeautifulSoup(_decode_data(), request_obj.parser)
return Response(response.status_code, souped, response_headers) response.data = souped
return response
else: else:
raise HTTPWrongContentTypeException( raise HTTPWrongContentTypeException(
"Tried to soup non-html/non-xml data (%s)" % content_type) "Tried to soup non-html/non-xml data (%s)" % content_type)
if request_obj.json and response_content: if request_obj.json and response.data:
data = _decode_data() data = _decode_data()
try: try:
return Response(response.status_code, _json.loads(data), response.data = _json.loads(data)
response_headers) return response
except _json.decoder.JSONDecodeError as e: except _json.decoder.JSONDecodeError as e:
raise HTTPParsingException(str(e), data) raise HTTPParsingException(str(e), data)
if content_type in DECODE_CONTENT_TYPES: if content_type in DECODE_CONTENT_TYPES:
return Response(response.status_code, _decode_data(), response_headers) response.data = _decode_data()
return response
else: else:
return Response(response.status_code, response_content, return response
response_headers)
def request_many(urls: typing.List[str]) -> typing.Dict[str, Response]: def request_many(urls: typing.List[str]) -> typing.Dict[str, Response]:
responses = {} responses = {}