From 4a97c9eb0dc8279a7382c2fddf51f0ed93a99c1a Mon Sep 17 00:00:00 2001 From: jesopo Date: Wed, 11 Sep 2019 17:44:07 +0100 Subject: [PATCH] refactor utils.http.requests to support a Request object --- modules/acronym.py | 2 +- modules/eval_lua.py | 3 +- modules/eval_rust.py | 4 +- modules/fediverse.py | 2 +- modules/title.py | 2 +- src/utils/http.py | 104 ++++++++++++++++++++++++++++++++----------- 6 files changed, 83 insertions(+), 34 deletions(-) diff --git a/modules/acronym.py b/modules/acronym.py index 91dbe8bc..cba92521 100644 --- a/modules/acronym.py +++ b/modules/acronym.py @@ -9,7 +9,7 @@ class Module(ModuleManager.BaseModule): @utils.kwarg("usage", "") def acronym(self, event): query = event["args_split"][0].upper() - response = utils.http.request(API % query, soup=True) + response = utils.http.request(API % query, parse=True) if response.data: acronyms = [] for element in response.data.find_all("acro"): diff --git a/modules/eval_lua.py b/modules/eval_lua.py index d219c2e3..102aadc7 100644 --- a/modules/eval_lua.py +++ b/modules/eval_lua.py @@ -12,8 +12,7 @@ class Module(ModuleManager.BaseModule): try: page = utils.http.request(EVAL_URL, post_data={"input": event["args"]}, - method="POST", - soup=True) + method="POST", parse=True) except socket.timeout: raise utils.EventError("%s: eval timed out" % event["user"].nickname) diff --git a/modules/eval_rust.py b/modules/eval_rust.py index 3d98a622..75d51d95 100644 --- a/modules/eval_rust.py +++ b/modules/eval_rust.py @@ -32,8 +32,8 @@ class Module(ModuleManager.BaseModule): args = API_ARGS.copy() args["code"] = FN_TEMPLATE % event["args"] try: - page = utils.http.request(EVAL_URL, json_data=args, - method="POST", json=True) + page = utils.http.request(EVAL_URL, post_data=args, + method="POST", json=True, content_type="application/json") except socket.timeout: raise utils.EventError("%s: eval timed out" % event["user"].nickname) diff --git a/modules/fediverse.py b/modules/fediverse.py index 599629fc..e1080552 100644 --- a/modules/fediverse.py +++ b/modules/fediverse.py @@ -56,7 +56,7 @@ class Module(ModuleManager.BaseModule): raise utils.EventError("Please provide @@") hostmeta = utils.http.request(HOSTMETA % instance, - soup=True, check_content_type=False) + parse=True, check_content_type=False) webfinger_url = None for item in hostmeta.data.find_all("link"): if item["rel"] and item["rel"][0] == "lrdd": diff --git a/modules/title.py b/modules/title.py index 9308dddc..dfaf691c 100644 --- a/modules/title.py +++ b/modules/title.py @@ -26,7 +26,7 @@ class Module(ModuleManager.BaseModule): return None try: - page = utils.http.request(url, soup=True) + page = utils.http.request(url, parse=True) except utils.http.HTTPWrongContentTypeException: return None except Exception as e: diff --git a/src/utils/http.py b/src/utils/http.py index 5b5f36bb..4dfb450d 100644 --- a/src/utils/http.py +++ b/src/utils/http.py @@ -52,6 +52,62 @@ class HTTPWrongContentTypeException(HTTPException): def throw_timeout(): raise HTTPTimeoutException() +class Request(object): + def __init__(self, url: str, method: str="GET", + get_params: typing.Dict[str, str]={}, post_data: typing.Any=None, + headers: typing.Dict[str, str]={}, + + json: bool=False, allow_redirects: bool=True, + check_content_type: bool=True, parse: bool=False, + detect_encoding: bool=True, + + parser: str="lxml", fallback_encoding="iso-8859-1", + content_type: str=None, + + **kwargs): + self.set_url(url) + self.method = method.upper() + self.get_params = get_params + self.post_data = post_data + self.headers = headers + + self.json = json + self.allow_redirects = allow_redirects + self.check_content_type = check_content_type + self.parse = parse + self.detect_encoding = detect_encoding + + self.parser = parser + self.fallback_encoding = fallback_encoding + self.content_type = content_type + + if kwargs: + if method == "POST": + self.post_data = kwargs + else: + self.get_params.update(kwargs) + + def set_url(self, url: str): + if not urllib.parse.urlparse(url).scheme: + url = "http://%s" % url + self.url = url + + def get_headers(self) -> typing.Dict[str, str]: + headers = self.headers.copy() + if not "Accept-Language" in headers: + headers["Accept-Language"] = "en-GB" + if not "User-Agent" in headers: + headers["User-Agent"] = USER_AGENT + if not "Content-Type" in headers and self.content_type: + headers["Content-Type"] = self.content_type + return headers + + def get_body(self) -> typing.Any: + if self.content_type == "application/json": + return _json.dumps(self.post_data) + else: + return self.post_data + class Response(object): def __init__(self, code: int, data: typing.Any, headers: typing.Dict[str, str]): @@ -84,31 +140,23 @@ def _find_encoding(soup: bs4.BeautifulSoup) -> typing.Optional[str]: return None -def request(url: str, method: str="GET", get_params: dict={}, - post_data: typing.Any=None, headers: dict={}, - json_data: typing.Any=None, code: bool=False, json: bool=False, - soup: bool=False, parser: str="lxml", detect_encoding: bool=True, - fallback_encoding: str="utf8", allow_redirects: bool=True, - check_content_type: bool=True) -> Response: +def request(request_obj: typing.Union[str, Request], **kwargs) -> Response: + if type(request_obj) == str: + request_obj = Request(request_obj, **kwargs) + return _request(request_obj) - if not urllib.parse.urlparse(url).scheme: - url = "http://%s" % url - - if not "Accept-Language" in headers: - headers["Accept-Language"] = "en-GB" - if not "User-Agent" in headers: - headers["User-Agent"] = USER_AGENT +def _request(request_obj: Request) -> Response: + headers = request_obj.get_headers() with utils.deadline(seconds=5): try: response = requests.request( - method.upper(), - url, + request_obj.method, + request_obj.url, headers=headers, - params=get_params, - data=post_data, - json=json_data, - allow_redirects=allow_redirects, + params=request_obj.get_params, + data=request_obj.get_body(), + allow_redirects=request_obj.allow_redirects, stream=True ) response_content = response.raw.read(RESPONSE_MAX, @@ -122,23 +170,25 @@ def request(url: str, method: str="GET", get_params: dict={}, response_headers = utils.CaseInsensitiveDict(dict(response.headers)) content_type = response.headers.get("Content-Type", "").split(";", 1)[0] - encoding = response.encoding or "iso-8859-1" - if detect_encoding and content_type and content_type in SOUP_CONTENT_TYPES: - souped = bs4.BeautifulSoup(response_content, parser) + encoding = response.encoding or request_obj.fallback_encoding + if (request_obj.detect_encoding and + content_type and content_type in SOUP_CONTENT_TYPES): + souped = bs4.BeautifulSoup(response_content, request_obj.parser) encoding = _find_encoding(souped) or encoding def _decode_data(): return response_content.decode(encoding) - if soup: - if not check_content_type or content_type in SOUP_CONTENT_TYPES: - soup = bs4.BeautifulSoup(_decode_data(), parser) - return Response(response.status_code, soup, response_headers) + if request_obj.parse: + if (not request_obj.check_content_type or + content_type in SOUP_CONTENT_TYPES): + souped = bs4.BeautifulSoup(_decode_data(), request_obj.parser) + return Response(response.status_code, souped, response_headers) else: raise HTTPWrongContentTypeException( "Tried to soup non-html/non-xml data (%s)" % content_type) - if json and response_content: + if request_obj.json and response_content: data = _decode_data() try: return Response(response.status_code, _json.loads(data),