From c655668bbec83445776928d2be3eabcdd9cf0028 Mon Sep 17 00:00:00 2001
From: jesopo
Date: Wed, 10 Oct 2018 23:49:42 +0100
Subject: [PATCH] Add fallback_encoding to utils.http.get_url, in case a page has no implicit encoding

---
 src/utils/http.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/utils/http.py b/src/utils/http.py
index d8b96dc7..a91808ed 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -19,7 +19,8 @@ def throw_timeout():
     raise HTTPTimeoutException()
 
 def get_url(url, method="GET", get_params={}, post_data=None, headers={},
-        json_data=None, code=False, json=False, soup=False, parser="lxml"):
+        json_data=None, code=False, json=False, soup=False, parser="lxml",
+        fallback_encoding="utf8"):
 
     if not urllib.parse.urlparse(url).scheme:
         url = "http://%s" % url
@@ -53,7 +54,7 @@ def get_url(url, method="GET", get_params={}, post_data=None, headers={},
             return response.code, soup
         return soup
 
-    data = response_content.decode(response.encoding)
+    data = response_content.decode(response.encoding or fallback_encoding)
     if json and data:
         try:
             data = _json.loads(data)