Add fallback_encoding to utils.http.get_url, in case a page has no implicit
encoding
This commit is contained in:
parent
1e25990ce7
commit
c655668bbe
1 changed file with 3 additions and 2 deletions
|
@@ -19,7 +19,8 @@ def throw_timeout():
|
||||||
raise HTTPTimeoutException()
|
raise HTTPTimeoutException()
|
||||||
|
|
||||||
def get_url(url, method="GET", get_params={}, post_data=None, headers={},
|
def get_url(url, method="GET", get_params={}, post_data=None, headers={},
|
||||||
json_data=None, code=False, json=False, soup=False, parser="lxml"):
|
json_data=None, code=False, json=False, soup=False, parser="lxml",
|
||||||
|
fallback_encoding="utf8"):
|
||||||
|
|
||||||
if not urllib.parse.urlparse(url).scheme:
|
if not urllib.parse.urlparse(url).scheme:
|
||||||
url = "http://%s" % url
|
url = "http://%s" % url
|
||||||
|
@@ -53,7 +54,7 @@ def get_url(url, method="GET", get_params={}, post_data=None, headers={},
|
||||||
return response.code, soup
|
return response.code, soup
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
data = response_content.decode(response.encoding)
|
data = response_content.decode(response.encoding or fallback_encoding)
|
||||||
if json and data:
|
if json and data:
|
||||||
try:
|
try:
|
||||||
data = _json.loads(data)
|
data = _json.loads(data)
|
||||||
|
|
Loading…
Reference in a new issue