Return response code from utils.http.get_url when code=True and soup=True

jesopo 2018-10-09 22:16:04 +01:00
parent 8842979205
commit f69a1ce7c1


@@ -16,6 +16,7 @@ def get_url(url, **kwargs):
     post_params = kwargs.get("post_params", None)
     post_data = kwargs.get("post_data", None)
     headers = kwargs.get("headers", {})
+    return_code = kwargs.get("code", False)
     if get_params:
         get_params = "?%s" % urllib.parse.urlencode(get_params)
@@ -28,10 +29,11 @@ def get_url(url, **kwargs):
         if post_data:
             post_data = post_data.encode("utf8")
     except UnicodeEncodeError:
-        if kwargs.get("code"):
+        if return_code:
             return 0, False
         return False
+    print(post_data)
     request = urllib.request.Request(url, post_data)
     request.add_header("Accept-Language", "en-US")
     request.add_header("User-Agent", USER_AGENT)
@@ -43,7 +45,7 @@ def get_url(url, **kwargs):
         response = urllib.request.urlopen(request, timeout=5)
     except urllib.error.HTTPError as e:
         traceback.print_exc()
-        if kwargs.get("code"):
+        if return_code:
             return e.code, False
         return False
     except urllib.error.URLError as e:
@@ -53,14 +55,19 @@ def get_url(url, **kwargs):
         return False
     except ssl.CertificateError as e:
         traceback.print_exc()
-        if kwargs.get("code"):
+        if return_code:
             return -1, False,
         return False
     response_content = response.read()
     encoding = response.info().get_content_charset()
     if kwargs.get("soup"):
-        return bs4.BeautifulSoup(response_content, kwargs.get("parser", "lxml"))
+        soup = bs4.BeautifulSoup(response_content, kwargs.get("parser",
+            "lxml"))
+        if return_code:
+            return response.code, soup
+        return soup
     if not encoding:
         soup = bs4.BeautifulSoup(response_content, kwargs.get("parser", "lxml"))
         metas = soup.find_all("meta")