From 0a67659637cc878b8e486cdcdefa0a9fba655319 Mon Sep 17 00:00:00 2001
From: jesopo
Date: Mon, 9 Sep 2019 14:39:19 +0100
Subject: [PATCH] only look for <meta>-related tags when there are meta tags

---
 src/utils/http.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/utils/http.py b/src/utils/http.py
index 9cfc70a1..232b1c6b 100644
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -66,19 +66,21 @@ def _meta_content(s: str) -> typing.Dict[str, str]:
     return out
 
 def _find_encoding(soup: bs4.BeautifulSoup) -> typing.Optional[str]:
-    meta_charset = soup.meta.get("charset")
-    if not meta_charset == None:
-        return meta_charset
-    else:
+    if not soup.meta == None:
+        meta_charset = soup.meta.get("charset")
+        if not meta_charset == None:
+            return meta_charset
+
         meta_content_type = soup.findAll("meta",
             {"http-equiv": lambda v: (v or "").lower() == "content-type"})
         if meta_content_type:
             return _meta_content(meta_content_type[0].get("content"))["charset"]
-        else:
-            doctype = [item for item in soup.contents if isinstance(item,
-                bs4.Doctype)] or None
-            if doctype and doctype[0] == "html":
-                return "utf8"
+
+    doctype = [item for item in soup.contents if isinstance(item,
+        bs4.Doctype)] or None
+    if doctype and doctype[0] == "html":
+        return "utf8"
+
     return None
 
 def request(url: str, method: str="GET", get_params: dict={},