only pull 'charset' from <meta> content-type when it definitely exists

2020-02-06 11:54:04 +00:00 · 2020-02-06 11:54:04 +00:00 · 01441efe75
commit 01441efe75
parent 5bbae499d1
1 changed file with 3 additions and 1 deletion
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -153,7 +153,9 @@ def _find_encoding(headers: typing.Dict[str, str], data: bytes
        meta_content_type = soup.findAll("meta",
            {"http-equiv": lambda v: (v or "").lower() == "content-type"})
        if meta_content_type:
-            return _split_content(meta_content_type[0].get("content"))["charset"]
+            meta_content = _split_content(meta_content_type[0].get("content"))
+            if "charset" in meta_content:
+                return meta_content["charset"]

    doctype = [item for item in soup.contents if isinstance(item,
        bs4.Doctype)] or None