explicitly use "lxml" for finding page encoding

2019-11-26 14:34:48 +00:00 · 2019-11-26 14:34:48 +00:00 · e4a5bd01e9
commit e4a5bd01e9
parent 1d41bc7c34
1 changed file with 1 addition and 1 deletion
--- a/src/utils/http.py
+++ b/src/utils/http.py
@@ -137,7 +137,7 @@ def _meta_content(s: str) -> typing.Dict[str, str]:
    return out

 def _find_encoding(data: bytes) -> typing.Optional[str]:
-    soup = bs4.BeautifulSoup(data)
+    soup = bs4.BeautifulSoup(data, "lxml")
    if not soup.meta == None:
        meta_charset = soup.meta.get("charset")
        if not meta_charset == None: