explicitly use "lxml" for finding page encoding

jesopo 2019-11-26 14:34:48 +00:00
parent 1d41bc7c34
commit e4a5bd01e9


@@ -137,7 +137,7 @@ def _meta_content(s: str) -> typing.Dict[str, str]:
     return out
 
 def _find_encoding(data: bytes) -> typing.Optional[str]:
-    soup = bs4.BeautifulSoup(data)
+    soup = bs4.BeautifulSoup(data, "lxml")
     if not soup.meta == None:
         meta_charset = soup.meta.get("charset")
         if not meta_charset == None:
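
For context, a minimal self-contained sketch of the changed function, assuming only that bs4 and lxml are installed. The hunk is truncated after the charset check, so the return path shown here (and the `_find_encoding` continuation) is an assumption for illustration, not the full original function. Passing "lxml" explicitly pins the parser; without it, BeautifulSoup guesses from whatever parsers are installed and emits a warning, so parsing can differ between environments.

    import typing

    import bs4


    def _find_encoding(data: bytes) -> typing.Optional[str]:
        # Explicit parser choice, as in the commit above; avoids bs4's
        # "no parser was explicitly specified" warning and keeps behaviour
        # consistent across machines with different parsers installed.
        soup = bs4.BeautifulSoup(data, "lxml")
        if soup.meta is not None:
            # <meta charset="..."> is the first place to look.
            meta_charset = soup.meta.get("charset")
            if meta_charset is not None:
                return meta_charset
        # Assumed fallback for this sketch: no charset found.
        return None


    if __name__ == "__main__":
        page = b'<html><head><meta charset="iso-8859-1"></head></html>'
        print(_find_encoding(page))  # -> iso-8859-1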