only look for <meta>-related tags when there are meta tags
This commit is contained in:
parent
8e824c9277
commit
0a67659637
1 changed file with 11 additions and 9 deletions
|
@@ -66,19 +66,21 @@ def _meta_content(s: str) -> typing.Dict[str, str]:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
def _find_encoding(soup: bs4.BeautifulSoup) -> typing.Optional[str]:
|
def _find_encoding(soup: bs4.BeautifulSoup) -> typing.Optional[str]:
|
||||||
meta_charset = soup.meta.get("charset")
|
if not soup.meta == None:
|
||||||
if not meta_charset == None:
|
meta_charset = soup.meta.get("charset")
|
||||||
return meta_charset
|
if not meta_charset == None:
|
||||||
else:
|
return meta_charset
|
||||||
|
|
||||||
meta_content_type = soup.findAll("meta",
|
meta_content_type = soup.findAll("meta",
|
||||||
{"http-equiv": lambda v: (v or "").lower() == "content-type"})
|
{"http-equiv": lambda v: (v or "").lower() == "content-type"})
|
||||||
if meta_content_type:
|
if meta_content_type:
|
||||||
return _meta_content(meta_content_type[0].get("content"))["charset"]
|
return _meta_content(meta_content_type[0].get("content"))["charset"]
|
||||||
else:
|
|
||||||
doctype = [item for item in soup.contents if isinstance(item,
|
doctype = [item for item in soup.contents if isinstance(item,
|
||||||
bs4.Doctype)] or None
|
bs4.Doctype)] or None
|
||||||
if doctype and doctype[0] == "html":
|
if doctype and doctype[0] == "html":
|
||||||
return "utf8"
|
return "utf8"
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def request(url: str, method: str="GET", get_params: dict={},
|
def request(url: str, method: str="GET", get_params: dict={},
|
||||||
|
|
Loading…
Reference in a new issue