replace lxml usage with html5lib! the future is cool

This commit is contained in:
jesopo 2020-02-13 21:50:33 +00:00
parent 5c9e965d25
commit df38d7a57f
2 changed files with 4 additions and 3 deletions

View file

@@ -3,6 +3,7 @@ cryptography==2.7
dataclasses==0.6 dataclasses==0.6
dnspython==1.16.0 dnspython==1.16.0
feedparser==5.2.1 feedparser==5.2.1
html5lib==1.0.1
lxml==4.4.1 lxml==4.4.1
netifaces==0.10.9 netifaces==0.10.9
PySocks==1.7.1 PySocks==1.7.1

View file

@@ -127,7 +127,7 @@ class Response(object):
return self.data.decode(encoding or self.encoding) return self.data.decode(encoding or self.encoding)
def json(self) -> typing.Any: def json(self) -> typing.Any:
return _json.loads(self.data) return _json.loads(self.data)
def soup(self, parser: str="lxml") -> bs4.BeautifulSoup: def soup(self, parser: str="html5lib") -> bs4.BeautifulSoup:
return bs4.BeautifulSoup(self.decode(), parser) return bs4.BeautifulSoup(self.decode(), parser)
def _split_content(s: str) -> typing.Dict[str, str]: def _split_content(s: str) -> typing.Dict[str, str]:
@@ -144,7 +144,7 @@ def _find_encoding(headers: typing.Dict[str, str], data: bytes
if "charset" in content_header: if "charset" in content_header:
return content_header["charset"] return content_header["charset"]
soup = bs4.BeautifulSoup(data, "lxml") soup = bs4.BeautifulSoup(data, "html5lib")
if not soup.meta == None: if not soup.meta == None:
meta_charset = soup.meta.get("charset") meta_charset = soup.meta.get("charset")
if not meta_charset == None: if not meta_charset == None:
@@ -275,7 +275,7 @@ class Client(object):
request_many = request_many request_many = request_many
def strip_html(s: str) -> str: def strip_html(s: str) -> str:
return bs4.BeautifulSoup(s, "lxml").get_text() return bs4.BeautifulSoup(s, "html5lib").get_text()
def resolve_hostname(hostname: str) -> typing.List[str]: def resolve_hostname(hostname: str) -> typing.List[str]:
try: try: