replace lxml usage with html5lib! the future is cool
This commit is contained in:
parent
5c9e965d25
commit
df38d7a57f
2 changed files with 4 additions and 3 deletions
|
@@ -3,6 +3,7 @@ cryptography==2.7
|
||||||
dataclasses==0.6
|
dataclasses==0.6
|
||||||
dnspython==1.16.0
|
dnspython==1.16.0
|
||||||
feedparser==5.2.1
|
feedparser==5.2.1
|
||||||
|
html5lib==1.0.1
|
||||||
lxml==4.4.1
|
lxml==4.4.1
|
||||||
netifaces==0.10.9
|
netifaces==0.10.9
|
||||||
PySocks==1.7.1
|
PySocks==1.7.1
|
||||||
|
|
|
@@ -127,7 +127,7 @@ class Response(object):
|
||||||
return self.data.decode(encoding or self.encoding)
|
return self.data.decode(encoding or self.encoding)
|
||||||
def json(self) -> typing.Any:
|
def json(self) -> typing.Any:
|
||||||
return _json.loads(self.data)
|
return _json.loads(self.data)
|
||||||
def soup(self, parser: str="lxml") -> bs4.BeautifulSoup:
|
def soup(self, parser: str="html5lib") -> bs4.BeautifulSoup:
|
||||||
return bs4.BeautifulSoup(self.decode(), parser)
|
return bs4.BeautifulSoup(self.decode(), parser)
|
||||||
|
|
||||||
def _split_content(s: str) -> typing.Dict[str, str]:
|
def _split_content(s: str) -> typing.Dict[str, str]:
|
||||||
|
@@ -144,7 +144,7 @@ def _find_encoding(headers: typing.Dict[str, str], data: bytes
|
||||||
if "charset" in content_header:
|
if "charset" in content_header:
|
||||||
return content_header["charset"]
|
return content_header["charset"]
|
||||||
|
|
||||||
soup = bs4.BeautifulSoup(data, "lxml")
|
soup = bs4.BeautifulSoup(data, "html5lib")
|
||||||
if not soup.meta == None:
|
if not soup.meta == None:
|
||||||
meta_charset = soup.meta.get("charset")
|
meta_charset = soup.meta.get("charset")
|
||||||
if not meta_charset == None:
|
if not meta_charset == None:
|
||||||
|
@@ -275,7 +275,7 @@ class Client(object):
|
||||||
request_many = request_many
|
request_many = request_many
|
||||||
|
|
||||||
def strip_html(s: str) -> str:
|
def strip_html(s: str) -> str:
|
||||||
return bs4.BeautifulSoup(s, "lxml").get_text()
|
return bs4.BeautifulSoup(s, "html5lib").get_text()
|
||||||
|
|
||||||
def resolve_hostname(hostname: str) -> typing.List[str]:
|
def resolve_hostname(hostname: str) -> typing.List[str]:
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in a new issue