remove parser from utils.http.Request, add Request.soup()
parent 93aea08818
commit ed775ddbe3

5 changed files with 24 additions and 31 deletions
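In short: the parse=True flag and the per-request parser field are gone; request() now always returns a raw Response, and callers soup it themselves when they want a document. A minimal before/after sketch (the URL is a placeholder):

    # before: request() parsed for you, and the soup landed in response.data
    response = utils.http.request("http://example.com/index.xml", parse=True)
    links = response.data.find_all("link")

    # after: request() returns raw data; soup on demand
    response = utils.http.request("http://example.com/index.xml")
    links = response.soup().find_all("link")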
@@ -9,11 +9,13 @@ class Module(ModuleManager.BaseModule):
     @utils.kwarg("usage", "<acronym>")
     def acronym(self, event):
         query = event["args_split"][0].upper()
-        response = utils.http.request(API % query, parse=True)
-        if response.data:
-            acronyms = []
-            for element in response.data.find_all("acro"):
-                acronyms.append(element.expan.string)
+        response = utils.http.request(API % query)
+
+        acronyms = []
+        for element in response.soup().find_all("acro"):
+            acronyms.append(element.expan.string)
 
-            if acronyms:
-                event["stdout"].write("%s: %s" % (query, ", ".join(acronyms)))
+        if acronyms:
+            event["stdout"].write("%s: %s" % (query, ", ".join(acronyms)))
+        else:
+            raise utils.EventResultsError()
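The scraping logic in this module is otherwise unchanged; only where the soup comes from differs. For reference, a standalone sketch of the same extraction, with an invented sample of the acronym API's XML shape (real payloads may differ):

    import bs4

    # invented sample payload; the module expects <acro> entries with <expan> children
    sample = "<acros><acro><expan>As Far As I Know</expan></acro></acros>"
    soup = bs4.BeautifulSoup(sample, "lxml")
    print([element.expan.string for element in soup.find_all("acro")])
    # ['As Far As I Know']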
@@ -10,15 +10,14 @@ class Module(ModuleManager.BaseModule):
     @utils.hook("received.command.lua", min_args=1)
     def eval(self, event):
         try:
-            page = utils.http.request(EVAL_URL,
-                post_data={"input": event["args"]},
-                method="POST", parse=True)
+            page = utils.http.request(EVAL_URL, post_data=
+                {"input": event["args"]}, method="POST")
         except socket.timeout:
             raise utils.EventError("%s: eval timed out" %
                 event["user"].nickname)
 
         if page:
-            textareas = page.data.find_all("textarea")
+            textareas = page.soup().find_all("textarea")
             if len(textareas) > 1:
                 out = textareas[1].text.strip("\n")
                 event["stdout"].write("%s: %s" % (event["user"].nickname, out))
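Here only the request call reshuffles (parse=True dropped) and the textarea scraping moves onto page.soup(). The pattern as a sketch, with an illustrative input (EVAL_URL is the module's existing constant):

    page = utils.http.request(EVAL_URL,
        post_data={"input": "return 1 + 1"}, method="POST")
    textareas = page.soup().find_all("textarea")
    if len(textareas) > 1:
        # the eval site's second textarea carries the output
        print(textareas[1].text.strip("\n"))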
@@ -38,8 +38,7 @@ class FindActorException(Exception):
 
 def find_actor(username, instance):
     hostmeta = HOSTMETA_TEMPLATE % instance
-    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance,
-        parse=True, check_content_type=False)
+    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance)
     try:
         hostmeta = utils.http.request(hostmeta_request)
     except:
@@ -47,7 +46,7 @@ def find_actor(username, instance):
 
     webfinger_url = None
     if hostmeta.code == 200:
-        for item in hostmeta.data.find_all("link"):
+        for item in hostmeta.soup().find_all("link"):
             if item["rel"] and item["rel"][0] == "lrdd":
                 webfinger_url = item["template"]
                 break
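Both fediverse hunks are steps of the same discovery dance: fetch the instance's host-meta XRD document, then read out the lrdd link template that points at its webfinger endpoint. A self-contained sketch, assuming a typical XRD response (the sample document is invented for illustration):

    import bs4

    # invented, but typical, host-meta XRD body
    xrd = ('<XRD xmlns="http://docs.oasis-open.org/ns/xri/xrd-1.0">'
        '<Link rel="lrdd" template='
        '"https://example.com/.well-known/webfinger?resource={uri}"/></XRD>')

    soup = bs4.BeautifulSoup(xrd, "lxml")
    webfinger_url = None
    for item in soup.find_all("link"):
        if item["rel"] and item["rel"][0] == "lrdd":
            webfinger_url = item["template"]
            break
    # webfinger_url == "https://example.com/.well-known/webfinger?resource={uri}"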
@@ -50,14 +50,16 @@ class Module(ModuleManager.BaseModule):
             return -1, None
 
         try:
-            page = utils.http.request(url, parse=True)
-        except utils.http.HTTPWrongContentTypeException:
-            return -1, None
+            page = utils.http.request(url)
         except Exception as e:
             self.log.error("failed to get URL title for %s: %s", [url, str(e)])
             return -1, None
 
-        if page.data.title:
-            title = utils.parse.line_normalise(page.data.title.text)
+        if not page.content_type in utils.http.SOUP_CONTENT_TYPES:
+            return -1, None
+        page = page.soup()
+
+        if page.title:
+            title = utils.parse.line_normalise(page.title.text)
             if not title:
                 return -3, None
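A behavioural note on this hunk: utils.http.request() no longer raises HTTPWrongContentTypeException on the caller's behalf, so the content-type gate moves into the module, using the SOUP_CONTENT_TYPES set that http.py already consults for encoding detection. Reduced to its skeleton:

    page = utils.http.request(url)
    if not page.content_type in utils.http.SOUP_CONTENT_TYPES:
        return -1, None        # not html/xml; don't try to soup it
    page = page.soup()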
@@ -72,9 +72,7 @@ class Request(object):
 
     allow_redirects: bool = True
     check_content_type: bool = True
-    parse: bool = False
     detect_encoding: bool = True
-    parser: str = "lxml"
     fallback_encoding: typing.Optional[str] = None
     content_type: typing.Optional[str] = None
     proxy: typing.Optional[str] = None
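With parse and parser gone, a Request now describes only how to fetch, never how to interpret the result. Constructing one uses the remaining fields, e.g. (values illustrative):

    request = utils.http.Request("https://example.com/page",
        allow_redirects=False, fallback_encoding="utf8")
    response = utils.http.request(request)
    print(response.code, response.content_type)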
@@ -126,8 +124,12 @@ class Response(object):
         self.encoding = encoding
         self.headers = headers
         self.cookies = cookies
 
-    def json(self):
+    def decode(self) -> str:
+        return self.data.decode(self.encoding)
+
+    def json(self) -> typing.Any:
         return _json.loads(self.data)
 
+    def soup(self, parser: str="lxml") -> bs4.BeautifulSoup:
+        return bs4.BeautifulSoup(self.decode(), parser)
+
 def _meta_content(s: str) -> typing.Dict[str, str]:
     out = {}
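Response now carries the three interpretations as methods: decode() for text in the detected encoding, json() for parsed JSON, and soup() for a BeautifulSoup, with the parser picked per call instead of per Request. For example:

    response = utils.http.request("https://example.com/")
    text = response.decode()               # str, via the detected encoding
    soup = response.soup()                 # lxml by default
    soup2 = response.soup("html.parser")   # any parser bs4 supports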
@@ -200,23 +202,12 @@ def _request(request_obj: Request) -> Response:
     if (request_obj.detect_encoding and
             response.content_type and
             response.content_type in SOUP_CONTENT_TYPES):
-        souped = bs4.BeautifulSoup(response.data, request_obj.parser)
+        souped = bs4.BeautifulSoup(response.data, "lxml")
         encoding = _find_encoding(souped) or encoding
 
     def _decode_data():
         return response.data.decode(encoding)
 
-    if request_obj.parse:
-        if (not request_obj.check_content_type or
-                response.content_type in SOUP_CONTENT_TYPES):
-            souped = bs4.BeautifulSoup(_decode_data(), request_obj.parser)
-            response.data = souped
-            return response
-        else:
-            raise HTTPWrongContentTypeException(
-                "Tried to soup non-html/non-xml data (%s)" %
-                response.content_type)
-
     if request_obj.json and response.data:
         data = _decode_data()
         try:
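_request() still soups html/xml bodies once, but now purely to sniff the document's declared charset before decoding; hardcoding "lxml" is fine there because no caller can observe that soup. _find_encoding's body is outside this diff; a hypothetical reading of what such a helper does (name suffixed and logic assumed, not taken from the source):

    import typing
    import bs4

    def find_encoding_sketch(souped: bs4.BeautifulSoup) -> typing.Optional[str]:
        # hypothetical: honour <meta charset="..."> if the document declares one
        meta = souped.find("meta", charset=True)
        if meta:
            return meta["charset"]
        return None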