remove parser from utils.http.Request, add Request.soup()

jesopo 2019-11-26 11:35:56 +00:00
parent 93aea08818
commit ed775ddbe3
5 changed files with 24 additions and 31 deletions


@@ -9,11 +9,13 @@ class Module(ModuleManager.BaseModule):
     @utils.kwarg("usage", "<acronym>")
     def acronym(self, event):
         query = event["args_split"][0].upper()
-        response = utils.http.request(API % query, parse=True)
-        acronyms = []
-        for element in response.data.find_all("acro"):
-            acronyms.append(element.expan.string)
-        if acronyms:
-            event["stdout"].write("%s: %s" % (query, ", ".join(acronyms)))
-        else:
-            raise utils.EventResultsError()
+        response = utils.http.request(API % query)
+        if response.data:
+            acronyms = []
+            for element in response.soup().find_all("acro"):
+                acronyms.append(element.expan.string)
+
+            if acronyms:
+                event["stdout"].write("%s: %s" % (query, ", ".join(acronyms)))
+            else:
+                raise utils.EventResultsError()
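
For callers the migration is mechanical: drop parse=True, keep using response.data for the raw payload, and call response.soup() only where a parse tree is actually needed. A minimal sketch of the new pattern, assuming bitbot's "from src import utils" layout; the URL and tag name are illustrative, not from this module:

    from src import utils

    # before: request(url, parse=True) put a BeautifulSoup tree in response.data
    # after: response.data stays raw, and souping is explicit and on demand
    response = utils.http.request("http://example.com/feed.xml")  # hypothetical URL
    if response.data:
        for element in response.soup().find_all("item"):  # illustrative tag
            print(element.string)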


@@ -10,15 +10,14 @@ class Module(ModuleManager.BaseModule):
     @utils.hook("received.command.lua", min_args=1)
     def eval(self, event):
         try:
-            page = utils.http.request(EVAL_URL,
-                post_data={"input": event["args"]},
-                method="POST", parse=True)
+            page = utils.http.request(EVAL_URL, post_data=
+                {"input": event["args"]}, method="POST")
         except socket.timeout:
             raise utils.EventError("%s: eval timed out" %
                 event["user"].nickname)

         if page:
-            textareas = page.data.find_all("textarea")
+            textareas = page.soup().find_all("textarea")
             if len(textareas) > 1:
                 out = textareas[1].text.strip("\n")
                 event["stdout"].write("%s: %s" % (event["user"].nickname, out))
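
soup() takes the parser as a per-call argument, defaulting to "lxml", which replaces the old per-Request parser field. A hedged sketch of a POST-then-parse caller; the endpoint is a placeholder, not this module's real EVAL_URL:

    # POST form data, then parse the returned HTML with an explicit parser;
    # html.parser is stdlib-only, lxml (the default) is faster when installed
    page = utils.http.request("https://example.com/eval",  # placeholder endpoint
        post_data={"input": "print(1)"}, method="POST")
    textareas = page.soup(parser="html.parser").find_all("textarea")
    if len(textareas) > 1:
        print(textareas[1].text.strip("\n"))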


@@ -38,8 +38,7 @@ class FindActorException(Exception):
 def find_actor(username, instance):
     hostmeta = HOSTMETA_TEMPLATE % instance
-    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance,
-        parse=True, check_content_type=False)
+    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance)
     try:
         hostmeta = utils.http.request(hostmeta_request)
     except:
@@ -47,7 +46,7 @@ def find_actor(username, instance):
     webfinger_url = None
     if hostmeta.code == 200:
-        for item in hostmeta.data.find_all("link"):
+        for item in hostmeta.soup().find_all("link"):
             if item["rel"] and item["rel"][0] == "lrdd":
                 webfinger_url = item["template"]
                 break
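
Souping being explicit also retires the check_content_type=False escape hatch used above: soup() never inspects content_type, it simply parses whatever the server returned. An isolated sketch of the lookup; the instance name is made up, and HOSTMETA_TEMPLATE is restated so the snippet is self-contained:

    HOSTMETA_TEMPLATE = "https://%s/.well-known/host-meta"  # standard well-known path
    request = utils.http.Request(HOSTMETA_TEMPLATE % "example.social")  # made-up host
    hostmeta = utils.http.request(request)
    if hostmeta.code == 200:
        # host-meta is XML; rel="lrdd" marks the webfinger URL template
        templates = [link["template"] for link in hostmeta.soup().find_all("link")
            if link["rel"] and link["rel"][0] == "lrdd"]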


@@ -50,14 +50,16 @@
             return -1, None

         try:
-            page = utils.http.request(url, parse=True)
-        except utils.http.HTTPWrongContentTypeException:
-            return -1, None
+            page = utils.http.request(url)
         except Exception as e:
             self.log.error("failed to get URL title for %s: %s", [url, str(e)])
             return -1, None

-        if page.data.title:
-            title = utils.parse.line_normalise(page.data.title.text)
+        if not page.content_type in utils.http.SOUP_CONTENT_TYPES:
+            return -1, None
+
+        page = page.soup()
+        if page.title:
+            title = utils.parse.line_normalise(page.title.text)
             if not title:
                 return -3, None
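
With parse=True gone, HTTPWrongContentTypeException is no longer raised on the request path, so a caller that cares, like this title fetcher, guards on content_type itself before souping. A minimal sketch of that guard; the URL is hypothetical:

    url = "http://example.com/"  # hypothetical page
    page = utils.http.request(url)
    # SOUP_CONTENT_TYPES is the html/xml whitelist utils.http already uses
    # for encoding detection; reusing it decides whether souping makes sense
    if page.content_type in utils.http.SOUP_CONTENT_TYPES:
        soup = page.soup()
        title = soup.title.text if soup.title else None
    else:
        title = None  # e.g. image/png: nothing to parse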


@@ -72,9 +72,7 @@ class Request(object):
     allow_redirects: bool = True
     check_content_type: bool = True
-    parse: bool = False
     detect_encoding: bool = True
-    parser: str = "lxml"
     fallback_encoding: typing.Optional[str] = None
     content_type: typing.Optional[str] = None
     proxy: typing.Optional[str] = None
@@ -126,8 +124,12 @@ class Response(object):
         self.encoding = encoding
         self.headers = headers
         self.cookies = cookies

-    def json(self):
+    def decode(self) -> str:
+        return self.data
+
+    def json(self) -> typing.Any:
         return _json.loads(self.data)
+
+    def soup(self, parser: str="lxml") -> bs4.BeautifulSoup:
+        return bs4.BeautifulSoup(self.decode(), parser)
@@ -200,23 +202,12 @@ def _request(request_obj: Request) -> Response:
     if (request_obj.detect_encoding and
             response.content_type and
             response.content_type in SOUP_CONTENT_TYPES):
-        souped = bs4.BeautifulSoup(response.data, request_obj.parser)
+        souped = bs4.BeautifulSoup(response.data, "lxml")
         encoding = _find_encoding(souped) or encoding

     def _decode_data():
         return response.data.decode(encoding)

-    if request_obj.parse:
-        if (not request_obj.check_content_type or
-                response.content_type in SOUP_CONTENT_TYPES):
-            souped = bs4.BeautifulSoup(_decode_data(), request_obj.parser)
-            response.data = souped
-            return response
-        else:
-            raise HTTPWrongContentTypeException(
-                "Tried to soup non-html/non-xml data (%s)" %
-                response.content_type)
-
     if request_obj.json and response.data:
         data = _decode_data()
         try:
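
Taken together, Response now exposes its representations on demand: data for the raw payload, decode() for text, json() for parsed JSON, and soup() for an HTML/XML tree, so the transport layer no longer decides how a body is interpreted. A usage sketch; the endpoint is hypothetical:

    response = utils.http.request("http://example.com/api")  # hypothetical endpoint
    raw = response.data                         # payload exactly as received
    text = response.decode()                    # decoded text
    parsed = response.json()                    # raises on non-JSON bodies
    tree = response.soup()                      # BeautifulSoup tree, lxml by default
    tree = response.soup(parser="html.parser")  # or any bs4-supported parser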