remove parser from utils.http.Request, add Request.soup()
commit ed775ddbe3
parent 93aea08818
5 changed files with 24 additions and 31 deletions
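In caller terms, the change moves parsing from a request flag to an explicit method on the response. A before/after sketch (the URL is illustrative; utils.http.request and Response.soup() are the APIs touched by this commit):

    # before: parse=True made _request() replace Response.data with
    # a BeautifulSoup object
    response = utils.http.request("https://example.com/feed.xml", parse=True)
    links = response.data.find_all("link")

    # after: Response.data stays raw; callers soup on demand and can
    # pick a parser per call
    response = utils.http.request("https://example.com/feed.xml")
    links = response.soup().find_all("link")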
@@ -9,11 +9,13 @@ class Module(ModuleManager.BaseModule):
     @utils.kwarg("usage", "<acronym>")
     def acronym(self, event):
         query = event["args_split"][0].upper()
-        response = utils.http.request(API % query, parse=True)
-        if response.data:
-            acronyms = []
-            for element in response.data.find_all("acro"):
-                acronyms.append(element.expan.string)
-            if acronyms:
-                event["stdout"].write("%s: %s" % (query, ", ".join(acronyms)))
-            else:
-                raise utils.EventResultsError()
+        response = utils.http.request(API % query)
+
+        acronyms = []
+        for element in response.soup().find_all("acro"):
+            acronyms.append(element.expan.string)
+
+        if acronyms:
+            event["stdout"].write("%s: %s" % (query, ", ".join(acronyms)))
+        else:
+            raise utils.EventResultsError()
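A self-contained sketch of the new call-site pattern, assuming an XML payload shaped like the acronym API's response (the sample markup and the direct use of bs4 are illustrative; in the module the soup comes from response.soup()):

    import bs4

    # stand-in for Response.soup(): parse the body on demand with lxml
    data = "<acro><expan>As Far As I Know</expan></acro>"
    soup = bs4.BeautifulSoup(data, "lxml")

    acronyms = []
    for element in soup.find_all("acro"):
        acronyms.append(element.expan.string)
    # acronyms == ["As Far As I Know"]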
@@ -10,15 +10,14 @@ class Module(ModuleManager.BaseModule):
     @utils.hook("received.command.lua", min_args=1)
     def eval(self, event):
         try:
-            page = utils.http.request(EVAL_URL,
-                post_data={"input": event["args"]},
-                method="POST", parse=True)
+            page = utils.http.request(EVAL_URL, post_data=
+                {"input": event["args"]}, method="POST")
         except socket.timeout:
             raise utils.EventError("%s: eval timed out" %
                 event["user"].nickname)

         if page:
-            textareas = page.data.find_all("textarea")
+            textareas = page.soup().find_all("textarea")
             if len(textareas) > 1:
                 out = textareas[1].text.strip("\n")
                 event["stdout"].write("%s: %s" % (event["user"].nickname, out))
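The same deferred-parse pattern applies to POST requests; a sketch with a hypothetical endpoint standing in for EVAL_URL (post_data and method are pre-existing Request fields, unchanged by this commit):

    # hypothetical endpoint URL, standing in for EVAL_URL
    page = utils.http.request("https://example.com/eval",
        post_data={"input": "print(1 + 1)"}, method="POST")
    if page:
        # parsing now happens at the call site, not inside _request()
        textareas = page.soup().find_all("textarea")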
@@ -38,8 +38,7 @@ class FindActorException(Exception):

 def find_actor(username, instance):
     hostmeta = HOSTMETA_TEMPLATE % instance
-    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance,
-        parse=True, check_content_type=False)
+    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance)
     try:
         hostmeta = utils.http.request(hostmeta_request)
     except:
@@ -47,7 +46,7 @@ def find_actor(username, instance):

     webfinger_url = None
     if hostmeta.code == 200:
-        for item in hostmeta.data.find_all("link"):
+        for item in hostmeta.soup().find_all("link"):
             if item["rel"] and item["rel"][0] == "lrdd":
                 webfinger_url = item["template"]
                 break
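Note that check_content_type=False disappears along with parse=True: _request() no longer refuses to soup non-HTML bodies, so the XML host-meta document can be fetched plainly and parsed explicitly. A sketch (the URL template is an assumption; the module builds it from HOSTMETA_TEMPLATE):

    # host-meta is XML; the old parse=True path would only soup it
    # with check_content_type=False, whereas soup() is unconditional
    hostmeta = utils.http.request("https://%s/.well-known/host-meta" % instance)
    for item in hostmeta.soup().find_all("link"):
        if item["rel"] and item["rel"][0] == "lrdd":
            webfinger_url = item["template"]
            break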
@@ -50,14 +50,16 @@ class Module(ModuleManager.BaseModule):
             return -1, None

         try:
-            page = utils.http.request(url, parse=True)
-        except utils.http.HTTPWrongContentTypeException:
-            return -1, None
+            page = utils.http.request(url)
         except Exception as e:
             self.log.error("failed to get URL title for %s: %s", [url, str(e)])
             return -1, None

-        if page.data.title:
-            title = utils.parse.line_normalise(page.data.title.text)
+        if not page.content_type in utils.http.SOUP_CONTENT_TYPES:
+            return -1, None
+        page = page.soup()
+
+        if page.title:
+            title = utils.parse.line_normalise(page.title.text)
             if not title:
                 return -3, None
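With soup() unconditional, the old HTTPWrongContentTypeException catch is replaced by an explicit content-type guard at the call site. A condensed sketch of the guard, assuming SOUP_CONTENT_TYPES is the set of HTML/XML MIME types consulted in _request() below:

    page = utils.http.request(url)
    if not page.content_type in utils.http.SOUP_CONTENT_TYPES:
        return -1, None  # not HTML/XML, nothing to extract a title from

    soup = page.soup()
    if soup.title:
        title = utils.parse.line_normalise(soup.title.text)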
@@ -72,9 +72,7 @@ class Request(object):

     allow_redirects: bool = True
     check_content_type: bool = True
-    parse: bool = False
     detect_encoding: bool = True
-    parser: str = "lxml"
     fallback_encoding: typing.Optional[str] = None
     content_type: typing.Optional[str] = None
     proxy: typing.Optional[str] = None
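Constructing a Request accordingly loses the two parsing fields; a minimal sketch (the URL is illustrative):

    # before: utils.http.Request(url, parse=True, parser="html.parser")
    # after: the parser choice moves to the Response side
    request = utils.http.Request("https://example.com/")
    response = utils.http.request(request)
    soup = response.soup("html.parser")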
@@ -126,8 +124,12 @@ class Response(object):
         self.encoding = encoding
         self.headers = headers
         self.cookies = cookies
-    def json(self):
+    def decode(self) -> str:
+        return self.data.decode(self.encoding)
+    def json(self) -> typing.Any:
         return _json.loads(self.data)
+    def soup(self, parser: str="lxml") -> bs4.BeautifulSoup:
+        return bs4.BeautifulSoup(self.decode(), parser)

 def _meta_content(s: str) -> typing.Dict[str, str]:
     out = {}
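The three accessors give callers explicit control over how the raw bytes in Response.data are interpreted; a usage sketch (the URL is illustrative):

    response = utils.http.request("https://example.com/api")
    raw = response.data           # bytes, untouched by the library
    text = response.decode()      # str, decoded with the detected encoding
    obj = response.json()         # deserialised JSON, when the body is JSON
    soup = response.soup("lxml")  # bs4.BeautifulSoup, parser chosen per call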
@@ -200,23 +202,12 @@ def _request(request_obj: Request) -> Response:
        if (request_obj.detect_encoding and
                response.content_type and
                response.content_type in SOUP_CONTENT_TYPES):
-            souped = bs4.BeautifulSoup(response.data, request_obj.parser)
+            souped = bs4.BeautifulSoup(response.data, "lxml")
            encoding = _find_encoding(souped) or encoding

        def _decode_data():
            return response.data.decode(encoding)

-        if request_obj.parse:
-            if (not request_obj.check_content_type or
-                    response.content_type in SOUP_CONTENT_TYPES):
-                souped = bs4.BeautifulSoup(_decode_data(), request_obj.parser)
-                response.data = souped
-                return response
-            else:
-                raise HTTPWrongContentTypeException(
-                    "Tried to soup non-html/non-xml data (%s)" %
-                    response.content_type)
-
        if request_obj.json and response.data:
            data = _decode_data()
            try:
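Callers that still want the old all-or-nothing behaviour can reproduce it on top of the new API; a sketch assuming HTTPWrongContentTypeException remains defined in utils.http after this commit:

    # equivalent of the removed parse=True path, done by the caller
    response = utils.http.request(url)
    if response.content_type in utils.http.SOUP_CONTENT_TYPES:
        souped = response.soup()  # was: response.data = souped
    else:
        raise utils.http.HTTPWrongContentTypeException(
            "Tried to soup non-html/non-xml data (%s)" % response.content_type)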