2019-11-04 13:19:37 +00:00
|
|
|
import os.path, urllib.parse
|
2019-10-09 15:44:54 +00:00
|
|
|
import bs4
|
2019-09-15 09:43:46 +00:00
|
|
|
from src import IRCBot, utils
|
2019-10-15 14:13:28 +00:00
|
|
|
from . import ap_actor
|
2019-09-15 09:43:46 +00:00
|
|
|
|
|
|
|
# MIME types used in requests to fediverse servers.

# JSON-LD carrying the ActivityStreams profile parameter.
LD_TYPE = (
    "application/ld+json; "
    'profile="https://www.w3.org/ns/activitystreams"'
)

# JSON Resource Descriptor; find_actor requests this from WebFinger.
JRD_TYPE = "application/jrd+json"

# ActivityStreams JSON; the default type used by activity_request.
ACTIVITY_TYPE = "application/activity+json"
|
|
|
|
|
|
|
|
def split_username(s):
    """Split a fediverse handle into its username and instance parts.

    Accepts ``user@instance`` with or without one leading ``@``. The split
    happens at the first remaining ``@``.

    Returns a ``(username, instance)`` tuple, or ``(None, None)`` when either
    part is empty (which includes an empty ``s``).
    """
    # startswith() instead of s[0]: an empty string must not raise IndexError.
    if s.startswith("@"):
        s = s[1:]

    username, _, instance = s.partition("@")
    if username and instance:
        return username, instance
    return None, None
|
|
|
|
|
2019-09-15 13:27:34 +00:00
|
|
|
def activity_request(url, data=None, method="GET", type=ACTIVITY_TYPE,
        headers=None):
    """Build and send an HTTP request through utils.http.

    url -- address to request.
    data -- passed to utils.http.Request as ``post_data``.
    method -- HTTP method; "POST" sends ``type`` as the content type, any
        other method sends ``type`` in the ``Accept`` header instead.
    type -- MIME type to send or ask for (defaults to ACTIVITY_TYPE).
    headers -- optional mapping of extra request headers; it is copied and
        never modified.

    Returns whatever utils.http.request returns for the built request.
    """
    # Copy so neither a shared default nor the caller's dict is mutated.
    request_headers = dict(headers) if headers else {}

    content_type = None
    if method == "POST":
        content_type = type
    else:
        # Add Accept alongside any caller-supplied headers rather than
        # throwing those headers away.
        request_headers["Accept"] = type

    request = utils.http.Request(url, headers=request_headers,
        content_type=content_type, post_data=data, method=method,
        json_body=True, fallback_encoding="utf8")
    return utils.http.request(request)
|
2019-09-15 09:43:46 +00:00
|
|
|
|
|
|
|
# Discovery URL templates; "%s" is filled with the instance hostname.
HOSTMETA_TEMPLATE = "https://%s/.well-known/host-meta"
# "{uri}" is left in place here and substituted later by find_actor.
WEBFINGER_TEMPLATE = "https://%s/.well-known/webfinger?resource={uri}"
|
|
|
|
|
2019-11-14 10:53:34 +00:00
|
|
|
class FindActorException(Exception):
    """Raised by find_actor when an actor lookup fails."""
|
|
|
|
|
2019-09-15 09:43:46 +00:00
|
|
|
def find_actor(username, instance):
    """Look up the actor URL for ``@username@instance``.

    Fetches the instance's host-meta document and uses the ``template`` of
    its first ``lrdd`` link as the WebFinger URL, falling back to
    WEBFINGER_TEMPLATE when none is found. The ``{uri}`` placeholder is
    replaced with ``acct:username@instance`` and the result is requested
    with JRD_TYPE.

    Returns the ``href`` of the first WebFinger link whose ``type`` equals
    ACTIVITY_TYPE, or None when the response was 200 but had no such link.

    Raises FindActorException when either request fails, or when the
    WebFinger response code is not 200.
    """
    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance)
    try:
        hostmeta = utils.http.request(hostmeta_request)
    except Exception as e:
        raise FindActorException(
            "Failed to get host-meta for %s" % instance) from e

    webfinger_url = None
    if hostmeta.code == 200:
        for item in hostmeta.soup().find_all("link"):
            # .get(): <link> elements are not guaranteed to carry either
            # attribute, and one malformed entry should not abort the lookup.
            rel = item.get("rel")
            template = item.get("template")
            if rel and rel[0] == "lrdd" and template:
                webfinger_url = template
                break

    if not webfinger_url:
        webfinger_url = WEBFINGER_TEMPLATE % instance
    webfinger_url = webfinger_url.replace("{uri}",
        "acct:%s@%s" % (username, instance), 1)

    try:
        webfinger = activity_request(webfinger_url, type=JRD_TYPE)
    except Exception as e:
        raise FindActorException(
            "Failed to get webfinger for %s" % instance) from e

    if webfinger.code != 200:
        raise FindActorException("Could not find user @%s@%s" %
            (username, instance))

    # WebFinger links may omit "type" (and the document may omit "links"),
    # so read both defensively instead of indexing.
    for link in webfinger.json().get("links", []):
        if link.get("type") == ACTIVITY_TYPE and link.get("href"):
            return link["href"]
    return None
|
2019-09-15 09:43:46 +00:00
|
|
|
|
2019-10-14 09:46:15 +00:00
|
|
|
# Tag names _normalise_note leaves in the tree; any other tag is either
# unwrapped or removed before the note is rendered.
KNOWN_TAGS = ["p", "br"]
|
|
|
|
|
2019-11-04 11:18:34 +00:00
|
|
|
def _line(item):
    """Render one parsed HTML node as plain text.

    item -- a node from a Beautiful Soup tree.

    Non-tag nodes are returned as ``str(item)``. ``<br>`` becomes a newline,
    ``<p>`` becomes its rendered children wrapped in newlines, and any other
    tag becomes just its rendered children.

    Always returns a string.
    """
    if not isinstance(item, bs4.element.Tag):
        return str(item)

    if item.name == "br":
        return "\n"

    inner = "".join(_line(child) for child in item.children)
    if item.name == "p":
        return "\n%s\n" % inner

    # Other tags used to fall through and return None, which broke string
    # concatenation in the caller; render their contents instead.
    return inner
|
|
|
|
|
2019-10-09 15:44:54 +00:00
|
|
|
def _normalise_note(content):
    """Convert a note's HTML ``content`` into normalised plain text.

    Tags not listed in KNOWN_TAGS are removed entirely when they contain no
    text, and otherwise unwrapped so that only their contents remain. What is
    left is rendered with _line and passed to utils.parse.line_normalise.

    Returns the result of utils.parse.line_normalise.
    """
    body = bs4.BeautifulSoup(content, "lxml").body
    if body is None:
        # The parser produced no <body>, so there is nothing to render.
        return utils.parse.line_normalise("")

    # find_all() snapshots every descendant up front, so elements nested
    # inside something already decomposed would still be visited. Using a
    # decomposed element is undefined in Beautiful Soup, so remember and
    # skip them.
    discarded = set()
    for element in body.find_all():
        if id(element) in discarded or element.name in KNOWN_TAGS:
            continue

        if element.text.strip() == "":
            discarded.update(id(child) for child in element.find_all())
            element.decompose()
        else:
            element.unwrap()

    out = "".join(_line(element) for element in body.children)
    return utils.parse.line_normalise(out)
|
2019-10-09 15:44:54 +00:00
|
|
|
|
2019-11-04 13:19:37 +00:00
|
|
|
def _content(note):
|
|
|
|
content = note.get("content", None)
|
|
|
|
attachment = note.get("attachment", [])
|
|
|
|
|
|
|
|
if note.get("content", None):
|
|
|
|
return _normalise_note(content)
|
|
|
|
elif attachment:
|
|
|
|
type = attachment[0]["mediaType"].split("/", 1)[0]
|
|
|
|
filename = os.path.basename(attachment[0]["url"])
|
|
|
|
|
|
|
|
extension = None
|
|
|
|
if "." in filename:
|
|
|
|
filename, extension = filename.rsplit(".", 1)
|
|
|
|
if len(filename) > 20:
|
|
|
|
filename = "%s[...]" % filename[:20]
|
|
|
|
|
|
|
|
if extension:
|
|
|
|
filename = "%s.%s" % (filename, extension)
|
|
|
|
else:
|
|
|
|
filename = "%s: %s" % (type, filename)
|
|
|
|
|
|
|
|
return "<%s>" % filename
|
|
|
|
|
2019-11-27 15:16:46 +00:00
|
|
|
def parse_note(actor, note, type="Create"):
    """Turn an activity's object into ``(summary, author, content, url)``.

    actor -- object whose ``username`` names the acting account.
    note -- for "Create", the note mapping itself; for "Announce", the URL
        of the boosted object, which is fetched with activity_request.
    type -- activity type; only "Create" and "Announce" are handled.

    Returns a 4-tuple, or None for any other ``type``.
    """
    if type == "Create":
        text = _content(note)
        link = note.get("url", note["id"])
        return (note.get("summary", None), actor.username, text, link)

    if type != "Announce":
        return None

    # The boosted object lives at the URL given as `note`; its host is taken
    # as the original author's instance.
    boosted_host = urllib.parse.urlparse(note).hostname
    boosted = activity_request(note).json()
    boosted_link = boosted.get("url", boosted["id"])

    original_author = ap_actor.Actor(boosted["attributedTo"])
    original_author.load()
    boosted_handle = "@%s@%s" % (original_author.username, boosted_host)
    boosted_text = _content(boosted)

    author = "%s (boost %s)" % (actor.username, boosted_handle)
    return (boosted.get("summary", None), author, boosted_text, boosted_link)
|