bitbot-3.11-fork/modules/fediverse/ap_utils.py

import os.path, urllib.parse
import bs4
from src import IRCBot, utils
from . import ap_actor

# Media types used when talking to fediverse endpoints.
# JSON-LD media type carrying the ActivityStreams profile parameter.
LD_TYPE = ("application/ld+json; "
    "profile=\"https://www.w3.org/ns/activitystreams\"")
# Media type requested from the webfinger endpoint by find_actor().
JRD_TYPE = "application/jrd+json"
# Default media type for activity_request(); also the link type find_actor()
# looks for in a webfinger response.
ACTIVITY_TYPE = "application/activity+json"

def split_username(s):
    """Split a fediverse handle into its ``(username, instance)`` parts.

    A single leading "@" is optional, so both "@user@example.com" and
    "user@example.com" are accepted. Only the first remaining "@" is used as
    the separator.

    Returns ``(None, None)`` when either part is missing, including for an
    empty string.
    """
    # startswith() rather than s[0] so that an empty string is rejected
    # cleanly instead of raising IndexError
    if s.startswith("@"):
        s = s[1:]
    username, _, instance = s.partition("@")
    if username and instance:
        return username, instance
    return None, None

def activity_request(url, data=None, method="GET", type=ACTIVITY_TYPE,
        headers=None):
    """Perform an HTTP request against an ActivityPub-style endpoint.

    url: the URL to request.
    data: request body, passed through as `post_data`.
    method: HTTP method. For "POST", `type` is sent as the content type;
        for any other method it is sent as the "Accept" header.
    type: media type to send or ask for (defaults to ACTIVITY_TYPE).
    headers: optional dict of extra request headers. It is copied, never
        mutated.

    Returns whatever `utils.http.request` returns for the built request.
    """
    # Work on a copy: the caller's dict must not be modified, and extra
    # headers (e.g. signatures) must survive on non-POST requests instead of
    # being replaced wholesale by the Accept header.
    headers = dict(headers or {})
    content_type = None

    if method == "POST":
        content_type = type
    else:
        headers["Accept"] = type

    request = utils.http.Request(url, headers=headers,
        content_type=content_type, post_data=data, method=method,
        json_body=True, fallback_encoding="utf8")
    return utils.http.request(request)

# URL templates used by find_actor(); "%s" is filled with the instance name.
HOSTMETA_TEMPLATE = "https://%s/.well-known/host-meta"
# Fallback webfinger URL; "{uri}" is replaced with the "acct:" resource.
WEBFINGER_TEMPLATE = "https://%s/.well-known/webfinger?resource={uri}"

class FindActorException(Exception):
    """Raised by find_actor() when an actor lookup fails."""

def find_actor(username, instance):
    """Resolve `username` on `instance` to an actor URL.

    The instance's host-meta document is fetched first and searched for an
    "lrdd" link template; if none is found the default WEBFINGER_TEMPLATE is
    used. The resulting webfinger URL is then queried for a link whose type
    is ACTIVITY_TYPE.

    Returns the `href` of the first matching link, or None when the
    webfinger response contains no such link.

    Raises FindActorException when the host-meta or webfinger request fails,
    or when the webfinger response code is not 200.
    """
    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance)
    try:
        hostmeta = utils.http.request(hostmeta_request)
    except Exception as e:
        # `Exception`, not a bare except, so KeyboardInterrupt/SystemExit
        # are not converted into a lookup failure
        raise FindActorException(
            "Failed to get host-meta for %s" % instance) from e

    webfinger_url = None
    if hostmeta.code == 200:
        for item in hostmeta.soup().find_all("link"):
            # `rel` is a list or a plain string depending on the parser, and
            # may be absent altogether
            rel = item.get("rel") or []
            if isinstance(rel, str):
                rel = rel.split()
            template = item.get("template")
            if "lrdd" in rel and template:
                webfinger_url = template
                break

    if not webfinger_url:
        webfinger_url = WEBFINGER_TEMPLATE % instance
    webfinger_url = webfinger_url.replace("{uri}",
        "acct:%s@%s" % (username, instance), 1)

    try:
        webfinger = activity_request(webfinger_url, type=JRD_TYPE)
    except Exception as e:
        raise FindActorException(
            "Failed to get webfinger for %s" % instance) from e

    if webfinger.code != 200:
        raise FindActorException("Could not find user @%s@%s" %
            (username, instance))

    # links are not required to carry "type" or "href", so use .get() rather
    # than failing with KeyError on the first link that lacks one
    for link in webfinger.json().get("links", []):
        if link.get("type") == ACTIVITY_TYPE and link.get("href"):
            return link["href"]
    return None

# Tag names that _normalise_note() keeps in the tree; every other tag is
# unwrapped or removed before the note is rendered by _line().
KNOWN_TAGS = ["p", "br"]

def _line(item):
    """Render one parsed HTML node as plain text with newline markers.

    item: a bs4 node (a Tag or a string-like node).

    "br" becomes a newline, "p" becomes its rendered children wrapped in
    newlines, and any other tag is rendered as just its children. Non-tag
    nodes are returned via str().
    """
    if isinstance(item, bs4.element.Tag):
        if item.name == "br":
            return "\n"

        inner = "".join(_line(child) for child in item.children)
        if item.name == "p":
            return "\n%s\n" % inner
        # Any other tag: always return a string. Falling through here used
        # to return None, which made the caller's concatenation raise
        # TypeError.
        return inner
    return str(item)

def _normalise_note(content):
    """Convert a Note's HTML `content` into normalised plain text.

    Tags not listed in KNOWN_TAGS are removed when they hold no text and
    unwrapped (keeping their children) otherwise. The remaining top-level
    nodes are rendered with _line() and the result is passed through
    `utils.parse.line_normalise`.
    """
    body = bs4.BeautifulSoup(content, "lxml").body
    if body is None:
        # the parser may produce no <body> at all (e.g. for blank markup);
        # treat that as an empty note rather than failing on None
        return utils.parse.line_normalise("")

    for element in body.find_all():
        if element.name not in KNOWN_TAGS:
            if element.text.strip() == "":
                element.decompose()
            else:
                element.unwrap()

    out = "".join(_line(element) for element in body.children)
    return utils.parse.line_normalise(out)

def _content(note):
    content = note.get("content", None)
    attachment = note.get("attachment", [])

    if note.get("content", None):
        return _normalise_note(content)
    elif attachment:
        type = attachment[0]["mediaType"].split("/", 1)[0]
        filename = os.path.basename(attachment[0]["url"])

        extension = None
        if "." in filename:
            filename, extension = filename.rsplit(".", 1)
        if len(filename) > 20:
            filename = "%s[...]" % filename[:20]

        if extension:
            filename = "%s.%s" % (filename, extension)
        else:
            filename = "%s: %s" % (type, filename)

        return "<%s>" % filename

def parse_note(actor, note, type="Create"):
    """Turn an activity's object into ``(summary, author, content, url)``.

    actor: the actor who performed the activity; its `display_name` is used
        for the author field.
    note: for "Create", the Note dict itself; for "Announce", the URL of the
        boosted object, which is fetched with activity_request().
    type: the activity type, "Create" or "Announce".

    The returned url is the object's "url" when it has a usable one and its
    "id" otherwise. Returns None for any other activity type.
    """
    if type == "Announce":
        retoot_url = note
        retoot_instance = urllib.parse.urlparse(retoot_url).hostname
        retoot = activity_request(retoot_url).json()
        # `or` instead of .get("url", retoot["id"]): the default argument is
        # evaluated eagerly, so a missing "id" raised KeyError even when
        # "url" was present, and a null "url" was returned as None
        retoot_url = retoot.get("url") or retoot["id"]

        original_tooter = ap_actor.Actor(retoot["attributedTo"])
        original_tooter.load()
        retooted_user = "@%s@%s" % (original_tooter.username, retoot_instance)
        retoot_content = _content(retoot)

        author = "%s (boost %s)" % (actor.display_name, retooted_user)

        return (retoot.get("summary", None), author, retoot_content, retoot_url)

    elif type == "Create":
        content = _content(note)
        url = note.get("url") or note["id"]

        return note.get("summary", None), actor.display_name, content, url

    return None
show when a fediverse Note is nothing but an attachment 2019-11-04 13:19:37 +00:00			`import os.path, urllib.parse`
format multi-line toots on a single line using things like double space 2019-10-09 15:44:54 +00:00			`import bs4`
re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00			`from src import IRCBot, utils`
show `first` toot if it's a boost, fix Note boost formatting 2019-10-15 14:13:28 +00:00			`from . import ap_actor`
re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00
			`LD_TYPE = ("application/ld+json; "`
			`"profile=\"https://www.w3.org/ns/activitystreams\"")`
			`JRD_TYPE = "application/jrd+json"`
			`ACTIVITY_TYPE = "application/activity+json"`

			`def split_username(s):`
			`if s[0] == "@":`
			`s = s[1:]`
			`username, _, instance = s.partition("@")`
			`if username and instance:`
			`return username, instance`
			`return None, None`

actually pass signature and related headers to activity_request() 2019-09-15 13:27:34 +00:00			`def activity_request(url, data=None, method="GET", type=ACTIVITY_TYPE,`
			`headers={}):`
re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00			`content_type = None`

			`if method == "POST":`
			`content_type = type`
			`else:`
			`headers = {"Accept": type}`

give bitbot a unique User-Agent closes #206 2019-11-20 14:42:34 +00:00			`request = utils.http.Request(url, headers=headers,`
add `cookies` and `.json()` to utils.http.Response objects 2019-11-25 18:17:30 +00:00			`content_type=content_type, post_data=data, method=method,`
fallback ActivityPub data encoding to utf8 2019-10-26 21:43:11 +00:00			`json_body=True, fallback_encoding="utf8")`
pass full Response from ap_utils.activity_request, use it for Actor 404 2019-09-17 16:41:15 +00:00			`return utils.http.request(request)`
re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00
			`HOSTMETA_TEMPLATE = "https://%s/.well-known/host-meta"`
			`WEBFINGER_TEMPLATE = "https://%s/.well-known/webfinger?resource={uri}"`

Show more fediverse errors to the end user 2019-11-14 10:53:34 +00:00			`class FindActorException(Exception):`
			`pass`

re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00			`def find_actor(username, instance):`
			`hostmeta = HOSTMETA_TEMPLATE % instance`
remove `parser` from utils.http.Request, add Request.soup() 2019-11-26 11:35:56 +00:00			`hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance)`
Show more fediverse errors to the end user 2019-11-14 10:53:34 +00:00			`try:`
			`hostmeta = utils.http.request(hostmeta_request)`
			`except:`
			`raise FindActorException("Failed to get host-meta for %s" % instance)`
re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00
			`webfinger_url = None`
Show more fediverse errors to the end user 2019-11-14 10:53:34 +00:00			`if hostmeta.code == 200:`
remove `parser` from utils.http.Request, add Request.soup() 2019-11-26 11:35:56 +00:00			`for item in hostmeta.soup().find_all("link"):`
Show more fediverse errors to the end user 2019-11-14 10:53:34 +00:00			`if item["rel"] and item["rel"][0] == "lrdd":`
			`webfinger_url = item["template"]`
			`break`
re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00
			`if not webfinger_url:`
			`webfinger_url = WEBFINGER_TEMPLATE % instance`
			`webfinger_url = webfinger_url.replace("{uri}",`
			`"acct:%s@%s" % (username, instance), 1)`

Show more fediverse errors to the end user 2019-11-14 10:53:34 +00:00			`try:`
			`webfinger = activity_request(webfinger_url, type=JRD_TYPE)`
			`except:`
			`raise FindActorException("Failed to get webfinger for %s" % instance)`
re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00
			`actor_url = None`
Show more fediverse errors to the end user 2019-11-14 10:53:34 +00:00			`if webfinger.code == 200:`
add `cookies` and `.json()` to utils.http.Response objects 2019-11-25 18:17:30 +00:00			`for link in webfinger.json()["links"]:`
Show more fediverse errors to the end user 2019-11-14 10:53:34 +00:00			`if link["type"] == ACTIVITY_TYPE:`
			`return link["href"]`
			`else:`
			`raise FindActorException("Could not find user @%s@%s" %`
			`(username, instance))`
re-merge fediverse an fediverse_server, so they can share utils 2019-09-15 09:43:46 +00:00
actually explicitly strip "unknown" html tags from fedi notes 2019-10-14 09:46:15 +00:00			`KNOWN_TAGS = ["p", "br"]`

better line normalisation for fediverse Activities 2019-11-04 11:18:34 +00:00			`def _line(item):`
			`if type(item) == bs4.element.Tag:`
			`if item.name == "p":`
			`out = ""`
			`for subitem in item.children:`
			`out += _line(subitem)`
			`return "\n%s\n" % out`
			`elif item.name == "br":`
			`return "\n"`
			`else:`
			`return str(item)`

format multi-line toots on a single line using things like double space 2019-10-09 15:44:54 +00:00			`def _normalise_note(content):`
use lxml for parsing fedi Notes. html.parse is bad and unpredictable 2019-10-31 15:17:39 +00:00			`soup = bs4.BeautifulSoup(content, "lxml").body`
format multi-line toots on a single line using things like double space 2019-10-09 15:44:54 +00:00			`lines = []`
			`for element in soup.find_all():`
better line normalisation for fediverse Activities 2019-11-04 11:18:34 +00:00			`if not element.name in KNOWN_TAGS:`
only .decompose() when it's an empty and unknown tag 2019-11-04 12:51:24 +00:00			`if element.text.strip() == "":`
			`element.decompose()`
			`else:`
			`element.unwrap()`
better line normalisation for fediverse Activities 2019-11-04 11:18:34 +00:00
			`out = ""`
support AP Notes with content outside of HTML tags (e.g. pleroma) 2019-10-15 15:44:28 +00:00			`for element in soup.children:`
better line normalisation for fediverse Activities 2019-11-04 11:18:34 +00:00			`out += _line(element)`

			`return utils.parse.line_normalise(out)`
format multi-line toots on a single line using things like double space 2019-10-09 15:44:54 +00:00
show when a fediverse Note is nothing but an attachment 2019-11-04 13:19:37 +00:00			`def _content(note):`
			`content = note.get("content", None)`
			`attachment = note.get("attachment", [])`

			`if note.get("content", None):`
			`return _normalise_note(content)`
			`elif attachment:`
			`type = attachment[0]["mediaType"].split("/", 1)[0]`
			`filename = os.path.basename(attachment[0]["url"])`

			`extension = None`
			`if "." in filename:`
			`filename, extension = filename.rsplit(".", 1)`
			`if len(filename) > 20:`
			`filename = "%s[...]" % filename[:20]`

			`if extension:`
			`filename = "%s.%s" % (filename, extension)`
			`else:`
			`filename = "%s: %s" % (type, filename)`

			`return "<%s>" % filename`

show username when a toot is CWed 2019-11-27 15:16:46 +00:00			`def parse_note(actor, note, type="Create"):`
support !fedi with a URL to a Note 2019-10-04 12:06:29 +00:00			`if type == "Announce":`
			`retoot_url = note`
refactor AP Note stringifying out to ap_utils.py 2019-09-16 09:51:59 +00:00			`retoot_instance = urllib.parse.urlparse(retoot_url).hostname`
add `cookies` and `.json()` to utils.http.Response objects 2019-11-25 18:17:30 +00:00			`retoot = activity_request(retoot_url).json()`
			`retoot_url = retoot.get("url", retoot["id"])`
refactor AP Note stringifying out to ap_utils.py 2019-09-16 09:51:59 +00:00
add `cookies` and `.json()` to utils.http.Response objects 2019-11-25 18:17:30 +00:00			`original_tooter = ap_actor.Actor(retoot["attributedTo"])`
refactor AP Note stringifying out to ap_utils.py 2019-09-16 09:51:59 +00:00			`original_tooter.load()`
retooted account should use username, not display name 2019-12-12 05:35:45 +00:00			`retooted_user = "@%s@%s" % (original_tooter.username, retoot_instance)`
add `cookies` and `.json()` to utils.http.Response objects 2019-11-25 18:17:30 +00:00			`retoot_content = _content(retoot)`
refactor AP Note stringifying out to ap_utils.py 2019-09-16 09:51:59 +00:00
use fediverse display names when available 2019-12-10 11:45:29 +00:00			`author = "%s (boost %s)" % (actor.display_name, retooted_user)`
show username when a toot is CWed 2019-11-27 15:16:46 +00:00
			`return (retoot.get("summary", None), author, retoot_content, retoot_url)`

refactor AP Note stringifying out to ap_utils.py 2019-09-16 09:51:59 +00:00
support !fedi with a URL to a Note 2019-10-04 12:06:29 +00:00			`elif type == "Create":`
show when a fediverse Note is nothing but an attachment 2019-11-04 13:19:37 +00:00			`content = _content(note)`
prefer `url` over `id` as it's usually a more user-friendly url 2019-10-08 15:07:35 +00:00			`url = note.get("url", note["id"])`
refactor AP Note stringifying out to ap_utils.py 2019-09-16 09:51:59 +00:00
use fediverse display names when available 2019-12-10 11:45:29 +00:00			`return note.get("summary", None), actor.display_name, content, url`
refactor AP Note stringifying out to ap_utils.py 2019-09-16 09:51:59 +00:00
show username when a toot is CWed 2019-11-27 15:16:46 +00:00			`return None`