bitbot-3.11-fork/modules/fediverse/ap_utils.py

import urllib.parse
import bs4
from src import IRCBot, utils
from . import ap_actor

# Media types used by the fediverse helpers in this module.
# JSON-LD carrying the ActivityStreams profile parameter.
LD_TYPE = 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'
# JSON Resource Descriptor, requested from the WebFinger endpoint.
JRD_TYPE = "application/jrd+json"
# Default media type for activity requests.
ACTIVITY_TYPE = "application/activity+json"
# User-Agent value passed as `useragent` on the HTTP requests built in this
# module; embeds IRCBot.VERSION.
USERAGENT = "BitBot (%s) Fediverse" % IRCBot.VERSION

def split_username(s):
    """Split a fediverse handle into ``(username, instance)``.

    A single leading ``@`` is optional, so ``@user@host`` and ``user@host``
    are both accepted. The split happens at the first remaining ``@``.

    Returns ``(None, None)`` when either part is missing, including for
    empty or ``None`` input.
    """
    if not s:
        # Nothing to index into; treat as "no handle" instead of raising.
        return None, None

    if s.startswith("@"):
        s = s[1:]
    username, _, instance = s.partition("@")
    if username and instance:
        return username, instance
    return None, None

def activity_request(url, data=None, method="GET", type=ACTIVITY_TYPE,
        headers=None):
    """Build and send an HTTP request for an activity resource.

    url: target URL.
    data: payload, handed to the request as `post_data`.
    method: HTTP method. For "POST", `type` is sent as the content type;
        for any other method it is sent as the `Accept` header.
    type: media type to use (defaults to ACTIVITY_TYPE).
    headers: optional mapping of extra request headers. It is copied, so
        the caller's mapping is never modified.

    Returns whatever `utils.http.request` returns for the built request.
    """
    # Work on a private copy: avoids a shared mutable default and keeps the
    # caller's dict untouched when Accept is added below.
    headers = dict(headers) if headers else {}
    content_type = None

    if method == "POST":
        content_type = type
    else:
        # Keep any caller-supplied headers; only Accept is forced to `type`.
        headers["Accept"] = type

    request = utils.http.Request(url, headers=headers, useragent=USERAGENT,
        content_type=content_type, post_data=data, method=method, json=True,
        json_body=True, fallback_encoding="utf8")
    return utils.http.request(request)

# Discovery URL templates. `%s` is filled with the instance host; the literal
# `{uri}` placeholder is substituted separately with the account URI.
HOSTMETA_TEMPLATE = (
    "https://%s"
    "/.well-known/host-meta"
)
WEBFINGER_TEMPLATE = (
    "https://%s"
    "/.well-known/webfinger"
    "?resource={uri}"
)

def find_actor(username, instance):
    """Look up the actor URL for `username` on `instance`.

    Fetches the instance's host-meta document and uses the template of its
    first `lrdd` link as the WebFinger URL, falling back to
    WEBFINGER_TEMPLATE when no such link is found. The `{uri}` placeholder
    is replaced with ``acct:<username>@<instance>`` and the resulting URL is
    requested with JRD_TYPE.

    Returns the `href` of the first WebFinger link whose `type` equals
    ACTIVITY_TYPE, or None when no link matches.
    """
    hostmeta_request = utils.http.Request(HOSTMETA_TEMPLATE % instance,
        useragent=USERAGENT, parse=True, check_content_type=False)
    hostmeta = utils.http.request(hostmeta_request)

    webfinger_url = None
    for item in hostmeta.data.find_all("link"):
        # Both attributes are optional on a <link>; skip incomplete entries
        # instead of raising KeyError.
        # NOTE(review): assumes `rel` is exposed as a list-like value whose
        # first entry names the relation, as the original code did - confirm.
        rel = item.get("rel")
        template = item.get("template")
        if rel and rel[0] == "lrdd" and template:
            webfinger_url = template
            break

    if not webfinger_url:
        webfinger_url = WEBFINGER_TEMPLATE % instance
    webfinger_url = webfinger_url.replace("{uri}",
        "acct:%s@%s" % (username, instance), 1)

    webfinger = activity_request(webfinger_url, type=JRD_TYPE)

    # Link entries need not carry `type` or `href`, so look them up
    # defensively and keep scanning past entries that lack them.
    for link in webfinger.data.get("links") or []:
        if link.get("type") == ACTIVITY_TYPE and link.get("href"):
            return link["href"]
    return None

# Tags that survive clean-up in _normalise_note; every other tag is unwrapped.
KNOWN_TAGS = ["p", "br"]

def _normalise_note(content):
    """Flatten a note's HTML `content` into one line of plain text.

    Elements without visible text are removed, tags outside KNOWN_TAGS are
    unwrapped (their text is kept), and the text of each top-level <p> is
    split into segments at its direct <br> children. Top-level text nodes
    become segments of their own. Within each segment a double space is
    collapsed once, blank segments are dropped, and the remaining segments
    are joined with two spaces.

    Returns "" when the parsed document has no <body>.
    """
    soup = bs4.BeautifulSoup(content, "lxml").body
    if soup is None:
        # No <body> was produced (e.g. empty content): nothing to format.
        return ""

    for element in soup.find_all():
        if element.name == "br":
            # <br> never contains text, so the emptiness check below would
            # always delete it and the line splitting further down could
            # never trigger. Keep it.
            continue
        if element.text.strip() == "":
            element.decompose()
        elif element.name not in KNOWN_TAGS:
            element.unwrap()

    lines = []
    for element in soup.children:
        if not isinstance(element, bs4.element.Tag):
            # Bare text directly under <body>.
            lines.append(str(element))
        elif element.name == "p":
            out = ""
            for subitem in element.children:
                if not isinstance(subitem, bs4.element.Tag):
                    out += subitem
                elif subitem.name == "br":
                    # A line break ends the current segment.
                    lines.append(out)
                    out = ""
            lines.append(out)

    cleaned = (line.replace("  ", " ") for line in lines)
    # Blank segments (e.g. from consecutive or trailing <br>) would only add
    # stray separators to the joined output.
    return "  ".join(line for line in cleaned if line.strip())

def format_note(actor, note, type="Create"):
    """Produce the ``(summary, text, url)`` triple used to display a note.

    actor: object whose `username` prefixes the text.
    note: for "Create", the note mapping itself; for "Announce", the URL of
        the boosted note, which is fetched with activity_request.
    type: activity type; anything other than "Create" or "Announce" yields
        ``(None, None, None)``.
    """
    if type == "Create":
        body = _normalise_note(note["content"])
        link = note.get("url", note["id"])
        summary = note.get("summary", None)
        return summary, "%s: %s" % (actor.username, body), link

    if type != "Announce":
        return None, None, None

    # Boost: `note` is the URL of the boosted object.
    boosted_instance = urllib.parse.urlparse(note).hostname
    boosted = activity_request(note)
    link = boosted.data.get("url", boosted.data["id"])

    # Load the original author to get their username.
    author = ap_actor.Actor(boosted.data["attributedTo"])
    author.load()
    boosted_user = "@%s@%s" % (author.username, boosted_instance)
    body = _normalise_note(boosted.data["content"])

    summary = boosted.data.get("summary", None)
    text = "%s (boost %s): %s" % (actor.username, boosted_user, body)
    return summary, text, link