better line normalisation for fediverse Activities

This commit is contained in:
jesopo 2019-11-04 11:18:34 +00:00
parent bcdffacab5
commit 62781badc4

View file

@ -60,30 +60,32 @@ def find_actor(username, instance):
# Tags that _normalise_note keeps in the parsed tree; every other tag is
# unwrapped so that only its contents remain.
KNOWN_TAGS = ["p", "br"]
def _line(item):
    """Render one node of a parsed HTML tree as text with newline markers.

    ``item`` is a node taken from a BeautifulSoup tree.

    * Anything that is not a tag is returned as ``str(item)``.
    * A ``<br>`` tag becomes a single ``"\\n"``.
    * A ``<p>`` tag becomes the concatenated rendering of its children,
      wrapped in one leading and one trailing ``"\\n"``.
    * Any other tag becomes the concatenated rendering of its children
      with nothing added around it.

    A string is returned on every path, so callers can concatenate the
    result without checking for ``None``.
    """
    if not isinstance(item, bs4.element.Tag):
        return str(item)

    if item.name == "br":
        return "\n"

    # Recurse so that tags nested inside this one are rendered as well.
    inner = "".join(_line(child) for child in item.children)
    if item.name == "p":
        return "\n%s\n" % inner

    # Unknown tag: emit only its contents instead of falling off the end
    # of the function and handing ``None`` back to the caller.
    return inner
def _normalise_note(content):
    """Reduce the HTML markup in ``content`` to normalised text.

    The markup is parsed with BeautifulSoup's ``lxml`` parser and the
    ``<body>`` of the result is processed as follows:

    1. Every tag whose name is not in ``KNOWN_TAGS`` is unwrapped, leaving
       its contents in place.
    2. Known tags other than ``<br>`` whose text is empty or only
       whitespace are removed from the tree.
    3. Each remaining top-level child is rendered by ``_line`` and the
       pieces are concatenated.

    The concatenated text is passed to ``utils.parse.line_normalise`` and
    that call's result is returned.
    """
    soup = bs4.BeautifulSoup(content, "lxml").body
    if soup is None:
        # No <body> was produced (e.g. empty content): nothing to render.
        return utils.parse.line_normalise("")

    for element in soup.find_all():
        if element.name not in KNOWN_TAGS:
            element.unwrap()
        # A <br> never has text, so the emptiness test must skip it;
        # otherwise every line break is dropped before _line can turn it
        # into a newline.
        elif element.name != "br" and element.text.strip() == "":
            element.decompose()

    out = "".join(_line(element) for element in soup.children)
    return utils.parse.line_normalise(out)
def format_note(actor, note, type="Create"): def format_note(actor, note, type="Create"):
if type == "Announce": if type == "Announce":