Actually explicitly strip "unknown" HTML tags from fedi notes
This commit is contained in:
parent
a75386df32
commit
af1fd37003
1 changed file with 8 additions and 5 deletions
|
@ -57,14 +57,19 @@ def find_actor(username, instance):
|
||||||
if link["type"] == ACTIVITY_TYPE:
|
if link["type"] == ACTIVITY_TYPE:
|
||||||
return link["href"]
|
return link["href"]
|
||||||
|
|
||||||
|
KNOWN_TAGS = ["p", "br"]
|
||||||
|
|
||||||
def _normalise_note(content):
|
def _normalise_note(content):
|
||||||
soup = bs4.BeautifulSoup(content, "html.parser")
|
soup = bs4.BeautifulSoup(content, "html.parser")
|
||||||
lines = []
|
lines = []
|
||||||
for element in soup.find_all():
|
for element in soup.find_all():
|
||||||
out = ""
|
|
||||||
if element.text.strip() == "":
|
if element.text.strip() == "":
|
||||||
continue
|
element.decompose()
|
||||||
elif element.name == "p":
|
elif not element.name in KNOWN_TAGS:
|
||||||
|
element.unwrap()
|
||||||
|
for element in soup.find_all():
|
||||||
|
out = ""
|
||||||
|
if element.name == "p":
|
||||||
for subitem in element.contents:
|
for subitem in element.contents:
|
||||||
if type(subitem) == bs4.element.Tag:
|
if type(subitem) == bs4.element.Tag:
|
||||||
if subitem.name == "br":
|
if subitem.name == "br":
|
||||||
|
@ -72,8 +77,6 @@ def _normalise_note(content):
|
||||||
out = ""
|
out = ""
|
||||||
else:
|
else:
|
||||||
out += subitem
|
out += subitem
|
||||||
else:
|
|
||||||
continue
|
|
||||||
|
|
||||||
lines.append(out.replace(" ", " "))
|
lines.append(out.replace(" ", " "))
|
||||||
return " ".join(lines)
|
return " ".join(lines)
|
||||||
|
|
Loading…
Reference in a new issue