use lxml for parsing fedi Notes. html.parser is bad and unpredictable

2019-10-31 15:17:39 +00:00 · 2019-10-31 15:17:39 +00:00 · 5d08a496a4
commit 5d08a496a4
parent 8188aeb9b8
1 changed files with 1 additions and 1 deletions
--- a/modules/fediverse/ap_utils.py
+++ b/modules/fediverse/ap_utils.py
@@ -61,7 +61,7 @@ def find_actor(username, instance):
 KNOWN_TAGS = ["p", "br"]

 def _normalise_note(content):
-    soup = bs4.BeautifulSoup(content, "html.parser")
+    soup = bs4.BeautifulSoup(content, "lxml").body
    lines = []
    for element in soup.find_all():
        if element.text.strip() == "":