From 5d08a496a45040ffed968e2484090ba807a566c6 Mon Sep 17 00:00:00 2001
From: jesopo <github@lolnerd.net>
Date: Thu, 31 Oct 2019 15:17:39 +0000
Subject: [PATCH] use lxml for parsing fedi Notes. html.parser is bad and
 unpredictable

---
 modules/fediverse/ap_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/fediverse/ap_utils.py b/modules/fediverse/ap_utils.py
index 934959e7..8d8dfa6e 100644
--- a/modules/fediverse/ap_utils.py
+++ b/modules/fediverse/ap_utils.py
@@ -61,7 +61,7 @@ def find_actor(username, instance):
 KNOWN_TAGS = ["p", "br"]
 
 def _normalise_note(content):
-    soup = bs4.BeautifulSoup(content, "html.parser")
+    soup = bs4.BeautifulSoup(content, "lxml").body
     lines = []
     for element in soup.find_all():
         if element.text.strip() == "":