From 5d08a496a45040ffed968e2484090ba807a566c6 Mon Sep 17 00:00:00 2001 From: jesopo Date: Thu, 31 Oct 2019 15:17:39 +0000 Subject: [PATCH] use lxml for parsing fedi Notes. html.parser is bad and unpredictable --- modules/fediverse/ap_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/fediverse/ap_utils.py b/modules/fediverse/ap_utils.py index 934959e7..8d8dfa6e 100644 --- a/modules/fediverse/ap_utils.py +++ b/modules/fediverse/ap_utils.py @@ -61,7 +61,7 @@ def find_actor(username, instance): KNOWN_TAGS = ["p", "br"] def _normalise_note(content): - soup = bs4.BeautifulSoup(content, "html.parser") + soup = bs4.BeautifulSoup(content, "lxml").body lines = [] for element in soup.find_all(): if element.text.strip() == "":