use lxml for parsing fedi Notes. html.parser is bad and unpredictable
This commit is contained in:
parent
8188aeb9b8
commit
5d08a496a4
1 changed file with 1 addition and 1 deletion
|
@@ -61,7 +61,7 @@ def find_actor(username, instance):
|
|||
KNOWN_TAGS = ["p", "br"]
|
||||
|
||||
def _normalise_note(content):
|
||||
soup = bs4.BeautifulSoup(content, "html.parser")
|
||||
soup = bs4.BeautifulSoup(content, "lxml").body
|
||||
lines = []
|
||||
for element in soup.find_all():
|
||||
if element.text.strip() == "":
|
||||
|
|
Loading…
Reference in a new issue