use lxml for parsing fedi Notes. html.parser is bad and unpredictable
This commit is contained in:
parent
8188aeb9b8
commit
5d08a496a4
1 changed file with 1 addition and 1 deletion
|
@@ -61,7 +61,7 @@ def find_actor(username, instance):
|
||||||
KNOWN_TAGS = ["p", "br"]
|
KNOWN_TAGS = ["p", "br"]
|
||||||
|
|
||||||
def _normalise_note(content):
|
def _normalise_note(content):
|
||||||
soup = bs4.BeautifulSoup(content, "html.parser")
|
soup = bs4.BeautifulSoup(content, "lxml").body
|
||||||
lines = []
|
lines = []
|
||||||
for element in soup.find_all():
|
for element in soup.find_all():
|
||||||
if element.text.strip() == "":
|
if element.text.strip() == "":
|
||||||
|
|
Loading…
Reference in a new issue