From 0331b763ffa845f3cb7b9651ca70ab8b407eb67e Mon Sep 17 00:00:00 2001
From: jesopo
Date: Thu, 10 Oct 2019 10:32:47 +0100
Subject: [PATCH] refactor multi-line-to-line normalisation to utils.parse.line_normalise(), use it in rss.py

closes #174
---
Review notes (this section is ignored by `git am`):
- This patch was recovered from a copy that had been collapsed onto a single
  line. Line breaks, indentation (assumed 4 spaces) and the two-space string
  literals in the removed tweets/format.py lines are reconstructed -- confirm
  against the tree before applying.
- line_normalise() strips each line, drops empty lines, and joins the rest
  with single spaces. Within a line it now discards the empty fields produced
  by line.split(" ") instead of calling str.replace() once, so runs of three
  or more spaces collapse to one, matching the while-loop this patch removes
  from tweets/format.py. Only spaces are collapsed; interior tabs are kept.
- NOTE(review): nothing in this patch uses the `from src import utils` added
  to src/utils/parse.py; check whether it is needed or risks a circular
  import.

 modules/rss.py           | 3 ++-
 modules/tweets/format.py | 6 +-----
 src/utils/parse.py       | 5 +++++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/modules/rss.py b/modules/rss.py
index 159bdbf6..e18ef18b 100644
--- a/modules/rss.py
+++ b/modules/rss.py
@@ -18,7 +18,8 @@ class Module(ModuleManager.BaseModule):
             self.bot.get_setting("rss-interval", RSS_INTERVAL))
 
     def _format_entry(self, server, feed_title, entry, shorten):
-        title = utils.http.strip_html(entry["title"])
+        title = utils.parse.line_normalise(utils.http.strip_html(
+            entry["title"]))
 
         author = entry.get("author", None)
         author = " by %s" % author if author else ""
diff --git a/modules/tweets/format.py b/modules/tweets/format.py
index 8b84cba3..4d64dffd 100644
--- a/modules/tweets/format.py
+++ b/modules/tweets/format.py
@@ -7,11 +7,7 @@ def _timestamp(dt):
     return "%s %s ago" % (since, unit)
 
 def _normalise(tweet):
-    while "  " in tweet:
-        tweet = tweet.replace("  ", " ")
-    lines = [line.strip() for line in tweet.split("\n")]
-    lines = list(filter(None, lines))
-    return html.unescape(" ".join(lines))
+    return html.unescape(utils.parse.line_normalise(tweet))
 
 def _tweet(exports, server, tweet, from_url):
     linked_id = tweet.id
diff --git a/src/utils/parse.py b/src/utils/parse.py
index 65d0552b..b53b9595 100644
--- a/src/utils/parse.py
+++ b/src/utils/parse.py
@@ -1,4 +1,5 @@
 import io, typing
+from src import utils
 
 COMMENT_TYPES = ["#", "//"]
 def hashflags(filename: str
@@ -79,3 +80,7 @@ def try_int(s: str) -> typing.Optional[int]:
         return int(s)
     except ValueError:
         return None
+
+def line_normalise(s: str) -> str:
+    lines = list(filter(None, [line.strip() for line in s.split("\n")]))
+    return " ".join(" ".join(filter(None, line.split(" "))) for line in lines)