2019-07-15 16:45:48 +00:00
|
|
|
#--depends-on config
|
|
|
|
#--depends-on shorturl
|
|
|
|
|
2019-08-12 15:07:07 +00:00
|
|
|
import difflib, hashlib, time
|
2019-06-23 15:03:15 +00:00
|
|
|
from src import ModuleManager, utils
|
|
|
|
import feedparser
|
|
|
|
|
|
|
|
# Default number of seconds between feed polls; used when the bot-wide
# "rss-interval" setting has not been configured.
RSS_INTERVAL = 60 # 1 minute

# One setting definition shared by the "serverset" and "channelset" exports
# on the module class, so the same option can be set at either level.
SETTING_BIND = utils.Setting(
    "rss-bindhost",
    "Which local address to bind to for RSS requests",
    example="127.0.0.1",
)
|
2019-07-04 10:15:01 +00:00
|
|
|
@utils.export("botset", utils.IntSetting("rss-interval",
    "Interval (in seconds) between RSS polls", example="120"))
@utils.export("channelset", utils.BoolSetting("rss-shorten",
    "Whether or not to shorten RSS urls"))
@utils.export("channelset", utils.Setting("rss-format",
    "Format of RSS announcements",
    example="$longtitle: $title - $link [$author]"))
@utils.export("serverset", SETTING_BIND)
@utils.export("channelset", SETTING_BIND)
class Module(ModuleManager.BaseModule):
    """Poll RSS/Atom feeds on a timer and announce new entries to channels.

    Per-channel state lives in channel settings: "rss-hooks" holds the list
    of watched URLs and "rss-seen-ids-<url>" holds the ids already announced
    for that URL.
    """

    _name = "RSS"

    # Announcement template used when a channel has no "rss-format" setting.
    _DEFAULT_FORMAT = "$longtitle: $title by $author - $link"
    # Most new entries announced per channel, per feed, per poll. Anything
    # beyond this stays unseen and is picked up by a later poll.
    _MAX_ANNOUNCE = 3
    # The seen-id history is capped at this many times the feed's length.
    _SEEN_IDS_FACTOR = 10

    def on_load(self):
        """Register the recurring timer that polls every watched feed."""
        interval = self.bot.get_setting("rss-interval", RSS_INTERVAL)
        self.timers.add("rss-feeds", self._timer, interval)

    def _format_entry(self, server, channel, feed_title, entry, shorten):
        """Render one feed entry as an announcement line for `channel`.

        :param server: server passed through to the "shorturl" export
        :param channel: channel whose "rss-format" setting is the template
        :param feed_title: title of the feed, or None
        :param entry: parsed feed entry (mapping of field name to value)
        :param shorten: whether to try shortening the entry link
        :returns: the formatted announcement string
        """
        title = utils.parse.line_normalise(utils.http.strip_html(
            entry.get("title") or ""))

        author = entry.get("author", "unknown author")
        author = str(author) if author else ""

        link = entry.get("link", None)
        if shorten and link:
            # Shortening is best-effort: keep the long link on failure, but
            # leave a trace of why instead of swallowing the error silently.
            try:
                link = self.exports.get("shorturl")(server, link)
            except Exception:
                self.log.warn("Failed to shorten RSS link %s", [link],
                    exc_info=True)
        link = str(link) if link else ""

        feed_title_str = str(feed_title) if feed_title else ""

        # just in case the format starts keyerroring and you're not sure why
        self.log.trace("RSS Entry: " + str(entry))

        template = channel.get_setting("rss-format", self._DEFAULT_FORMAT)
        try:
            # Expand {field} references BEFORE substituting the $variables,
            # so braces inside feed-supplied text (e.g. a title containing
            # "{}") are never interpreted as format fields.
            expanded = template.format(**entry)
        except (KeyError, IndexError, ValueError, AttributeError, TypeError):
            self.log.warn(f"Failed to format RSS entry for {channel}. Falling back to default format.")
            return f"{feed_title_str}: {title} by {author} - {link}"

        return (expanded
            .replace("$longtitle", feed_title_str)
            .replace("$title", title)
            .replace("$link", link)
            .replace("$author", author))

    def _collect_hooks(self):
        """Group every watched feed by (url, bindhost).

        :returns: dict mapping (url, bindhost) to a list of (server, channel)
            pairs that want that feed fetched from that bind address
        """
        hooks = {}
        hook_settings = self.bot.database.channel_settings.find_by_setting(
            "rss-hooks")
        for server_id, channel_name, urls in hook_settings:
            server = self.bot.get_server_by_id(server_id)
            if not server or channel_name not in server.channels:
                continue
            channel = server.channels.get(channel_name)

            # The channel setting wins over the server setting; the value is
            # the same for every url of this channel, so look it up once.
            bindhost = channel.get_setting("rss-bindhost",
                server.get_setting("rss-bindhost", None))

            for url in urls:
                if url.startswith("www."):
                    url = url.replace("www.", "", 1)
                hooks.setdefault((url, bindhost), []).append(
                    (server, channel))
        return hooks

    def _announce_new(self, server, channel, url, feed_title, entries,
            max_ids):
        """Announce unseen entries of one feed to one channel.

        At most `_MAX_ANNOUNCE` entries are sent per call, oldest first.
        The channel's seen-id list is written back only when it changed.

        :param entries: non-empty list of parsed entries, newest first
        :param max_ids: maximum number of seen ids to retain (must be > 0)
        """
        setting = "rss-seen-ids-%s" % url
        seen_ids = channel.get_setting(setting, [])
        shorten = channel.get_setting("rss-shorten", False)

        announced = 0
        for entry in reversed(entries):
            if announced == self._MAX_ANNOUNCE:
                break

            try:
                entry_id, entry_id_hash = self._get_id(entry)
            except KeyError:
                # Neither id nor link: the entry cannot be tracked, so
                # announcing it would repeat it on every poll.
                continue

            # Both the hashed and the raw id are checked against the list.
            if entry_id_hash in seen_ids or entry_id in seen_ids:
                continue

            output = self._format_entry(server, channel, feed_title, entry,
                shorten)
            self.events.on("send.stdout").call(target=channel,
                module_name="RSS", server=server, message=output)
            seen_ids.append(entry_id_hash)
            announced += 1

        trimmed = len(seen_ids) > max_ids
        if trimmed:
            seen_ids = seen_ids[-max_ids:]
        if announced or trimmed:
            channel.set_setting(setting, seen_ids)

    def _timer(self, timer):
        """Timer callback: fetch every watched feed and announce new entries.

        :param timer: the firing timer; rescheduled via `timer.redo()`
        """
        start_time = time.monotonic()
        self.log.trace("Polling RSS feeds")

        timer.redo()

        hooks = self._collect_hooks()
        if not hooks:
            return

        requests = [
            utils.http.Request(url, id=f"{url} {bindhost}", bindhost=bindhost)
            for url, bindhost in hooks]
        pages = utils.http.request_many(requests)

        for (url, bindhost), channels in hooks.items():
            key = f"{url} {bindhost}"
            if key not in pages:
                # async url get failed
                continue

            try:
                data = pages[key].decode()
            except Exception:
                self.log.error("Failed to decode rss URL %s", [url],
                    exc_info=True)
                continue

            feed = feedparser.parse(data)
            entries = feed["entries"]
            if not entries:
                # Nothing to announce. Skipping also matters for state: a
                # zero-length feed would give a history cap of 0 and wipe
                # the seen ids, re-announcing old entries later.
                continue

            feed_title = feed["feed"].get("title", None)
            max_ids = len(entries) * self._SEEN_IDS_FACTOR

            for server, channel in channels:
                self._announce_new(server, channel, url, feed_title, entries,
                    max_ids)

        total_milliseconds = (time.monotonic() - start_time) * 1000
        self.log.trace("Polled RSS feeds in %fms", [total_milliseconds])

    def _get_id(self, entry):
        """Return `(raw_id, "sha1:<hex>")` identifying a feed entry.

        The entry's "id" is used when present, otherwise its "link".

        :raises KeyError: if the entry has neither an id nor a link
        """
        entry_id = entry.get("id")
        if entry_id is None:
            # Only look at "link" when needed, so entries that carry an id
            # but no link do not raise.
            entry_id = entry["link"]
        entry_id_hash = hashlib.sha1(entry_id.encode("utf8")).hexdigest()
        return entry_id, "sha1:%s" % entry_id_hash

    def _get_entries(self, url, max: int=None):
        """Fetch and parse the feed at `url`.

        :param url: feed address to request
        :param max: maximum number of entries to return; None for all
        :returns: `(feed_title, entries)`, or `(None, None)` when the feed
            could not be fetched or parsed or has no feed metadata
        """
        try:
            feed = feedparser.parse(utils.http.request(url).data)
        except Exception:
            self.log.warn("failed to parse RSS %s", [url], exc_info=True)
            return None, None

        if not feed or not feed["feed"]:
            return None, None

        return feed["feed"].get("title", None), feed["entries"][:max]

    @utils.hook("received.command.rss", min_args=1, channel_only=True)
    def rss(self, event):
        """
        :help: Modify RSS/Atom configuration for the current channel
        :usage: list
        :usage: add <url>
        :usage: remove <url>
        :usage: read [url]
        :permission: rss
        """
        target = event["target"]
        args = event["args_split"]

        changed = False
        message = None
        rss_hooks = target.get_setting("rss-hooks", [])

        subcommand = args[0].lower()
        if subcommand == "list":
            event["stdout"].write("RSS hooks: %s" % ", ".join(rss_hooks))

        elif subcommand == "add":
            if len(args) < 2:
                raise utils.EventError("Please provide a URL")

            url = utils.http.url_sanitise(args[1])
            if url in rss_hooks:
                raise utils.EventError("That URL is already being watched")

            title, entries = self._get_entries(url)
            if entries is None:
                raise utils.EventError("Failed to read feed")

            # Mark everything currently in the feed as seen so that only
            # entries published after this point are announced.
            seen_ids = []
            for entry in entries:
                try:
                    seen_ids.append(self._get_id(entry)[1])
                except KeyError:
                    # untrackable entry (no id, no link)
                    continue
            target.set_setting("rss-seen-ids-%s" % url, seen_ids)

            rss_hooks.append(url)
            changed = True
            message = "Added RSS feed"

        elif subcommand == "remove":
            if len(args) < 2:
                raise utils.EventError("Please provide a URL")

            url = utils.http.url_sanitise(args[1])
            if url not in rss_hooks:
                matches = difflib.get_close_matches(url, rss_hooks,
                    cutoff=0.5)
                if matches:
                    raise utils.EventError("Did you mean %s ?" % matches[0])
                raise utils.EventError("I'm not watching that URL")

            rss_hooks.remove(url)
            # Drop the per-feed history too, so it does not linger forever.
            target.del_setting("rss-seen-ids-%s" % url)
            changed = True
            message = "Removed RSS feed"

        elif subcommand == "read":
            if len(args) > 1:
                # Sanitised the same way as for add/remove.
                url = utils.http.url_sanitise(args[1])
            elif len(rss_hooks) == 1:
                # With exactly one watched feed the url may be omitted.
                url = rss_hooks[0]
            else:
                raise utils.EventError("Please provide a url")

            title, entries = self._get_entries(url)
            if not entries:
                raise utils.EventError("Failed to get RSS entries")

            shorten = target.get_setting("rss-shorten", False)
            out = self._format_entry(event["server"], target, title,
                entries[0], shorten)
            event["stdout"].write(out)

        else:
            raise utils.EventError("Unknown subcommand '%s'" % subcommand)

        if changed:
            if rss_hooks:
                target.set_setting("rss-hooks", rss_hooks)
            else:
                # No feeds left: remove the setting rather than store [].
                target.del_setting("rss-hooks")
            event["stdout"].write(message)
|