2018-11-20 11:38:30 +00:00
|
|
|
import enum
|
|
|
|
from src import ModuleManager, utils
|
|
|
|
|
|
|
|
@enum.unique
class Script(enum.Enum):
    """Writing systems that `Module._detect_script` can report."""

    # Fallback for code points that match none of the known ranges.
    Unknown = 0

    # Scripts that are recognised by code-point range.
    Latin = 1
    Cyrillic = 2
    Greek = 3
    Armenian = 4
|
2018-11-20 11:38:30 +00:00
|
|
|
# Characters treated as word boundaries when scoring a message: a script
# change that directly follows one of these is penalised less than a change
# in the middle of a word. (The spelling of the name is kept as-is because
# the Module class in this file refers to it.)
WORD_SEPERATORS = [
    ",",
    " ",
    "\t",
    ".",
]
|
|
|
|
|
|
|
|
class Module(ModuleManager.BaseModule):
    """Give each channel message a "mixed-unicode" score.

    The score counts how often the text switches between writing systems
    (Latin, Greek, Cyrillic, Armenian). Switches in the middle of a word
    count double. The score is only written to the trace log.
    """

    def _detect_script(self, char):
        """Classify a single character by writing system.

        Args:
            char: a one-character string.

        Returns:
            The `Script` member whose code-point range contains `char`, or
            `Script.Unknown` when the character is not a letter or falls
            outside every known range.
        """
        # Digits, punctuation, symbols and control characters live inside the
        # Latin/Greek/Cyrillic/Armenian code-point ranges but do not belong
        # to any one script. Classifying them by range alone would make
        # "привет!" or "γεια 123" look like mixed-script text, so they are
        # reported as Unknown and ignored by the scorer.
        if not char.isalpha():
            return Script.Unknown

        point = ord(char)
        if 0 <= point <= 687:
            # U+0000-U+02AF: Basic Latin through IPA Extensions.
            return Script.Latin
        elif 880 <= point <= 1023:
            # U+0370-U+03FF: Greek and Coptic.
            return Script.Greek
        elif 1024 <= point <= 1327:
            # U+0400-U+052F: Cyrillic and Cyrillic Supplement.
            return Script.Cyrillic
        elif 1329 <= point <= 1418:
            # U+0531-U+058A: Armenian.
            return Script.Armenian
        return Script.Unknown

    def _mixed_script_score(self, message):
        """Return the mixed-script score for `message`.

        Every change from one known script to a different one adds 1. The
        change adds 1 more when the character immediately before it was not
        a word separator, i.e. the switch happened inside a word.
        """
        last_script = None
        last_was_separator = False
        score = 0

        for char in message:
            if char in WORD_SEPERATORS:
                last_was_separator = True
                continue

            script = self._detect_script(char)
            if script is not Script.Unknown:
                if last_script is not None and script != last_script:
                    score += 1
                    if not last_was_separator:
                        # Switching scripts mid-word is more suspicious.
                        score += 1

                last_script = script

            # Any non-separator character, known script or not, ends the
            # "just after a separator" state.
            last_was_separator = False

        return score

    @utils.hook("received.message.channel")
    def channel_message(self, event):
        """Score the text in `event["message"]` and write it to the trace log."""
        score = self._mixed_script_score(event["message"])
        # NOTE(review): the logger appears to take its format arguments as a
        # list - kept exactly as the original call; confirm against the
        # project's logging helper.
        self.log.trace("Message given a mixed-unicode score of %d", [score])
|