Keep track of the different scripts in a message; round the score to 2 decimal places
(mixed_unicode.py)
This commit is contained in:
parent
c59a5600a8
commit
3dccc9f4e0
1 changed file with 4 additions and 1 deletion
|
@ -49,12 +49,14 @@ class Module(ModuleManager.BaseModule):
|
||||||
last_script = None
|
last_script = None
|
||||||
last_was_separator = False
|
last_was_separator = False
|
||||||
score = 0
|
score = 0
|
||||||
|
scripts = set([])
|
||||||
|
|
||||||
for char in event["message"]:
|
for char in event["message"]:
|
||||||
if char in WORD_SEPERATORS:
|
if char in WORD_SEPERATORS:
|
||||||
last_was_separator = True
|
last_was_separator = True
|
||||||
else:
|
else:
|
||||||
script = self._detect_script(char)
|
script = self._detect_script(char)
|
||||||
|
scripts.add(script)
|
||||||
if not script == Script.Unknown:
|
if not script == Script.Unknown:
|
||||||
if last_script and not script == last_script:
|
if last_script and not script == last_script:
|
||||||
score += 1
|
score += 1
|
||||||
|
@ -66,5 +68,6 @@ class Module(ModuleManager.BaseModule):
|
||||||
last_was_separator = False
|
last_was_separator = False
|
||||||
|
|
||||||
score = score/(len(event["message"])/SCORE_LENGTH)
|
score = score/(len(event["message"])/SCORE_LENGTH)
|
||||||
|
score = round(score, 2)
|
||||||
if score > 0:
|
if score > 0:
|
||||||
self.log.trace("Message given a mixed-unicode score of %d", [score])
|
self.log.trace("Message given a mixed-unicode score of %f", [score])
|
||||||
|
|
Loading…
Add table
Reference in a new issue