Initial commit.
This commit is contained in:
200
mail/spamassassin/pyzor-0.7.0/tests/unit/test_digest.py
Normal file
200
mail/spamassassin/pyzor-0.7.0/tests/unit/test_digest.py
Normal file
@@ -0,0 +1,200 @@
|
||||
"""The the pyzor.digest module
|
||||
"""
|
||||
|
||||
import sys
|
||||
import hashlib
|
||||
import unittest
|
||||
|
||||
from pyzor.digest import *
|
||||
|
||||
# HTML fixture fed to HTMLStripper in HTMLStripperTests.  Each literal below
# is one line of the document; the explicit "\n" keeps the line breaks.
HTML_TEXT = (
    '<html><head><title>Email spam</title></head><body>\n'
    '<p><b>Email spam</b>, also known as <b>junk email</b>\n'
    'or <b>unsolicited bulk email</b> (<i>UBE</i>), is a subset of\n'
    '<a href="/wiki/Spam_(electronic)" title="Spam (electronic)">'
    'electronic spam</a>\n'
    'involving nearly identical messages sent to numerous recipients by '
    '<a href="/wiki/Email" title="Email">\n'
    'email</a>. Clicking on '
    '<a href="/wiki/Html_email#Security_vulnerabilities" '
    'title="Html email" class="mw-redirect">\n'
    'links in spam email</a> may send users to '
    '<a href="/wiki/Phishing" title="Phishing">phishing</a>\n'
    'web sites or sites that are hosting '
    '<a href="/wiki/Malware" title="Malware">malware</a>.</body></html>'
)

# Text that HTMLStripperTests expects after joining the stripped fragments
# of HTML_TEXT with single spaces.
HTML_TEXT_STRIPED = (
    'Email spam Email spam , also known as junk email '
    'or unsolicited bulk email ( UBE ),'
    ' is a subset of electronic spam involving nearly identical messages '
    'sent to numerous recipients by email'
    ' . Clicking on links in spam email may send users to phishing '
    'web sites or sites that are hosting malware .'
)
|
||||
|
||||
class HTMLStripperTests(unittest.TestCase):
    """Run HTMLStripper over the HTML_TEXT fixture and check its output."""

    def setUp(self):
        super(HTMLStripperTests, self).setUp()
        # The stripper is constructed with this list; the test reads the
        # collected text fragments back out of it after feeding the parser.
        self.data = []

    def tearDown(self):
        super(HTMLStripperTests, self).tearDown()

    def test_HTMLStripper(self):
        # Feed the whole document, then compare the space-joined fragments
        # with the expected plain text.
        parser = HTMLStripper(self.data)
        parser.feed(HTML_TEXT)
        self.assertEqual(" ".join(self.data), HTML_TEXT_STRIPED)
|
||||
|
||||
|
||||
class PreDigestTests(unittest.TestCase):
    """Check which lines DataDigester forwards to handle_line.

    setUp swaps DataDigester.digest_payloads and DataDigester.handle_line
    for local stand-ins, so every test only needs to build a DataDigester
    and compare the lines recorded in self.lines.
    """

    def setUp(self):
        unittest.TestCase.setUp(self)
        # Filled by mock_handle_line below, one entry per handled line.
        self.lines = []

        def mock_digest_payloads(c, message):
            # Yield the whole message once, decoded to text.
            yield message.decode("utf8")

        def mock_handle_line(s, line):
            # Record the line (decoded to text) instead of digesting it.
            self.lines.append(line.decode("utf8"))

        # Keep the real implementations so tearDown can put them back.
        self.real_digest_payloads = DataDigester.digest_payloads
        self.real_handle_line = DataDigester.handle_line
        DataDigester.digest_payloads = mock_digest_payloads
        DataDigester.handle_line = mock_handle_line

    def tearDown(self):
        unittest.TestCase.tearDown(self)
        DataDigester.digest_payloads = self.real_digest_payloads
        DataDigester.handle_line = self.real_handle_line

    def _check_removed(self, tokens):
        """Assert that every token disappears from the predigested line.

        Each token is embedded as "Test <token> Test2" and digested on its
        own; the first recorded line must then be "TestTest2".
        """
        message = "Test %s Test2"
        expected = "TestTest2"
        for token in tokens:
            self.lines = []
            DataDigester((message % token).encode("utf8"))
            # Name the offending input: the loop covers several tokens and
            # a bare IndexError or mismatch would not say which one failed.
            self.assertTrue(self.lines, "no line recorded for %r" % token)
            self.assertEqual(self.lines[0], expected,
                             "%r was not removed" % token)

    def test_predigest_emails(self):
        """Test email removal in the predigest process"""
        # NOTE(review): the long-string pattern is raised to 100+ characters,
        # presumably so the addresses are removed by the email rule rather
        # than by the long-word rule -- confirm against pyzor.digest.
        real_longstr = DataDigester.longstr_ptrn
        DataDigester.longstr_ptrn = re.compile(r'\S{100,}')
        emails = [
            "test@example.com",
            "test123@example.com",
            "test+abc@example.com",
            "test.test2@example.com",
            "test.test2+abc@example.com",
        ]
        try:
            self._check_removed(emails)
        finally:
            DataDigester.longstr_ptrn = real_longstr

    # XXX This fails
    # def test_predigest_emails_whitespace(self):
    #     real_longstr = DataDigester.longstr_ptrn
    #     DataDigester.longstr_ptrn = re.compile(r'\S{100,}')
    #     emails = ["chirila@example. com",
    #               "chirila@example . com",
    #               "chirila @example. com",
    #               "chirila@ example. com",
    #               "chirila @example . com",
    #               "chirila @ example. com",
    #               "chirila @ example . com",]
    #     message = "Test %s Test2"
    #     expected = "TestTest2"
    #     try:
    #         for email in emails:
    #             self.lines = []
    #             DataDigester(message % email)
    #             self.assertEqual(self.lines[0], expected)
    #     finally:
    #         DataDigester.longstr_ptrn = real_longstr

    def test_predigest_urls(self):
        """Test url removal in the predigest process"""
        # NOTE(review): same long-string override as in the email test,
        # presumably for the same reason -- confirm against pyzor.digest.
        real_longstr = DataDigester.longstr_ptrn
        DataDigester.longstr_ptrn = re.compile(r'\S{100,}')
        urls = [
            "http://www.example.com",
            # "www.example.com", # XXX This also fail
            "http://example.com",
            # "example.com", # XXX This also fails
            # The comma after the next entry was missing, which silently
            # merged it with the following URL into a single test case.
            "http://www.example.com/test/",
            "http://www.example.com/test/test2",
        ]
        try:
            self._check_removed(urls)
        finally:
            DataDigester.longstr_ptrn = real_longstr

    def test_predigest_long(self):
        """Test long "words" removal in the predigest process"""
        strings = [
            "0A2D3f%a#S",
            "3sddkf9jdkd9",
            "@@#@@@@@@@@@",
        ]
        self._check_removed(strings)

    def test_predigest_min_line_lenght(self):
        """Test small lines removal in the predigest process"""
        message = ("This line is included\n"
                   "not this\n"
                   "This also")
        expected = ["Thislineisincluded", "Thisalso"]
        DataDigester(message.encode("utf8"))
        self.assertEqual(self.lines, expected)

    def test_predigest_atomic(self):
        """Test atomic messages (lines <= 4) in the predigest process"""
        message = "All this message\nShould be included\nIn the predigest"
        expected = ["Allthismessage", "Shouldbeincluded", "Inthepredigest"]
        DataDigester(message.encode("utf8"))
        self.assertEqual(self.lines, expected)

    def test_predigest_pieced(self):
        """Test pieced messages (lines > 4) in the predigest process"""
        # 100 numbered lines; only the six listed below are expected to be
        # forwarded to handle_line.
        message = "".join("Line%d test test test\n" % i for i in range(100))
        expected = ["Line%dtesttesttest" % i
                    for i in (20, 21, 22, 60, 61, 62)]
        DataDigester(message.encode("utf8"))
        self.assertEqual(self.lines, expected)
|
||||
|
||||
class DigestTests(unittest.TestCase):
    """Compare DataDigester's final value against a hand-computed SHA-1."""

    def setUp(self):
        super(DigestTests, self).setUp()
        self.lines = []

        def single_payload(c, message):
            # Stand-in for digest_payloads: yield the message once, as text.
            yield message.decode("utf8")

        # Remember the real method so tearDown can restore it.
        self.real_digest_payloads = DataDigester.digest_payloads
        DataDigester.digest_payloads = single_payload

    def tearDown(self):
        super(DigestTests, self).tearDown()
        DataDigester.digest_payloads = self.real_digest_payloads

    def test_digest(self):
        message = b"That's some good ham right there"
        # The same message with all whitespace removed.
        predigested = b"That'ssomegoodhamrightthere"

        expected = hashlib.sha1(predigested).hexdigest()

        self.assertEqual(DataDigester(message).value, expected)
|
||||
|
||||
def suite():
    """Gather all the tests from this module in a test suite.

    Returns a unittest.TestSuite holding one sub-suite for each of
    HTMLStripperTests, PreDigestTests and DigestTests.
    """
    # unittest.makeSuite was deprecated in Python 3.11 and removed in 3.13;
    # the default loader's loadTestsFromTestCase builds the same per-class
    # suite and exists on every supported Python version.
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    test_suite = unittest.TestSuite()
    for case in (HTMLStripperTests, PreDigestTests, DigestTests):
        test_suite.addTest(load(case))
    return test_suite
|
||||
|
||||
# Run the module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user