Skip to content

Commit 05c72a1

Browse files
committed
Updating newsgroups tests
1 parent 1174086 commit 05c72a1

File tree

3 files changed

+34
-15
lines changed

3 files changed

+34
-15
lines changed

jwzthreading/jwzthreading.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,6 @@ def __init__(self, msg=None):
162162
def __repr__(self):
163163
return '<%s: %r>' % (self.__class__.__name__, self.message_id)
164164

165-
166165
#
167166
# functions
168167
#

jwzthreading/tests/test_newsgroups.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,17 @@
1616
DATA_DIR = os.path.join(BASE_DIR, 'data/fedora-devel-mailman')
1717

1818
# Expected number of emails and threads
19-
N_JUNE2010_THREADS = 292
20-
N_JUNE2010_EMAILS = 292
19+
N_EMAILS_JUNE2010 = 292
20+
N_THREADS_JUNE2010 = 63
21+
2122

2223

2324
def test_parse_mailman_gzfiles():
2425
""" Test that we can parse mailman files """
2526
msglist = parse_mailman_gzfiles(os.path.join(DATA_DIR, '2010-January.txt.gz'),
2627
encoding='latin1', headersonly=True)
2728

28-
assert len(msglist) == N_JUNE2010_EMAILS
29+
assert len(msglist) == N_EMAILS_JUNE2010
2930

3031
def test_parse_mailman_htmlthread():
3132
""" Test that we can parse mailman html thread """
@@ -36,9 +37,12 @@ def test_parse_mailman_htmlthread():
3637
threads = parse_mailman_htmlthread(os.path.join(DATA_DIR,
3738
'2010-January_thread.html'))
3839

39-
assert sum([el.size for el in threads]) == N_JUNE2010_EMAILS
40-
40+
#assert len(threads) == N_THREADS_JUNE2010
4141
print('OK')
42+
#for el in threads:
43+
# print(' - ({})'.format(el.size), el.message.subject)
44+
assert sum([el.size for el in threads]) == N_EMAILS_JUNE2010
45+
4246
#for el in threads:
4347
# print_container(el)
4448

@@ -47,24 +51,36 @@ def test_parse_mailman_htmlthread():
4751

4852

4953
def test_fedora():
50-
""" Test threading on the fedora-devel mailing list data"""
51-
# 2010-January https://www.redhat.com/archives/fedora-devel-list/
52-
#import mailbox
54+
""" Test threading on the fedora-devel mailing list data
55+
from June 2010"""
56+
57+
try:
58+
import lxml
59+
except ImportError:
60+
raise SkipTest
5361

5462

5563
msglist = parse_mailman_gzfiles(os.path.join(DATA_DIR, '2010-January.txt.gz'),
5664
encoding='latin1', headersonly=True)
5765

58-
msglist_parsed = map(Message, msglist)
66+
assert len(msglist) == N_EMAILS_JUNE2010
5967

60-
subject_table = thread(msglist_parsed)
68+
69+
threads_ref = parse_mailman_htmlthread(os.path.join(DATA_DIR,
70+
'2010-January_thread.html'))
71+
72+
73+
subject_table = thread([Message(el) for el in msglist])
6174

6275
subjects = subject_table.items()
6376
subjects = sorted(subjects)
64-
for idx, (_, container) in enumerate(subjects):
77+
for _, container in enumerate(subjects):
6578
#print(idx)
6679
#print_container(container)
6780
pass
81+
assert sum([el.size for _, el in subjects]) == N_EMAILS_JUNE2010
82+
83+
#assert len(subjects) == N_THREADS_JUNE2010
6884

6985

7086

jwzthreading/utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def parse_mailman_htmlthread(filename):
7070
a thread list
7171
"""
7272
from lxml import etree
73-
from .jwzthreading import Container
73+
from .jwzthreading import Container, Message
7474
parser = etree.HTMLParser()
7575
with open(filename, 'rt') as fh:
7676
tree = etree.parse(fh, parser)
@@ -89,14 +89,18 @@ def create_thread(root, parent_container=None):
8989
if root.tag != 'li':
9090
raise ValueError('Element {} was not expected'.format(root))
9191

92+
if len(root.getchildren()) == 0:
93+
# this is a dummy element "<li>Possible follow-ups</li>"
94+
return None
95+
9296
container = Container()
9397
for child in root.getchildren():
9498
if child.tag == 'strong':
9599
# url to the actual email
96100
a_el = child.getchildren()[0]
97-
container.message = DummyMessage()
101+
container.message = Message()
98102
container.message.subject = a_el.text
99-
container.message.id = int(a_el.get('name'))
103+
container.message.message_id = int(a_el.get('name'))
100104
elif child.tag == 'em':
101105
pass # email sender, ignore this line
102106
elif child.tag == 'ul':

0 commit comments

Comments
 (0)