# Directory holding the fedora-devel mailman fixtures used by the tests below.
DATA_DIR = os.path.join(BASE_DIR, 'data/fedora-devel-mailman')

# Expected number of emails and threads in the fixture archive.
# NOTE(review): the constant names say "June 2010", but the fixture files
# read by the tests are named '2010-January...' -- confirm which is right.
N_EMAILS_JUNE2010 = 292
N_THREADS_JUNE2010 = 63
21+
2122
2223
def test_parse_mailman_gzfiles():
    """Test that a gzipped mailman archive can be parsed.

    Parses the '2010-January.txt.gz' fixture (headers only, decoded as
    latin1) and checks that the number of messages returned equals
    N_EMAILS_JUNE2010.
    """
    archive_path = os.path.join(DATA_DIR, '2010-January.txt.gz')
    msglist = parse_mailman_gzfiles(archive_path, encoding='latin1',
                                    headersonly=True)

    assert len(msglist) == N_EMAILS_JUNE2010
2930
3031def test_parse_mailman_htmlthread ():
3132 """ Test that we can parse mailman html thread """
@@ -36,9 +37,12 @@ def test_parse_mailman_htmlthread():
3637 threads = parse_mailman_htmlthread (os .path .join (DATA_DIR ,
3738 '2010-January_thread.html' ))
3839
39- assert sum ([el .size for el in threads ]) == N_JUNE2010_EMAILS
40-
40+ #assert len(threads) == N_THREADS_JUNE2010
4141 print ('OK' )
42+ #for el in threads:
43+ # print(' - ({})'.format(el.size), el.message.subject)
44+ assert sum ([el .size for el in threads ]) == N_EMAILS_JUNE2010
45+
4246 #for el in threads:
4347 # print_container(el)
4448
@@ -47,24 +51,36 @@ def test_parse_mailman_htmlthread():
4751
4852
def test_fedora():
    """Test threading on the fedora-devel mailing list data.

    Parses the gzipped archive fixture, threads the messages with
    ``thread`` and checks that the sizes of the resulting containers add
    up to the expected number of emails.

    Raises:
        SkipTest: if ``lxml`` cannot be imported.
    """
    # NOTE(review): the earlier docstring and the constant names say
    # "June 2010", but the fixture files are named '2010-January...' --
    # confirm which period the data actually covers.
    try:
        # Availability check only; presumably required by the HTML thread
        # parser used below -- confirm.
        import lxml  # noqa: F401
    except ImportError:
        raise SkipTest

    msglist = parse_mailman_gzfiles(
        os.path.join(DATA_DIR, '2010-January.txt.gz'),
        encoding='latin1', headersonly=True)

    assert len(msglist) == N_EMAILS_JUNE2010

    # Reference threading from the mailman HTML index.  It is parsed here
    # but not compared against the computed threads in this test.
    threads_ref = parse_mailman_htmlthread(
        os.path.join(DATA_DIR, '2010-January_thread.html'))

    subject_table = thread([Message(el) for el in msglist])

    # Sorted (key, container) pairs; a fixed order helps when printing the
    # containers while debugging.
    subjects = sorted(subject_table.items())

    # Every parsed email must end up in exactly one top-level container.
    assert sum(el.size for _, el in subjects) == N_EMAILS_JUNE2010

    # Thread-count check is disabled in the original (reason not visible
    # from this file):
    # assert len(subjects) == N_THREADS_JUNE2010
6884
6985
7086
0 commit comments