4141import math
4242import numpy as np
4343import random
44+ import six
4445
4546from beard .clustering import block_phonetic
4647from beard .clustering import block_last_name_first_initial
@@ -50,7 +51,7 @@ def _noblocking_sampling(sample_size, train_signatures, clusters_reversed):
5051 pairs = []
5152 # Pairs dict will prevent duplicates
5253 pairs_dict = {}
53- category_size = sample_size / 2
54+ category_size = sample_size // 2
5455 negative = 0
5556 while negative < category_size :
5657 s1 = random .choice (train_signatures )['signature_id' ]
@@ -78,7 +79,7 @@ def _noblocking_sampling(sample_size, train_signatures, clusters_reversed):
7879 for i in range (100 ):
7980 print ("sampling positive examples: %s out of 100 folds" % (i + 1 ))
8081 some_signatures = random .sample (train_signatures ,
81- len (train_signatures )/ 20 )
82+ len (train_signatures )// 20 )
8283 for i , s1 in enumerate (some_signatures ):
8384 for s2 in some_signatures [i + 1 :]:
8485 s1_id = s1 ['signature_id' ]
@@ -88,7 +89,7 @@ def _noblocking_sampling(sample_size, train_signatures, clusters_reversed):
8889 if s1_cluster == s2_cluster :
8990 positive_pairs .append ((s1_id , s2_id , 0 ))
9091
91- sampled = random .sample (positive_pairs , category_size / 100 )
92+ sampled = random .sample (positive_pairs , category_size // 100 )
9293 pairs += sampled
9394 for s1 , s2 , _ in sampled :
9495 if s1 > s2 :
@@ -169,7 +170,7 @@ def pair_sampling(blocking_function,
169170 """
170171 # Load ground-truth
171172 true_clusters = json .load (open (clusters_filename , "r" ))
172- clusters_reversed = {v : k for k , va in true_clusters .iteritems ()
173+ clusters_reversed = {v : k for k , va in six .iteritems (true_clusters )
173174 for v in va }
174175
175176 train_signatures = json .load (open (train_filename , "r" ))
0 commit comments