@@ -46,12 +46,10 @@ include "hashtable_func_helper.pxi"
4646
4747cdef class Factorizer:
4848 cdef public PyObjectHashTable table
49- cdef public ObjectVector uniques
5049 cdef public Py_ssize_t count
5150
def __init__(self, size_hint):
    """
    Set up the state of a ``Factorizer`` (this is a method of that class).

    Parameters
    ----------
    size_hint
        Forwarded unchanged to the ``PyObjectHashTable`` constructor.
    """
    # No uniques vector is stored on the instance: ``factorize`` builds a
    # fresh ``ObjectVector`` on every call instead.
    self.table = PyObjectHashTable(size_hint)
    self.count = 0
5654
5755 def get_count (self ):
@@ -64,19 +62,22 @@ cdef class Factorizer:
6462 >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
6563 array([ 0, 1, 20])
6664 """
67- labels = self .table.get_labels(values, self .uniques,
65+ uniques = ObjectVector()
66+ labels = self .table.get_labels(values, uniques,
6867 self .count, na_sentinel, check_null)
6968 mask = (labels == na_sentinel)
69+ if len (labels) == 0 :
70+ return labels
7071 # sort on
7172 if sort:
7273 if labels.dtype != np.intp:
7374 labels = labels.astype(np.intp)
74- sorter = self . uniques.to_array().argsort()
75+ sorter = uniques.to_array().argsort()
7576 reverse_indexer = np.empty(len (sorter), dtype = np.intp)
7677 reverse_indexer.put(sorter, np.arange(len (sorter)))
7778 labels = reverse_indexer.take(labels, mode = ' clip' )
7879 labels[mask] = na_sentinel
79- self .count = len (self . uniques)
80+ self .count = len (uniques)
8081 return labels
8182
8283 def unique (self , ndarray[object] values ):
@@ -86,35 +87,36 @@ cdef class Factorizer:
8687
cdef class Int64Factorizer:
    """
    Label int64 values through an ``Int64HashTable``.

    Attributes
    ----------
    table : Int64HashTable
        Hash table created in ``__init__`` and queried by ``factorize``.
    count : Py_ssize_t
        Starts at 0; ``factorize`` sets it to the length of the uniques
        vector filled during that call.
    """
    cdef public Int64HashTable table
    cdef public Py_ssize_t count

    def __init__(self, size_hint):
        """Create the hash table; ``size_hint`` is forwarded to it as-is."""
        self.table = Int64HashTable(size_hint)
        self.count = 0

    def get_count(self):
        """Return the current value of ``self.count``."""
        return self.count

    def factorize(self, int64_t[:] values, sort=False,
                  na_sentinel=-1, check_null=True):
        """
        Compute labels for ``values`` using the instance's hash table.

        Parameters
        ----------
        values : int64_t[:]
            Values to label.
        sort : bool, default False
            If true, remap the labels so that they follow the sorted order
            of the uniques collected during this call.
        na_sentinel : int, default -1
            Forwarded to ``self.table.get_labels``.
        check_null : bool, default True
            Forwarded to ``self.table.get_labels``.

        Returns
        -------
        labels
            The result of ``get_labels``; cast to ``np.intp`` and remapped
            when ``sort`` is true.
        """
        # The uniques vector is local to this call; the instance no longer
        # carries a ``uniques`` attribute, so it must not be read from self.
        uniques = Int64Vector()
        labels = self.table.get_labels(values, uniques,
                                       self.count, na_sentinel,
                                       check_null)

        # Nothing to remap for empty input; note that ``self.count`` is
        # left untouched on this path.
        if len(labels) == 0:
            return labels

        # sort on
        if sort:
            if labels.dtype != np.intp:
                labels = labels.astype(np.intp)

            # reverse_indexer[i] is the rank of the i-th unique in sorted
            # order, so ``take`` converts labels into sorted positions.
            sorter = uniques.to_array().argsort()
            reverse_indexer = np.empty(len(sorter), dtype=np.intp)
            reverse_indexer.put(sorter, np.arange(len(sorter)))

            # NOTE(review): unlike Factorizer.factorize, labels equal to
            # na_sentinel are not masked/restored around this take --
            # confirm whether get_labels can emit the sentinel here.
            labels = reverse_indexer.take(labels)

        self.count = len(uniques)
        return labels
119121
120122
0 commit comments