@@ -134,11 +134,11 @@ def dtm_binning_sv(dtm, breaks, spl_val):
134134 return {'binning_sv' :None , 'dtm' :dtm }
135135 # binning_sv
136136 binning_sv = pd .merge (
137- dtm_sv .fillna ('missing' ).groupby (['variable' ,'value' ])['y' ].agg ([n0 , n1 ])\
137+ dtm_sv .fillna ('missing' ).groupby (['variable' ,'value' ], group_keys = False )['y' ].agg ([n0 , n1 ])\
138138 .reset_index ().rename (columns = {'n0' :'good' ,'n1' :'bad' }),
139139 sv_df .fillna ('missing' ),
140140 on = 'value'
141- ).groupby (['variable' , 'rowid' , 'bin_chr' ]).agg ({'bad' :sum ,'good' :sum })\
141+ ).groupby (['variable' , 'rowid' , 'bin_chr' ], group_keys = False ).agg ({'bad' :sum ,'good' :sum })\
142142 .reset_index ().rename (columns = {'bin_chr' :'bin' })\
143143 .drop ('rowid' , axis = 1 )
144144 else :
@@ -158,7 +158,7 @@ def check_empty_bins(dtm, binning):
158158 bstbrks = sorted (list (map (float , ['-inf' ] + list (binright ) + ['inf' ])))
159159 labels = ['[{},{})' .format (bstbrks [i ], bstbrks [i + 1 ]) for i in range (len (bstbrks )- 1 )]
160160 dtm .loc [:,'bin' ] = pd .cut (dtm ['value' ], bstbrks , right = False , labels = labels )
161- binning = dtm .groupby (['variable' ,'bin' ])['y' ].agg ([n0 , n1 ])\
161+ binning = dtm .groupby (['variable' ,'bin' ], group_keys = False )['y' ].agg ([n0 , n1 ])\
162162 .reset_index ().rename (columns = {'n0' :'good' ,'n1' :'bad' })
163163 # warnings.warn("The break points are modified into '[{}]'. There are empty bins based on the provided break points.".format(','.join(binright)))
164164 # binning
@@ -202,7 +202,7 @@ def woebin2_breaks(dtm, breaks, spl_val):
202202 dtm .loc [:,'bin' ] = pd .cut (dtm ['value' ], bstbrks , right = False , labels = labels )
203203 dtm ['bin' ] = dtm ['bin' ].astype (str )
204204
205- binning = dtm .groupby (['variable' ,'bin' ])['y' ].agg ([n0 , n1 ])\
205+ binning = dtm .groupby (['variable' ,'bin' ], group_keys = False )['y' ].agg ([n0 , n1 ])\
206206 .reset_index ().rename (columns = {'n0' :'good' ,'n1' :'bad' })
207207 # check empty bins for unmeric variable
208208 binning = check_empty_bins (dtm , binning )
@@ -225,7 +225,7 @@ def woebin2_breaks(dtm, breaks, spl_val):
225225 dtm ,
226226 bk_df .assign (bin = lambda x : x .bin_chr ),
227227 how = 'left' , on = 'value'
228- ).fillna ('missing' ).groupby (['variable' , 'rowid' , 'bin' ])['y' ].agg ([n0 ,n1 ])\
228+ ).fillna ('missing' ).groupby (['variable' , 'rowid' , 'bin' ], group_keys = False )['y' ].agg ([n0 ,n1 ])\
229229 .rename (columns = {'n0' :'good' ,'n1' :'bad' })\
230230 .reset_index ().drop ('rowid' , axis = 1 )
231231 # return
@@ -317,7 +317,7 @@ def woebin2_init_bin(dtm, init_count_distr, breaks, spl_val):
317317 labels = ['[{},{})' .format (brk [i ], brk [i + 1 ]) for i in range (len (brk )- 1 )]
318318 dtm .loc [:,'bin' ] = pd .cut (dtm ['value' ], brk , right = False , labels = labels )#.astype(str)
319319 # init_bin
320- init_bin = dtm .groupby ('bin' )['y' ].agg ([n0 , n1 ])\
320+ init_bin = dtm .groupby ('bin' , group_keys = False )['y' ].agg ([n0 , n1 ])\
321321 .reset_index ().rename (columns = {'n0' :'good' ,'n1' :'bad' })
322322 # check empty bins for unmeric variable
323323 init_bin = check_empty_bins (dtm , init_bin )
@@ -329,7 +329,7 @@ def woebin2_init_bin(dtm, init_count_distr, breaks, spl_val):
329329 )[['variable' , 'bin' , 'brkp' , 'good' , 'bad' , 'badprob' ]]
330330 else : # other type variable
331331 # initial binning datatable
332- init_bin = dtm .groupby ('value' )['y' ].agg ([n0 ,n1 ])\
332+ init_bin = dtm .groupby ('value' , group_keys = False )['y' ].agg ([n0 ,n1 ])\
333333 .rename (columns = {'n0' :'good' ,'n1' :'bad' })\
334334 .assign (
335335 variable = dtm ['variable' ].values [0 ],
@@ -358,8 +358,8 @@ def woebin2_init_bin(dtm, init_count_distr, breaks, spl_val):
358358 init_bin = init_bin .assign (brkp2 = lambda x : x ['brkp' ].shift (shift_period ))\
359359 .assign (brkp = lambda x :np .where (x ['brkp' ] == rm_brkp ['brkp' ], x ['brkp2' ], x ['brkp' ]))
360360 # groupby brkp
361- init_bin = init_bin .groupby ('brkp' ).agg ({
362- 'variable' :lambda x : np .unique (x ),
361+ init_bin = init_bin .groupby ('brkp' , group_keys = False ).agg ({
362+ 'variable' :lambda x : np .unique (x )[ 0 ] ,
363363 'bin' : lambda x : '%,%' .join (x ),
364364 'good' : sum ,
365365 'bad' : sum
@@ -410,18 +410,18 @@ def total_iv_all_breaks(initial_binning, bestbreaks, dtm_rows):
410410 total_iv_all_brks = pd .melt (
411411 init_bin_all_breaks , id_vars = ["variable" , "good" , "bad" ], var_name = 'bstbin' ,
412412 value_vars = ['bstbin' + str (i ) for i in breaks_set ])\
413- .groupby (['variable' , 'bstbin' , 'value' ])\
413+ .groupby (['variable' , 'bstbin' , 'value' ], group_keys = False )\
414414 .agg ({'good' :sum , 'bad' :sum }).reset_index ()\
415415 .assign (count = lambda x : x ['good' ]+ x ['bad' ])
416416
417- total_iv_all_brks ['count_distr' ] = total_iv_all_brks .groupby (['variable' , 'bstbin' ])\
417+ total_iv_all_brks ['count_distr' ] = total_iv_all_brks .groupby (['variable' , 'bstbin' ], group_keys = False )\
418418 ['count' ].apply (lambda x : x / dtm_rows ).reset_index (drop = True )
419- total_iv_all_brks ['min_count_distr' ] = total_iv_all_brks .groupby (['variable' , 'bstbin' ])\
419+ total_iv_all_brks ['min_count_distr' ] = total_iv_all_brks .groupby (['variable' , 'bstbin' ], group_keys = False )\
420420 ['count_distr' ].transform (lambda x : min (x ))
421421
422422 total_iv_all_brks = total_iv_all_brks \
423423 .assign (bstbin = lambda x : [float (re .sub ('^bstbin' , '' , i )) for i in x ['bstbin' ]] )\
424- .groupby (['variable' ,'bstbin' ,'min_count_distr' ])\
424+ .groupby (['variable' ,'bstbin' ,'min_count_distr' ], group_keys = False )\
425425 .apply (lambda x : iv_01 (x ['good' ], x ['bad' ])).reset_index (name = 'total_iv' )
426426 # return
427427 return total_iv_all_brks
@@ -439,11 +439,11 @@ def binning_add_1bst(initial_binning, bestbreaks):
439439 bstbin = lambda x : pd .cut (x ['brkp' ], bestbreaks_inf , right = False , labels = labels )
440440 )
441441 if is_numeric_dtype (dtm ['value' ]):
442- binning_1bst_brk = binning_1bst_brk .groupby (['variable' , 'bstbin' ])\
442+ binning_1bst_brk = binning_1bst_brk .groupby (['variable' , 'bstbin' ], group_keys = False )\
443443 .agg ({'good' :sum , 'bad' :sum }).reset_index ().assign (bin = lambda x : x ['bstbin' ])\
444444 [['bstbin' , 'variable' , 'bin' , 'good' , 'bad' ]]
445445 else :
446- binning_1bst_brk = binning_1bst_brk .groupby (['variable' , 'bstbin' ])\
446+ binning_1bst_brk = binning_1bst_brk .groupby (['variable' , 'bstbin' ], group_keys = False )\
447447 .agg ({'good' :sum , 'bad' :sum , 'bin' :lambda x :'%,%' .join (x )}).reset_index ()\
448448 [['bstbin' , 'variable' , 'bin' , 'good' , 'bad' ]]
449449 # format
@@ -579,13 +579,13 @@ def add_chisq(initial_binning):
579579 var_name = 'goodbad' , value_name = 'a' )\
580580 .sort_values (by = ['goodbad' , 'brkp' ]).reset_index (drop = True )
581581 ###
582- chisq_df ['a_lag' ] = chisq_df .groupby ('goodbad' )['a' ].apply (lambda x : x .shift (1 ))#.reset_index(drop=True)
583- chisq_df ['a_rowsum' ] = chisq_df .groupby ('brkp' )['a' ].transform (lambda x : sum (x ))#.reset_index(drop=True)
584- chisq_df ['a_lag_rowsum' ] = chisq_df .groupby ('brkp' )['a_lag' ].transform (lambda x : sum (x ))#.reset_index(drop=True)
582+ chisq_df ['a_lag' ] = chisq_df .groupby ('goodbad' , group_keys = False )['a' ].apply (lambda x : x .shift (1 ))#.reset_index(drop=True)
583+ chisq_df ['a_rowsum' ] = chisq_df .groupby ('brkp' , group_keys = False )['a' ].transform (lambda x : sum (x ))#.reset_index(drop=True)
584+ chisq_df ['a_lag_rowsum' ] = chisq_df .groupby ('brkp' , group_keys = False )['a_lag' ].transform (lambda x : sum (x ))#.reset_index(drop=True)
585585 ###
586586 chisq_df = pd .merge (
587587 chisq_df .assign (a_colsum = lambda df : df .a + df .a_lag ),
588- chisq_df .groupby ('brkp' ).apply (lambda df : sum (df .a + df .a_lag )).reset_index (name = 'a_sum' ))\
588+ chisq_df .groupby ('brkp' , group_keys = False ).apply (lambda df : sum (df .a + df .a_lag )).reset_index (name = 'a_sum' ))\
589589 .assign (
590590 e = lambda df : df .a_rowsum * df .a_colsum / df .a_sum ,
591591 e_lag = lambda df : df .a_lag_rowsum * df .a_colsum / df .a_sum
@@ -637,7 +637,7 @@ def add_chisq(initial_binning):
637637 binning_chisq = binning_chisq .assign (brkp2 = lambda x : x ['brkp' ].shift (shift_period ))\
638638 .assign (brkp = lambda x :np .where (x ['brkp' ] == rm_brkp ['brkp' ], x ['brkp2' ], x ['brkp' ]))
639639 # groupby brkp
640- binning_chisq = binning_chisq .groupby ('brkp' ).agg ({
640+ binning_chisq = binning_chisq .groupby ('brkp' , group_keys = False ).agg ({
641641 'variable' :lambda x :np .unique (x ),
642642 'bin' : lambda x : '%,%' .join (x ),
643643 'good' : sum ,
@@ -765,7 +765,7 @@ def bins_to_breaks(bins, dt, to_string=False, save_string=None):
765765 bins_breakslist = bins [~ bins ['breaks' ].isin (["-inf" ,"inf" ,"missing" ]) & ~ bins ['is_special_values' ]]
766766 bins_breakslist = pd .merge (bins_breakslist [['variable' , 'breaks' ]], vars_class , how = 'left' , on = 'variable' )
767767 bins_breakslist .loc [bins_breakslist ['not_numeric' ], 'breaks' ] = '\' ' + bins_breakslist .loc [bins_breakslist ['not_numeric' ], 'breaks' ]+ '\' '
768- bins_breakslist = bins_breakslist .groupby ('variable' )['breaks' ].agg (lambda x : ',' .join (x ))
768+ bins_breakslist = bins_breakslist .groupby ('variable' , group_keys = False )['breaks' ].agg (lambda x : ',' .join (x ))
769769
770770 if to_string :
771771 bins_breakslist = "breaks_list={\n " + ', \n ' .join ('\' ' + bins_breakslist .index [i ]+ '\' : [' + bins_breakslist [i ]+ ']' for i in np .arange (len (bins_breakslist )))+ "}"
@@ -1301,15 +1301,15 @@ def gb_distr(binx):
13011301 binx ['good_distr' ] = binx ['good' ]/ sum (binx ['count' ])
13021302 binx ['bad_distr' ] = binx ['bad' ]/ sum (binx ['count' ])
13031303 return binx
1304- bins = bins .groupby ('variable' ).apply (gb_distr )
1304+ bins = bins .groupby ('variable' , group_keys = False ).apply (gb_distr )
13051305 # x variable names
13061306 if xs is None : xs = bins ['variable' ].unique ()
13071307 # plot export
13081308 plotlist = {}
13091309 for i in xs :
1310- binx = bins [bins ['variable' ] == i ].reset_index ()
1310+ binx = bins [bins ['variable' ] == i ].reset_index (drop = True )
13111311 plotlist [i ] = plot_bin (binx , title , show_iv )
1312- return plotlist
1312+ return plotlist
13131313
13141314
13151315
@@ -1436,10 +1436,10 @@ class number over total. Accepted range: 0.01-0.2; default
14361436 # adjust all variables
14371437 if not adj_all_var :
14381438 bins2 = bins .loc [~ ((bins ['bin' ] == 'missing' ) & (bins ['count_distr' ] >= count_distr_limit ))].reset_index (drop = True )
1439- bins2 ['badprob2' ] = bins2 .groupby ('variable' ).apply (lambda x : x ['badprob' ].shift (1 )).reset_index (drop = True )
1439+ bins2 ['badprob2' ] = bins2 .groupby ('variable' , group_keys = False ).apply (lambda x : x ['badprob' ].shift (1 )).reset_index (drop = True )
14401440 bins2 = bins2 .dropna (subset = ['badprob2' ]).reset_index (drop = True )
14411441 bins2 = bins2 .assign (badprob_trend = lambda x : x .badprob >= x .badprob2 )
1442- xs_adj = bins2 .groupby ('variable' )['badprob_trend' ].nunique ()
1442+ xs_adj = bins2 .groupby ('variable' , group_keys = False )['badprob_trend' ].nunique ()
14431443 xs_adj = xs_adj [xs_adj > 1 ].index
14441444 else :
14451445 xs_adj = xs_all
0 commit comments