@@ -116,12 +116,12 @@ def _length_check(others):
116116 return n
117117
118118
119- def _na_map (f , arr , na_result = np .nan , dtype = object ):
119+ def _na_map (f , arr , na_result = np .nan , dtype = object , np_f = None ):
120120 # should really _check_ for NA
121- return _map (f , arr , na_mask = True , na_value = na_result , dtype = dtype )
121+ return _map (f , arr , na_mask = True , na_value = na_result , dtype = dtype , np_f = np_f ) # np_f: optional whole-array variant of f, forwarded unchanged to _map
122122
123123
124- def _map (f , arr , na_mask = False , na_value = np .nan , dtype = object ):
124+ def _map (f , arr , na_mask = False , na_value = np .nan , dtype = object , np_f = None ):
125125 from pandas .core .series import Series
126126
127127 if not len (arr ):
@@ -131,6 +131,14 @@ def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
131131 arr = arr .values
132132 if not isinstance (arr , np .ndarray ):
133133 arr = np .asarray (arr , dtype = object )
134+
135+ # short path for all-string array; arr is already an ndarray at this point, so it is cast directly
136+ if np_f is not None and lib .is_string_array (arr ):
137+ try :
138+ return np_f (arr .astype (unicode ))
139+ except Exception :
140+ pass # best effort: on any failure fall through to the regular mapping code below
141+
134142 if na_mask :
135143 mask = isnull (arr )
136144 try :
@@ -686,14 +694,17 @@ def str_pad(arr, width, side='left', fillchar=' '):
686694
687695 if side == 'left' :
688696 f = lambda x : x .rjust (width , fillchar )
697+ np_f = lambda x : np .core .defchararray .rjust (x , width , fillchar ) # must pad on the same side as f
689698 elif side == 'right' :
690699 f = lambda x : x .ljust (width , fillchar )
700+ np_f = lambda x : np .core .defchararray .ljust (x , width , fillchar ) # must pad on the same side as f
691701 elif side == 'both' :
692702 f = lambda x : x .center (width , fillchar )
703+ np_f = lambda x : np .core .defchararray .center (x , width , fillchar ) # array counterpart of str.center
693704 else : # pragma: no cover
694705 raise ValueError ('Invalid side' )
695706
696- return _na_map (f , arr )
707+ return _na_map (f , arr , np_f = np_f )
697708
698709
699710def str_split (arr , pat = None , n = None ):
@@ -720,17 +731,21 @@ def str_split(arr, pat=None, n=None):
720731 if n is None or n == 0 :
721732 n = - 1
722733 f = lambda x : x .split (pat , n )
734+ np_f = lambda x : np .core .defchararray .split (x , pat , n )
723735 else :
724736 if len (pat ) == 1 :
725737 if n is None or n == 0 :
726738 n = - 1
727739 f = lambda x : x .split (pat , n )
740+ np_f = lambda x : np .core .defchararray .split (x , pat , n )
728741 else :
729742 if n is None or n == - 1 :
730743 n = 0
731744 regex = re .compile (pat )
732745 f = lambda x : regex .split (x , maxsplit = n )
733- res = _na_map (f , arr )
746+ # numpy doesn't support regex
747+ np_f = None
748+ res = _na_map (f , arr , np_f = np_f )
734749 return res
735750
736751
@@ -946,7 +961,8 @@ def str_decode(arr, encoding, errors="strict"):
946961 decoded : Series/Index of objects
947962 """
948963 f = lambda x : x .decode (encoding , errors )
949- return _na_map (f , arr )
964+ np_f = lambda x : np .core .defchararray .decode (x , encoding , errors ) # pass encoding then errors, exactly as f does
965+ return _na_map (f , arr , np_f = np_f )
950966
951967
952968def str_encode (arr , encoding , errors = "strict" ):
@@ -964,12 +980,13 @@ def str_encode(arr, encoding, errors="strict"):
964980 encoded : Series/Index of objects
965981 """
966982 f = lambda x : x .encode (encoding , errors )
967- return _na_map (f , arr )
983+ np_f = lambda x : np .core .defchararray .encode (x , encoding , errors ) # pass encoding then errors, exactly as f does
984+ return _na_map (f , arr , np_f = np_f )
968985
969986
970- def _noarg_wrapper (f , docstring = None , ** kargs ):
987+ def _noarg_wrapper (f , docstring = None , np_f = None , ** kargs ):
971988 def wrapper (self ):
972- result = _na_map (f , self .series , ** kargs )
989+ result = _na_map (f , self .series , np_f = np_f , ** kargs )
973990 return self ._wrap_result (result )
974991
975992 wrapper .__name__ = f .__name__
@@ -1443,7 +1460,8 @@ def rindex(self, sub, start=0, end=None):
14431460 _shared_docs ['swapcase' ] = dict (type = 'be swapcased' , method = 'swapcase' )
14441461 lower = _noarg_wrapper (lambda x : x .lower (),
14451462 docstring = _shared_docs ['casemethods' ] %
1446- _shared_docs ['lower' ])
1463+ _shared_docs ['lower' ],
1464+ np_f = np .core .defchararray .lower )
14471465 upper = _noarg_wrapper (lambda x : x .upper (),
14481466 docstring = _shared_docs ['casemethods' ] %
14491467 _shared_docs ['upper' ])
@@ -1452,7 +1470,8 @@ def rindex(self, sub, start=0, end=None):
14521470 _shared_docs ['title' ])
14531471 capitalize = _noarg_wrapper (lambda x : x .capitalize (),
14541472 docstring = _shared_docs ['casemethods' ] %
1455- _shared_docs ['capitalize' ])
1473+ _shared_docs ['capitalize' ],
1474+ np_f = np .core .defchararray .capitalize )
14561475 swapcase = _noarg_wrapper (lambda x : x .swapcase (),
14571476 docstring = _shared_docs ['casemethods' ] %
14581477 _shared_docs ['swapcase' ])
0 commit comments