3636from pandas ._typing import (
3737 ArrayLike ,
3838 DtypeArg ,
39+ DtypeObj ,
3940 Scalar ,
4041)
4142from pandas .errors import (
6162 is_string_dtype ,
6263 pandas_dtype ,
6364)
64- from pandas .core .dtypes .dtypes import CategoricalDtype
65+ from pandas .core .dtypes .dtypes import (
66+ CategoricalDtype ,
67+ ExtensionDtype ,
68+ )
6569from pandas .core .dtypes .missing import isna
6670
6771from pandas import StringDtype
6872from pandas .core import algorithms
6973from pandas .core .arrays import (
7074 BooleanArray ,
7175 Categorical ,
76+ ExtensionArray ,
7277 FloatingArray ,
7378 IntegerArray ,
7479)
@@ -599,14 +604,8 @@ def _convert_to_ndarrays(
599604 # type specified in dtype param or cast_type is an EA
600605 if cast_type and (not is_dtype_equal (cvals , cast_type ) or is_ea ):
601606 if not is_ea and na_count > 0 :
602- try :
603- if is_bool_dtype (cast_type ):
604- raise ValueError (
605- f"Bool column has NA values in column { c } "
606- )
607- except (AttributeError , TypeError ):
608- # invalid input to is_bool_dtype
609- pass
607+ if is_bool_dtype (cast_type ):
608+ raise ValueError (f"Bool column has NA values in column { c } " )
610609 cast_type = pandas_dtype (cast_type )
611610 cvals = self ._cast_types (cvals , cast_type , c )
612611
@@ -686,7 +685,7 @@ def _set(x) -> int:
686685
687686 def _infer_types (
688687 self , values , na_values , no_dtype_specified , try_num_bool : bool = True
689- ):
688+ ) -> tuple [ ArrayLike , int ] :
690689 """
691690 Infer types of values, possibly casting
692691
@@ -700,7 +699,7 @@ def _infer_types(
700699
701700 Returns
702701 -------
703- converted : ndarray
702+ converted : ndarray or ExtensionArray
704703 na_count : int
705704 """
706705 na_count = 0
@@ -777,48 +776,50 @@ def _infer_types(
777776
778777 return result , na_count
779778
780- def _cast_types (self , values , cast_type , column ):
779+ def _cast_types (self , values : ArrayLike , cast_type : DtypeObj , column ) -> ArrayLike :
781780 """
782781 Cast values to specified type
783782
784783 Parameters
785784 ----------
786- values : ndarray
787- cast_type : string or np.dtype
785+ values : ndarray or ExtensionArray
786+ cast_type : np.dtype or ExtensionDtype
788787 dtype to cast values to
789788 column : string
790789 column name - used only for error reporting
791790
792791 Returns
793792 -------
794- converted : ndarray
793+ converted : ndarray or ExtensionArray
795794 """
796795 if is_categorical_dtype (cast_type ):
797796 known_cats = (
798797 isinstance (cast_type , CategoricalDtype )
799798 and cast_type .categories is not None
800799 )
801800
802- if not is_object_dtype (values ) and not known_cats :
801+ if not is_object_dtype (values . dtype ) and not known_cats :
803802 # TODO: this is for consistency with
804803 # c-parser which parses all categories
805804 # as strings
806805
807- values = astype_nansafe (values , np .dtype (str ))
806+ values = lib .ensure_string_array (
807+ values , skipna = False , convert_na_value = False
808+ )
808809
809810 cats = Index (values ).unique ().dropna ()
810811 values = Categorical ._from_inferred_categories (
811812 cats , cats .get_indexer (values ), cast_type , true_values = self .true_values
812813 )
813814
814815 # use the EA's implementation of casting
815- elif is_extension_array_dtype (cast_type ):
816- # ensure cast_type is an actual dtype and not a string
817- cast_type = pandas_dtype (cast_type )
816+ elif isinstance (cast_type , ExtensionDtype ):
818817 array_type = cast_type .construct_array_type ()
819818 try :
820819 if is_bool_dtype (cast_type ):
821- return array_type ._from_sequence_of_strings (
820+ # error: Unexpected keyword argument "true_values" for
821+ # "_from_sequence_of_strings" of "ExtensionArray"
822+ return array_type ._from_sequence_of_strings ( # type: ignore[call-arg] # noqa:E501
822823 values ,
823824 dtype = cast_type ,
824825 true_values = self .true_values ,
@@ -832,6 +833,8 @@ def _cast_types(self, values, cast_type, column):
832833 "_from_sequence_of_strings in order to be used in parser methods"
833834 ) from err
834835
836+ elif isinstance (values , ExtensionArray ):
837+ values = values .astype (cast_type , copy = False )
835838 else :
836839 try :
837840 values = astype_nansafe (values , cast_type , copy = True , skipna = True )
0 commit comments