1717from dataclasses import dataclass
1818import datetime
1919import decimal
20+ import textwrap
2021import typing
2122from typing import Any , Dict , List , Literal , Union
2223
@@ -422,7 +423,7 @@ def arrow_dtype_to_bigframes_dtype(arrow_dtype: pa.DataType) -> Dtype:
422423 return DEFAULT_DTYPE
423424
424425 # No other types matched.
425- raise ValueError (
426+ raise TypeError (
426427 f"Unexpected Arrow data type { arrow_dtype } . { constants .FEEDBACK_LINK } "
427428 )
428429
@@ -447,7 +448,7 @@ def bigframes_dtype_to_arrow_dtype(
447448 if pa .types .is_struct (bigframes_dtype .pyarrow_dtype ):
448449 return bigframes_dtype .pyarrow_dtype
449450 else :
450- raise ValueError (
451+ raise TypeError (
451452 f"No arrow conversion for { bigframes_dtype } . { constants .FEEDBACK_LINK } "
452453 )
453454
@@ -474,7 +475,7 @@ def bigframes_dtype_to_literal(
474475 if isinstance (bigframes_dtype , gpd .array .GeometryDtype ):
475476 return shapely .Point ((0 , 0 ))
476477
477- raise ValueError (
478+ raise TypeError (
478479 f"No literal conversion for { bigframes_dtype } . { constants .FEEDBACK_LINK } "
479480 )
480481
@@ -515,11 +516,91 @@ def arrow_type_to_literal(
515516 if pa .types .is_time (arrow_type ):
516517 return datetime .time (1 , 1 , 1 )
517518
518- raise ValueError (
519+ raise TypeError (
519520 f"No literal conversion for { arrow_type } . { constants .FEEDBACK_LINK } "
520521 )
521522
522523
def bigframes_type(dtype) -> Dtype:
    """Convert a type-like object to its canonical bigframes dtype.

    The input is dispatched in this order:

    * a value accepted by ``_is_bigframes_dtype`` is returned unchanged;
    * a ``str`` is resolved through ``_dtype_from_string``;
    * a class is resolved through ``_infer_dtype_from_python_type``;
    * a ``pa.DataType`` is resolved through ``arrow_dtype_to_bigframes_dtype``.

    Raises:
        TypeError: If ``dtype`` is none of the forms listed above.
    """
    # Already canonical, so there is nothing to convert.
    if _is_bigframes_dtype(dtype):
        return dtype
    if isinstance(dtype, str):
        return _dtype_from_string(dtype)
    if isinstance(dtype, type):
        return _infer_dtype_from_python_type(dtype)
    if isinstance(dtype, pa.DataType):
        return arrow_dtype_to_bigframes_dtype(dtype)
    raise TypeError(
        f"Cannot infer supported datatype for: { dtype } . { constants .FEEDBACK_LINK } "
    )
538+
539+
def _is_bigframes_dtype(dtype) -> bool:
    """Return True iff ``dtype`` is a canonical bigframes dtype.

    A value qualifies when it either matches the dtype of one of the
    ``SIMPLE_TYPES`` entries (same exact class *and* equal value), or is a
    ``pd.ArrowDtype`` whose pyarrow type is accepted by
    ``arrow_dtype_to_bigframes_dtype``.

    The check never hashes ``dtype``, so unhashable inputs simply yield
    False instead of failing with an unrelated "unhashable type" error.
    """
    # Have to be quite strict, as pyarrow dtypes equal their string form, and
    # we don't consider that a canonical form. Requiring the exact class first
    # also means ``==`` is only evaluated between objects of the same class.
    dtype_class = type(dtype)
    if any(
        dtype_class is type(item.dtype) and item.dtype == dtype
        for item in SIMPLE_TYPES
    ):
        return True
    if isinstance(dtype, pd.ArrowDtype):
        try:
            # Only the success/failure of the conversion matters here.
            arrow_dtype_to_bigframes_dtype(dtype.pyarrow_dtype)
        except TypeError:
            return False
        return True
    return False
554+
555+
def _infer_dtype_from_python_type(type: type) -> Dtype:
    """Map a Python (or numpy scalar) class to the matching bigframes dtype.

    Args:
        type: The class to classify. The parameter name shadows the builtin
            inside this function; it is kept for interface compatibility.

    Returns:
        The bigframes dtype constant associated with the class.

    Raises:
        TypeError: If the class matches none of the supported categories.
    """
    # bool must be tested before int: bool is a subclass of int.
    if issubclass(type, (bool, np.bool_)):
        return BOOL_DTYPE
    if issubclass(type, (int, np.integer)):
        return INT_DTYPE
    if issubclass(type, (float, np.floating)):
        return FLOAT_DTYPE
    if issubclass(type, decimal.Decimal):
        return NUMERIC_DTYPE
    if issubclass(type, (str, np.str_)):
        return STRING_DTYPE
    if issubclass(type, (bytes, np.bytes_)):
        return BYTES_DTYPE
    # datetime must be tested before date: datetime.datetime is a subclass of
    # datetime.date and would otherwise be misclassified as a plain date.
    # A class carries no tzinfo, so the timezone-naive dtype is used.
    if issubclass(type, datetime.datetime):
        return DATETIME_DTYPE
    if issubclass(type, datetime.date):
        return DATE_DTYPE
    if issubclass(type, datetime.time):
        return TIME_DTYPE
    raise TypeError(
        f"No matching datatype for python type: { type } . { constants .FEEDBACK_LINK } "
    )
577+
578+
def _dtype_from_string(dtype_string: str) -> Dtype:
    """Look up the bigframes dtype registered for a dtype string.

    Args:
        dtype_string: The dtype name to resolve. It is normalized with
            ``str()`` before the lookup in ``BIGFRAMES_STRING_TO_BIGFRAMES``.

    Returns:
        The dtype stored under that name. This function never returns None;
        an unknown name raises instead.

    Raises:
        TypeError: If the name is not a key of
            ``BIGFRAMES_STRING_TO_BIGFRAMES``.
    """
    key = str(dtype_string)
    if key in BIGFRAMES_STRING_TO_BIGFRAMES:
        return BIGFRAMES_STRING_TO_BIGFRAMES[typing.cast(DtypeString, key)]
    raise TypeError(
        textwrap.dedent(
            f"""
            Unexpected data type string { dtype_string } . The following
            dtypes are supported: 'boolean','Float64','Int64',
            'int64[pyarrow]','string','string[pyarrow]',
            'timestamp[us, tz=UTC][pyarrow]','timestamp[us][pyarrow]',
            'date32[day][pyarrow]','time64[us][pyarrow]'.
            The following pandas.ExtensionDtype are supported:
            pandas.BooleanDtype(), pandas.Float64Dtype(),
            pandas.Int64Dtype(), pandas.StringDtype(storage="pyarrow"),
            pd.ArrowDtype(pa.date32()), pd.ArrowDtype(pa.time64("us")),
            pd.ArrowDtype(pa.timestamp("us")),
            pd.ArrowDtype(pa.timestamp("us", tz="UTC")).
            { constants .FEEDBACK_LINK }
            """
        )
    )
602+
603+
523604def infer_literal_type (literal ) -> typing .Optional [Dtype ]:
524605 # Maybe also normalize literal to canonical python representation to remove this burden from compilers?
525606 if pd .api .types .is_list_like (literal ):
@@ -539,30 +620,17 @@ def infer_literal_type(literal) -> typing.Optional[Dtype]:
539620 return pd .ArrowDtype (pa .struct (fields ))
540621 if pd .isna (literal ):
541622 return None # Null value without a definite type
542- if isinstance (literal , (bool , np .bool_ )):
543- return BOOL_DTYPE
544- if isinstance (literal , (int , np .integer )):
545- return INT_DTYPE
546- if isinstance (literal , (float , np .floating )):
547- return FLOAT_DTYPE
548- if isinstance (literal , decimal .Decimal ):
549- return NUMERIC_DTYPE
550- if isinstance (literal , (str , np .str_ )):
551- return STRING_DTYPE
552- if isinstance (literal , (bytes , np .bytes_ )):
553- return BYTES_DTYPE
554623 # Make sure to check datetime before date as datetimes are also dates
555624 if isinstance (literal , (datetime .datetime , pd .Timestamp )):
556625 if literal .tzinfo is not None :
557626 return TIMESTAMP_DTYPE
558627 else :
559628 return DATETIME_DTYPE
560- if isinstance (literal , datetime .date ):
561- return DATE_DTYPE
562- if isinstance (literal , datetime .time ):
563- return TIME_DTYPE
629+ from_python_type = _infer_dtype_from_python_type (type (literal ))
630+ if from_python_type is not None :
631+ return from_python_type
564632 else :
565- raise ValueError (f"Unable to infer type for value: { literal } " )
633+ raise TypeError (f"Unable to infer type for value: { literal } " )
566634
567635
568636def infer_literal_arrow_type (literal ) -> typing .Optional [pa .DataType ]:
@@ -602,7 +670,7 @@ def convert_schema_field(
602670 return field .name , pd .ArrowDtype (pa_type )
603671 return field .name , _TK_TO_BIGFRAMES [field .field_type ]
604672 else :
605- raise ValueError (f"Cannot handle type: { field .field_type } " )
673+ raise TypeError (f"Cannot handle type: { field .field_type } " )
606674
607675
608676def convert_to_schema_field (
@@ -636,7 +704,7 @@ def convert_to_schema_field(
636704 if bigframes_dtype .pyarrow_dtype == pa .duration ("us" ):
637705 # Timedeltas are represented as integers in microseconds.
638706 return google .cloud .bigquery .SchemaField (name , "INTEGER" )
639- raise ValueError (
707+ raise TypeError (
640708 f"No arrow conversion for { bigframes_dtype } . { constants .FEEDBACK_LINK } "
641709 )
642710
0 commit comments