I'm having trouble with datatype inference, so I decided to map the datatypes manually. However, I have discovered that my mapping does not recognize pandas dtypes as expected.
import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd

# Manual mapping from pandas dtype identifiers to the pyarrow types to use.
# Keys are a mix of plain strings and pandas extension-dtype instances.
# NOTE(review): "double" is a NumPy alias, but a float64 column reports its
# name as "float64" -- confirm whether a "float64" key is wanted as well.
pd_to_pa_dtypes = {
    "object": pa.string(),
    "string": pa.string(),
    "double": pa.float64(),
    pd.Int32Dtype(): pa.int32(),
    "int64": pa.int64(),
    pd.Int64Dtype(): pa.int64(),
    "datetime64[ns]": pa.timestamp("ns", tz="UTC"),
    pd.StringDtype(): pa.string(),
    "<M8[ns]": pa.timestamp("ns", tz="UTC"),
}

date = pd.to_datetime(["30/04/2021", "28/04/2021"], format="%d/%m/%Y")
df = pd.DataFrame(date)
print(df[0].dtype)  # which print datetime64[ns]

# Looking the dtype object up directly fails even though both string
# spellings are keys: a dict finds keys through their hash, and the NumPy
# dtype object does not hash like either string. The error is caught here
# so the rest of the demonstration still runs.
try:
    pd_to_pa_dtypes[df[0].dtype]
except KeyError as err:
    print(f"KeyError: {err}")  # KeyError: dtype('<M8[ns]')

# However I inserted in my dictionary both "datetime64[ns]" and "<M8[ns]",
# also if i check datatypes. `==` coerces the string to a dtype before
# comparing, which is why these succeed where the dict lookup does not.
print(df[0].dtype == "datetime64[ns]")  # True
print(df[0].dtype == "<M8[ns]")  # True

# This happens also for some other datatypes, for example I had problems
# with int64, while some are mapped as expected.
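Look the type up by the dtype's string code instead of by the dtype object: pd_to_pa_dtypes[df[0].dtype.str]. A dict finds its keys through their hash, and a NumPy dtype object does not hash like the string it compares equal to, so df[0].dtype == "<M8[ns]" is True while pd_to_pa_dtypes[df[0].dtype] raises KeyError.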