Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2259,7 +2259,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False,
seen.int_ = True
floats[i] = <float64_t>val
complexes[i] = <double complex>val
if not seen.null_:
if not seen.null_ or convert_to_nullable_integer:
val = int(val)
seen.saw_int(val)

Expand Down
8 changes: 7 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1825,6 +1825,7 @@ def from_records(
columns=None,
coerce_float: bool = False,
nrows=None,
nullable_integer=False,
) -> DataFrame:
"""
Convert structured or record ndarray to DataFrame.
Expand Down Expand Up @@ -1852,6 +1853,9 @@ def from_records(
decimal.Decimal) to floating point, useful for SQL result sets.
nrows : int, default None
Number of rows to read if data is an iterator.
nullable_integer : bool, default False
Attempt to convert integer data containing missing values to a nullable
integer dtype (integer NA) so the integer dtype is preserved; works for
read_sql_query only.

Returns
-------
Expand Down Expand Up @@ -1946,7 +1950,9 @@ def from_records(
arrays, columns = to_arrays(data, columns)
arr_columns = columns
else:
arrays, arr_columns = to_arrays(data, columns)
arrays, arr_columns = to_arrays(
data, columns, nullable_integer=nullable_integer
)
if coerce_float:
for i, arr in enumerate(arrays):
if arr.dtype == object:
Expand Down
20 changes: 15 additions & 5 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,10 @@ def dataclasses_to_dicts(data):


def to_arrays(
data, columns: Optional[Index], dtype: Optional[DtypeObj] = None
data,
columns: Optional[Index],
dtype: Optional[DtypeObj] = None,
nullable_integer=False,
) -> Tuple[List[ArrayLike], Index]:
"""
Return list of arrays, columns.
Expand Down Expand Up @@ -678,7 +681,9 @@ def to_arrays(
data = [tuple(x) for x in data]
content = _list_to_arrays(data)

content, columns = _finalize_columns_and_data(content, columns, dtype)
content, columns = _finalize_columns_and_data(
content, columns, dtype, nullable_integer=nullable_integer
)
return content, columns


Expand Down Expand Up @@ -766,6 +771,7 @@ def _finalize_columns_and_data(
content: np.ndarray, # ndim == 2
columns: Optional[Index],
dtype: Optional[DtypeObj],
nullable_integer=False,
) -> Tuple[List[np.ndarray], Index]:
"""
Ensure we have valid columns, cast object dtypes if possible.
Expand All @@ -779,7 +785,9 @@ def _finalize_columns_and_data(
raise ValueError(err) from err

if len(content) and content[0].dtype == np.object_:
content = _convert_object_array(content, dtype=dtype)
content = _convert_object_array(
content, dtype=dtype, nullable_integer=nullable_integer
)
return content, columns


Expand Down Expand Up @@ -842,7 +850,7 @@ def _validate_or_indexify_columns(


def _convert_object_array(
content: List[np.ndarray], dtype: Optional[DtypeObj]
content: List[np.ndarray], dtype: Optional[DtypeObj], nullable_integer=False
) -> List[ArrayLike]:
"""
Internal function to convert object array.
Expand All @@ -859,7 +867,9 @@ def _convert_object_array(
# provide soft conversion of object dtypes
def convert(arr):
if dtype != np.dtype("O"):
arr = lib.maybe_convert_objects(arr)
arr = lib.maybe_convert_objects(
arr, convert_to_nullable_integer=nullable_integer
)
arr = maybe_cast_to_datetime(arr, dtype)
return arr

Expand Down
28 changes: 26 additions & 2 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,15 @@ def _wrap_result(
coerce_float: bool = True,
parse_dates=None,
dtype: Optional[DtypeArg] = None,
nullable_integer=False,
):
"""Wrap result set of query in a DataFrame."""
frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float)
frame = DataFrame.from_records(
data,
columns=columns,
coerce_float=coerce_float,
nullable_integer=nullable_integer,
)

if dtype:
frame = frame.astype(dtype)
Expand Down Expand Up @@ -442,6 +448,7 @@ def read_sql(
parse_dates=None,
columns=None,
chunksize: None = None,
nullable_integer=False,
) -> DataFrame:
...

Expand All @@ -456,6 +463,7 @@ def read_sql(
parse_dates=None,
columns=None,
chunksize: int = 1,
nullable_integer=False,
) -> Iterator[DataFrame]:
...

Expand All @@ -469,6 +477,7 @@ def read_sql(
parse_dates=None,
columns=None,
chunksize: Optional[int] = None,
nullable_integer=False,
) -> Union[DataFrame, Iterator[DataFrame]]:
"""
Read SQL query or database table into a DataFrame.
Expand Down Expand Up @@ -516,6 +525,9 @@ def read_sql(
chunksize : int, default None
If specified, return an iterator where `chunksize` is the
number of rows to include in each chunk.
nullable_integer : bool, default False
Attempt to convert integer data containing missing values to a nullable
integer dtype (integer NA) so the integer dtype is preserved; works for
read_sql_query only.

Returns
-------
Expand Down Expand Up @@ -945,6 +957,7 @@ def _query_iterator(
columns,
coerce_float: bool = True,
parse_dates=None,
nullable_integer=False,
):
"""Return generator through chunked result set."""
has_read_data = False
Expand All @@ -959,7 +972,10 @@ def _query_iterator(
else:
has_read_data = True
self.frame = DataFrame.from_records(
data, columns=columns, coerce_float=coerce_float
data,
columns=columns,
coerce_float=coerce_float,
nullable_integer=nullable_integer,
)

self._harmonize_columns(parse_dates=parse_dates)
Expand Down Expand Up @@ -1379,6 +1395,7 @@ def _query_iterator(
coerce_float=True,
parse_dates=None,
dtype: Optional[DtypeArg] = None,
nullable_integer=False,
):
"""Return generator through chunked result set"""
has_read_data = False
Expand All @@ -1403,6 +1420,7 @@ def _query_iterator(
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
nullable_integer=nullable_integer,
)

def read_query(
Expand All @@ -1414,6 +1432,7 @@ def read_query(
params=None,
chunksize: Optional[int] = None,
dtype: Optional[DtypeArg] = None,
nullable_integer=False,
):
"""
Read SQL query into a DataFrame.
Expand Down Expand Up @@ -1450,6 +1469,9 @@ def read_query(
{'a': np.float64, 'b': np.int32, 'c': 'Int64'}

.. versionadded:: 1.3.0
nullable_integer : bool, default False
Attempt to convert integer data containing missing values to a nullable
integer dtype (integer NA) so the integer dtype is preserved; works for
read_sql_query only.

Returns
-------
Expand All @@ -1475,6 +1497,7 @@ def read_query(
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
nullable_integer=nullable_integer,
)
else:
data = result.fetchall()
Expand All @@ -1485,6 +1508,7 @@ def read_query(
coerce_float=coerce_float,
parse_dates=parse_dates,
dtype=dtype,
nullable_integer=nullable_integer,
)
return frame

Expand Down