-
- Notifications
You must be signed in to change notification settings - Fork 19.4k
Fix integral truediv and floordiv for pyarrow types with large divisor and avoid floating points for floordiv #56677
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 21 commits
aad3b2e 599420b 2f43c42 579d4ca e826790 bfcff0b cd7c4c7 692dde4 fbae188 0b3f3c8 2024f4c 31a8e19 f7095ba 80a6976 c992e77 e56b1b3 63bf5d8 167507e 263b8a2 c77c3f7 e36fb90 47c4474 ae2afa3 9913c42 51bd7f3 File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -109,7 +109,7 @@ | |
| | ||
| def cast_for_truediv( | ||
| arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar | ||
| ) -> pa.ChunkedArray: | ||
| ) -> tuple[pa.ChunkedArray, pa.Array | pa.Scalar]: | ||
| # Ensure int / int -> float mirroring Python/Numpy behavior | ||
| # as pc.divide_checked(int, int) -> int | ||
| if pa.types.is_integer(arrow_array.type) and pa.types.is_integer( | ||
| | @@ -120,19 +120,62 @@ def cast_for_truediv( | |
| # Intentionally not using arrow_array.cast because it could be a scalar | ||
| # value in reflected case, and safe=False only added to | ||
| # scalar cast in pyarrow 13. | ||
| return pc.cast(arrow_array, pa.float64(), safe=False) | ||
| return arrow_array | ||
| # In arrow, the common type between integral and float64 is float64, | ||
| # but the integral operand is safe-cast to float64. To mirror python | ||
| # and numpy, we want an unsafe cast, so we cast both operands to | ||
| # float64 before invoking arrow. | ||
| return pc.cast(arrow_array, pa.float64(), safe=False), pc.cast( | ||
| pa_object, pa.float64(), safe=False | ||
| ) | ||
| | ||
| return arrow_array, pa_object | ||
| | ||
| def floordiv_compat( | ||
| left: pa.ChunkedArray | pa.Array | pa.Scalar, | ||
| right: pa.ChunkedArray | pa.Array | pa.Scalar, | ||
| ) -> pa.ChunkedArray: | ||
| # Ensure int // int -> int mirroring Python/Numpy behavior | ||
| # as pc.floor(pc.divide_checked(int, int)) -> float | ||
| converted_left = cast_for_truediv(left, right) | ||
| result = pc.floor(pc.divide(converted_left, right)) | ||
| if pa.types.is_integer(left.type) and pa.types.is_integer(right.type): | ||
| # Use divide_checked to ensure cases like -9223372036854775808 // -1 | ||
rohanjain101 marked this conversation as resolved. Outdated Show resolved Hide resolved | ||
| # don't silently overflow. | ||
| divided = pc.divide_checked(left, right) | ||
| # If one operand is a signed integer, we need to ensure division | ||
| # rounds towards negative infinity instead of 0. If divided.type | ||
| # is a signed integer, at least one operand is signed; | ||
| # otherwise divided.type would be an unsigned integer. | ||
| if pa.types.is_signed_integer(divided.type): | ||
| # GH 56676: avoid storing the intermediate calculation in a | ||
| # floating point type. | ||
| has_remainder = pc.not_equal(pc.multiply(divided, right), left) | ||
| result = pc.if_else( | ||
| pc.and_( | ||
| has_remainder, | ||
| pc.less( | ||
| pc.bit_wise_xor(left, right), | ||
| ||
| pa.scalar(0, type=divided.type), | ||
| ), | ||
| ), | ||
| # GH 55561: floordiv should round towards negative infinity. | ||
| # pc.divide_checked for integral types rounds towards 0. | ||
| # Avoid using subtract_checked, which would incorrectly raise | ||
| # for -9223372036854775808 // 1: if integer overflow occurs, | ||
| # then has_remainder is false and the overflowed value is | ||
| # discarded. | ||
| pc.subtract(divided, pa.scalar(1, type=divided.type)), | ||
| divided, | ||
| ) | ||
| else: | ||
| # For 2 unsigned integer operands, integer division is | ||
| # same as floor division. | ||
| result = divided | ||
| # Ensure compatibility with older versions of pandas where | ||
| # int8 // int64 returned int8 rather than int64. | ||
| ||
| result = result.cast(left.type) | ||
| else: | ||
| # Use divide instead of divide_checked to match numpy | ||
| # floordiv where divide by 0 returns infinity for floating | ||
| # point types. | ||
| divided = pc.divide(left, right) | ||
| result = pc.floor(divided) | ||
| return result | ||
| | ||
| ARROW_ARITHMETIC_FUNCS = { | ||
| | @@ -142,8 +185,8 @@ def floordiv_compat( | |
| "rsub": lambda x, y: pc.subtract_checked(y, x), | ||
| "mul": pc.multiply_checked, | ||
| "rmul": lambda x, y: pc.multiply_checked(y, x), | ||
| "truediv": lambda x, y: pc.divide(cast_for_truediv(x, y), y), | ||
| "rtruediv": lambda x, y: pc.divide(y, cast_for_truediv(x, y)), | ||
| "truediv": lambda x, y: pc.divide(*cast_for_truediv(x, y)), | ||
| "rtruediv": lambda x, y: pc.divide(*cast_for_truediv(y, x)), | ||
| "floordiv": lambda x, y: floordiv_compat(x, y), | ||
| "rfloordiv": lambda x, y: floordiv_compat(y, x), | ||
| "mod": NotImplemented, | ||
| | ||
Uh oh!
There was an error while loading. Please reload this page.