@@ -63,7 +63,6 @@ def join_by_column(
6363 allow_row_identity_join (bool):
6464 If True, allow matching by row identity. Set to False to always
6565 perform a true JOIN in generated SQL.
66-
6766 Returns:
6867 The joined expression and the objects needed to interpret it.
6968
@@ -123,13 +122,13 @@ def join_by_column(
123122 ),
124123 )
125124 else :
126- # Generate offsets if non-default ordering is applied
127- # Assumption, both sides are totally ordered, otherwise offsets will be nondeterministic
128125 left_table = left .to_ibis_expr (
129- ordering_mode = "string_encoded" , order_col_name = core .ORDER_ID_COLUMN
126+ ordering_mode = "unordered" ,
127+ expose_hidden_cols = True ,
130128 )
131129 right_table = right .to_ibis_expr (
132- ordering_mode = "string_encoded" , order_col_name = core .ORDER_ID_COLUMN
130+ ordering_mode = "unordered" ,
131+ expose_hidden_cols = True ,
133132 )
134133 join_conditions = [
135134 value_to_join_key (left_table [left_index ])
@@ -178,41 +177,13 @@ def get_column_right(key: str) -> str:
178177
179178 return key
180179
181- left_ordering_encoding_size = (
182- left ._ordering .string_encoding .length
183- if left ._ordering .is_string_encoded
184- else bigframes .core .ordering .DEFAULT_ORDERING_ID_LENGTH
185- )
186- right_ordering_encoding_size = (
187- right ._ordering .string_encoding .length
188- if right ._ordering .is_string_encoded
189- else bigframes .core .ordering .DEFAULT_ORDERING_ID_LENGTH
190- )
191-
192- # Preserve original ordering accross joins.
193- left_order_id = get_column_left (core .ORDER_ID_COLUMN )
194- right_order_id = get_column_right (core .ORDER_ID_COLUMN )
195- new_order_id_col = _merge_order_ids (
196- typing .cast (ibis_types .StringColumn , combined_table [left_order_id ]),
197- left_ordering_encoding_size ,
198- typing .cast (ibis_types .StringColumn , combined_table [right_order_id ]),
199- right_ordering_encoding_size ,
200- how ,
201- )
202- new_order_id = new_order_id_col .get_name ()
203- if new_order_id is None :
204- raise ValueError ("new_order_id unexpectedly has no name" )
205-
206- hidden_columns = (new_order_id_col ,)
207- ordering = core .ExpressionOrdering (
208- # Order id is non-nullable but na_last=False generates simpler sql with current impl
209- ordering_value_columns = [
210- core .OrderingColumnReference (new_order_id , na_last = False )
211- ],
212- total_ordering_columns = frozenset ([new_order_id ]),
213- string_encoding = core .StringEncoding (
214- True , left_ordering_encoding_size + right_ordering_encoding_size
215- ),
180+ # Preserve ordering accross joins.
181+ ordering = join_orderings (
182+ left ._ordering ,
183+ right ._ordering ,
184+ get_column_left ,
185+ get_column_right ,
186+ left_order_dominates = (how != "right" ),
216187 )
217188
218189 left_join_keys = [
@@ -234,11 +205,21 @@ def get_column_right(key: str) -> str:
234205 for col in right .columns
235206 ]
236207 )
208+ hidden_ordering_columns = [
209+ * [
210+ combined_table [get_column_left (col .get_name ())]
211+ for col in left .hidden_ordering_columns
212+ ],
213+ * [
214+ combined_table [get_column_right (col .get_name ())]
215+ for col in right .hidden_ordering_columns
216+ ],
217+ ]
237218 combined_expr = core .ArrayValue (
238219 left ._session ,
239220 combined_table ,
240221 columns = columns ,
241- hidden_ordering_columns = hidden_columns ,
222+ hidden_ordering_columns = hidden_ordering_columns ,
242223 ordering = ordering ,
243224 )
244225 if sort :
@@ -313,32 +294,33 @@ def value_to_join_key(value: ibis_types.Value):
313294 return value .fillna (ibis_types .literal ("$NULL_SENTINEL$" ))
314295
315296
316- def _merge_order_ids (
317- left_id : ibis_types .StringColumn ,
318- left_encoding_size : int ,
319- right_id : ibis_types .StringColumn ,
320- right_encoding_size : int ,
321- how : str ,
322- ) -> ibis_types .StringColumn :
323- if how == "right" :
324- return _merge_order_ids (
325- right_id , right_encoding_size , left_id , left_encoding_size , "left"
326- )
297+ def join_orderings (
298+ left : core .ExpressionOrdering ,
299+ right : core .ExpressionOrdering ,
300+ left_id_mapping : Callable [[str ], str ],
301+ right_id_mapping : Callable [[str ], str ],
302+ left_order_dominates : bool = True ,
303+ ) -> core .ExpressionOrdering :
304+ left_ordering_refs = [
305+ ref .with_name (left_id_mapping (ref .column_id ))
306+ for ref in left .all_ordering_columns
307+ ]
308+ right_ordering_refs = [
309+ ref .with_name (right_id_mapping (ref .column_id ))
310+ for ref in right .all_ordering_columns
311+ ]
312+ if left_order_dominates :
313+ joined_refs = [* left_ordering_refs , * right_ordering_refs ]
314+ else :
315+ joined_refs = [* right_ordering_refs , * left_ordering_refs ]
327316
328- if how == "left" :
329- right_id = typing .cast (
330- ibis_types .StringColumn ,
331- right_id .fillna (ibis_types .literal (":" * right_encoding_size )),
332- )
333- elif how != "inner" : # outer join
334- left_id = typing .cast (
335- ibis_types .StringColumn ,
336- left_id .fillna (ibis_types .literal (":" * left_encoding_size )),
337- )
338- right_id = typing .cast (
339- ibis_types .StringColumn ,
340- right_id .fillna (ibis_types .literal (":" * right_encoding_size )),
341- )
342- return (left_id + right_id ).name (
343- bigframes .core .guid .generate_guid (prefix = "bigframes_ordering_id_" )
317+ left_total_order_cols = frozenset (
318+ [left_id_mapping (id ) for id in left .total_ordering_columns ]
319+ )
320+ right_total_order_cols = frozenset (
321+ [right_id_mapping (id ) for id in right .total_ordering_columns ]
322+ )
323+ return core .ExpressionOrdering (
324+ ordering_value_columns = joined_refs ,
325+ total_ordering_columns = left_total_order_cols | right_total_order_cols ,
344326 )
0 commit comments