@@ -2010,7 +2010,7 @@ def merge(
20102010 mappings = (* left_mappings , * right_mappings ),
20112011 type = how ,
20122012 )
2013- joined_expr = self .expr .join (other .expr , join_def = join_def )
2013+ joined_expr = self .expr .relational_join (other .expr , join_def = join_def )
20142014 result_columns = []
20152015 matching_join_labels = []
20162016
@@ -2269,25 +2269,33 @@ def join(
22692269 raise NotImplementedError (
22702270 f"Only how='outer','left','right','inner' currently supported. { constants .FEEDBACK_LINK } "
22712271 )
2272- # Special case for null index,
2272+ # Handle null index, which only supports row join
2273+ if (self .index .nlevels == other .index .nlevels == 0 ) and not block_identity_join :
2274+ if not block_identity_join :
2275+ result = try_row_join (self , other , how = how )
2276+ if result is not None :
2277+ return result
2278+ raise bigframes .exceptions .NullIndexError (
2279+ "Cannot implicitly align objects. Set an explicit index using set_index."
2280+ )
2281+
2282+ # Oddly, pandas row-wise join ignores right index names
22732283 if (
2274- ( self . index . nlevels == other . index . nlevels == 0 )
2275- and not sort
2276- and not block_identity_join
2284+ not block_identity_join
2285+ and ( self . index . nlevels == other . index . nlevels )
2286+ and ( self . index . dtypes == other . index . dtypes )
22772287 ):
2278- return join_indexless (self , other , how = how )
2288+ result = try_row_join (self , other , how = how )
2289+ if result is not None :
2290+ return result
22792291
22802292 self ._throw_if_null_index ("join" )
22812293 other ._throw_if_null_index ("join" )
22822294 if self .index .nlevels == other .index .nlevels == 1 :
2283- return join_mono_indexed (
2284- self , other , how = how , sort = sort , block_identity_join = block_identity_join
2285- )
2286- else :
2295+ return join_mono_indexed (self , other , how = how , sort = sort )
2296+ else : # Handles cases where one or both sides are multi-indexed
22872297 # Always sort multi-index join
2288- return join_multi_indexed (
2289- self , other , how = how , sort = sort , block_identity_join = block_identity_join
2290- )
2298+ return join_multi_indexed (self , other , how = how , sort = sort )
22912299
22922300 def _force_reproject (self ) -> Block :
22932301 """Forces a reprojection of the underlying tables expression. Used to force predicate/order application before subsequent operations."""
@@ -2626,46 +2634,55 @@ def is_uniquely_named(self: BlockIndexProperties):
26262634 return len (set (self .names )) == len (self .names )
26272635
26282636
2629- def join_indexless (
2637+ def try_row_join (
26302638 left : Block ,
26312639 right : Block ,
26322640 * ,
26332641 how = "left" ,
2634- ) -> Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],]:
2635- """Joins two blocks"""
2642+ ) -> Optional [ Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],] ]:
2643+ """Joins two blocks that have a common root expression by merging the projections. Returns None if the blocks cannot be aligned as a projection."""
26362644 left_expr = left .expr
26372645 right_expr = right .expr
2646+ # Create a new array value, mapping from both, then left, and then right
2647+ join_keys = tuple (
2648+ join_defs .CoalescedColumnMapping (
2649+ left_source_id = left_id ,
2650+ right_source_id = right_id ,
2651+ destination_id = guid .generate_guid (),
2652+ )
2653+ for left_id , right_id in zip (left .index_columns , right .index_columns )
2654+ )
26382655 left_mappings = [
26392656 join_defs .JoinColumnMapping (
26402657 source_table = join_defs .JoinSide .LEFT ,
26412658 source_id = id ,
26422659 destination_id = guid .generate_guid (),
26432660 )
2644- for id in left_expr . column_ids
2661+ for id in left . value_columns
26452662 ]
26462663 right_mappings = [
26472664 join_defs .JoinColumnMapping (
26482665 source_table = join_defs .JoinSide .RIGHT ,
26492666 source_id = id ,
26502667 destination_id = guid .generate_guid (),
26512668 )
2652- for id in right_expr . column_ids
2669+ for id in right . value_columns
26532670 ]
26542671 combined_expr = left_expr .try_align_as_projection (
26552672 right_expr ,
26562673 join_type = how ,
2674+ join_keys = join_keys ,
26572675 mappings = (* left_mappings , * right_mappings ),
26582676 )
26592677 if combined_expr is None :
2660- raise bigframes .exceptions .NullIndexError (
2661- "Cannot implicitly align objects. Set an explicit index using set_index."
2662- )
2678+ return None
26632679 get_column_left = {m .source_id : m .destination_id for m in left_mappings }
26642680 get_column_right = {m .source_id : m .destination_id for m in right_mappings }
26652681 block = Block (
26662682 combined_expr ,
26672683 column_labels = [* left .column_labels , * right .column_labels ],
2668- index_columns = (),
2684+ index_columns = (key .destination_id for key in join_keys ),
2685+ index_labels = left .index .names ,
26692686 )
26702687 return (
26712688 block ,
@@ -2707,7 +2724,7 @@ def join_with_single_row(
27072724 mappings = (* left_mappings , * right_mappings ),
27082725 type = "cross" ,
27092726 )
2710- combined_expr = left_expr .join (
2727+ combined_expr = left_expr .relational_join (
27112728 right_expr ,
27122729 join_def = join_def ,
27132730 )
@@ -2734,7 +2751,6 @@ def join_mono_indexed(
27342751 * ,
27352752 how = "left" ,
27362753 sort = False ,
2737- block_identity_join : bool = False ,
27382754) -> Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],]:
27392755 left_expr = left .expr
27402756 right_expr = right .expr
@@ -2762,14 +2778,14 @@ def join_mono_indexed(
27622778 mappings = (* left_mappings , * right_mappings ),
27632779 type = how ,
27642780 )
2765- combined_expr = left_expr .join (
2781+
2782+ combined_expr = left_expr .relational_join (
27662783 right_expr ,
27672784 join_def = join_def ,
2768- allow_row_identity_join = (not block_identity_join ),
27692785 )
2786+
27702787 get_column_left = join_def .get_left_mapping ()
27712788 get_column_right = join_def .get_right_mapping ()
2772- # Drop original indices from each side. and used the coalesced combination generated by the join.
27732789 left_index = get_column_left [left .index_columns [0 ]]
27742790 right_index = get_column_right [right .index_columns [0 ]]
27752791 # Drop the original indices from each side and use the coalesced combination generated by the join.
@@ -2803,7 +2819,6 @@ def join_multi_indexed(
28032819 * ,
28042820 how = "left" ,
28052821 sort = False ,
2806- block_identity_join : bool = False ,
28072822) -> Tuple [Block , Tuple [Mapping [str , str ], Mapping [str , str ]],]:
28082823 if not (left .index .is_uniquely_named () and right .index .is_uniquely_named ()):
28092824 raise ValueError ("Joins not supported on indices with non-unique level names" )
@@ -2822,8 +2837,6 @@ def join_multi_indexed(
28222837 left_join_ids = [left .index .resolve_level_exact (name ) for name in common_names ]
28232838 right_join_ids = [right .index .resolve_level_exact (name ) for name in common_names ]
28242839
2825- names_fully_match = len (left_only_names ) == 0 and len (right_only_names ) == 0
2826-
28272840 left_expr = left .expr
28282841 right_expr = right .expr
28292842
@@ -2853,13 +2866,11 @@ def join_multi_indexed(
28532866 type = how ,
28542867 )
28552868
2856- combined_expr = left_expr .join (
2869+ combined_expr = left_expr .relational_join (
28572870 right_expr ,
28582871 join_def = join_def ,
2859- # If we're only joining on a subset of the index columns, we need to
2860- # perform a true join.
2861- allow_row_identity_join = (names_fully_match and not block_identity_join ),
28622872 )
2873+
28632874 get_column_left = join_def .get_left_mapping ()
28642875 get_column_right = join_def .get_right_mapping ()
28652876 left_ids_post_join = [get_column_left [id ] for id in left_join_ids ]
0 commit comments