-
- Notifications
You must be signed in to change notification settings - Fork 19.3k
Description
Pandas version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest version of pandas.
-
I have confirmed this bug exists on the main branch of pandas.
Reproducible Example
from datetime import datetime

data_A = list()
data_B = list()
for i in range(10):
    data_A.append({
        "id": i,
        "created_date": datetime.today(),
        "created_at": datetime.now(),
    })
    data_B.append({
        "id": i if i % 2 == 0 else 3*i,
        "created_date": datetime.today(),
        "created_at": datetime.now(),
    })

df_A = pd.DataFrame.from_dict(data_A)
df_B = pd.DataFrame.from_dict(data_B)
df_A.merge(df_B, how="full", on="id")

---------------------------------------------------------------------------
UnboundLocalError                         Traceback (most recent call last)
Cell In[59], line 23
     19 df_A = pd.DataFrame.from_dict(data_A)
     21 df_B = pd.DataFrame.from_dict(data_B)
---> 23 df_A.merge(df_B, how="full", on="id")

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/frame.py:10832, in DataFrame.merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
  10813 @Substitution("")
  10814 @Appender(_merge_doc, indents=2)
  10815 def merge(
   (...)
  10828     validate: MergeValidate | None = None,
  10829 ) -> DataFrame:
  10830     from pandas.core.reshape.merge import merge
> 10832     return merge(
  10833         self,
  10834         right,
  10835         how=how,
  10836         on=on,
  10837         left_on=left_on,
  10838         right_on=right_on,
  10839         left_index=left_index,
  10840         right_index=right_index,
  10841         sort=sort,
  10842         suffixes=suffixes,
  10843         copy=copy,
  10844         indicator=indicator,
  10845         validate=validate,
  10846     )

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:184, in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
    169 else:
    170     op = _MergeOperation(
    171         left_df,
    172         right_df,
   (...)
    182         validate=validate,
    183     )
--> 184     return op.get_result(copy=copy)

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:886, in _MergeOperation.get_result(self, copy)
    883 if self.indicator:
    884     self.left, self.right = self._indicator_pre_merge(self.left, self.right)
--> 886 join_index, left_indexer, right_indexer = self._get_join_info()
    888 result = self._reindex_and_concat(
    889     join_index, left_indexer, right_indexer, copy=copy
    890 )
    891 result = result.__finalize__(self, method=self._merge_type)

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1151, in _MergeOperation._get_join_info(self)
   1147     join_index, right_indexer, left_indexer = _left_join_on_index(
   1148         right_ax, left_ax, self.right_join_keys, sort=self.sort
   1149     )
   1150 else:
-> 1151     (left_indexer, right_indexer) = self._get_join_indexers()
   1153 if self.right_index:
   1154     if len(self.left) > 0:

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1125, in _MergeOperation._get_join_indexers(self)
   1123 # make mypy happy
   1124 assert self.how != "asof"
-> 1125 return get_join_indexers(
   1126     self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how
   1127 )

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1759, in get_join_indexers(left_keys, right_keys, sort, how)
   1757     _, lidx, ridx = left.join(right, how=how, return_indexers=True, sort=sort)
   1758 else:
-> 1759     lidx, ridx = get_join_indexers_non_unique(
   1760         left._values, right._values, sort, how
   1761     )
   1763 if lidx is not None and is_range_indexer(lidx, len(left)):
   1764     lidx = None

File ~/Documents/adhoc/.local_lab/lib/python3.12/site-packages/pandas/core/reshape/merge.py:1802, in get_join_indexers_non_unique(left, right, sort, how)
   1800 elif how == "outer":
   1801     lidx, ridx = libjoin.full_outer_join(lkey, rkey, count)
-> 1802 return lidx, ridx

UnboundLocalError: cannot access local variable 'lidx' where it is not associated with a value

Issue Description
I wrote a full outer join between two DataFrames, and it resulted in an UnboundLocalError.
Expected Behavior
The merge should return a joined DataFrame of the two DataFrames, in which half of the rows match and the other half do not.
Installed Versions
INSTALLED VERSIONS
commit : d9cdd2e
python : 3.12.4.final.0
python-bits : 64
OS : Linux
OS-release : 6.5.0-45-generic
Version : #45~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Jul 15 16:40:02 UTC 2
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : en_US.UTF-8
LOCALE : en_US.UTF-8
pandas : 2.2.2
numpy : 2.0.1
pytz : 2024.1
dateutil : 2.9.0.post0
setuptools : 71.1.0
pip : 24.2
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : 5.2.2
html5lib : None
pymysql : None
psycopg2 : 2.9.9
jinja2 : 3.1.4
IPython : 8.26.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : 4.12.3
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : None
gcsfs : None
matplotlib : 3.9.1
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : None
sqlalchemy : 2.0.31
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2024.1
qtpy : None
pyqt5 : None