Skip to main content
edited body
Source Link
piRSquared
  • 295.6k
  • 68
  • 509
  • 654

setup

df = pd.read_csv( "https://raw.githubusercontent.com/wesm/pydata-book/master/ch08/tips.csv", sep=',') d1 = pd.concat([df.query('day != "Sun"'), df], keys=['DINER A', 'DINER B']) \ .rename_axis(['restaurant', None]).reset_index('restaurant') 

build a pd.MultiIndex with all restaurants and days

mux = pd.MultiIndex.from_product([ d1.restaurant.unique(), d1.day.unique() ], names=['restaurant', 'day']) 

do groupby and+ join followed by+ reindex

d2 = d1.groupby(['day']).total_bill.sum() d3 = d1.groupby(['restaurant', 'day'])[['total_bill']].sum() d3.reindex(mux).join(d2, lsuffix='_x', rsuffix='_y').reindex(mux).reset_index() restaurant day total_bill_x total_bill_y 0 DINER A Sat 1778.40 3556.80 1 DINER A Thur 1096.33 2192.66 2 DINER A Fri 325.88 651.76 3 DINER A Sun NaN  NaN1627.16 4 DINER B Sat 1778.40 3556.80 5 DINER B Thur 1096.33 2192.66 6 DINER B Fri 325.88 651.76 7 DINER B Sun 1627.16 1627.16 

setup

df = pd.read_csv( "https://raw.githubusercontent.com/wesm/pydata-book/master/ch08/tips.csv", sep=',') d1 = pd.concat([df.query('day != "Sun"'), df], keys=['DINER A', 'DINER B']) \ .rename_axis(['restaurant', None]).reset_index('restaurant') 

build a pd.MultiIndex with all restaurants and days

mux = pd.MultiIndex.from_product([ d1.restaurant.unique(), d1.day.unique() ], names=['restaurant', 'day']) 

do groupby and join followed by reindex

d2 = d1.groupby(['day']).total_bill.sum() d3 = d1.groupby(['restaurant', 'day'])[['total_bill']].sum() d3.join(d2, lsuffix='_x', rsuffix='_y').reindex(mux).reset_index() restaurant day total_bill_x total_bill_y 0 DINER A Sat 1778.40 3556.80 1 DINER A Thur 1096.33 2192.66 2 DINER A Fri 325.88 651.76 3 DINER A Sun NaN  NaN 4 DINER B Sat 1778.40 3556.80 5 DINER B Thur 1096.33 2192.66 6 DINER B Fri 325.88 651.76 7 DINER B Sun 1627.16 1627.16 

setup

df = pd.read_csv( "https://raw.githubusercontent.com/wesm/pydata-book/master/ch08/tips.csv", sep=',') d1 = pd.concat([df.query('day != "Sun"'), df], keys=['DINER A', 'DINER B']) \ .rename_axis(['restaurant', None]).reset_index('restaurant') 

build a pd.MultiIndex with all restaurants and days

mux = pd.MultiIndex.from_product([ d1.restaurant.unique(), d1.day.unique() ], names=['restaurant', 'day']) 

do groupby + join + reindex

d2 = d1.groupby(['day']).total_bill.sum() d3 = d1.groupby(['restaurant', 'day'])[['total_bill']].sum() d3.reindex(mux).join(d2, lsuffix='_x', rsuffix='_y').reset_index() restaurant day total_bill_x total_bill_y 0 DINER A Sat 1778.40 3556.80 1 DINER A Thur 1096.33 2192.66 2 DINER A Fri 325.88 651.76 3 DINER A Sun NaN 1627.16 4 DINER B Sat 1778.40 3556.80 5 DINER B Thur 1096.33 2192.66 6 DINER B Fri 325.88 651.76 7 DINER B Sun 1627.16 1627.16 
Source Link
piRSquared
  • 295.6k
  • 68
  • 509
  • 654

setup

df = pd.read_csv( "https://raw.githubusercontent.com/wesm/pydata-book/master/ch08/tips.csv", sep=',') d1 = pd.concat([df.query('day != "Sun"'), df], keys=['DINER A', 'DINER B']) \ .rename_axis(['restaurant', None]).reset_index('restaurant') 

build a pd.MultiIndex with all restaurants and days

mux = pd.MultiIndex.from_product([ d1.restaurant.unique(), d1.day.unique() ], names=['restaurant', 'day']) 

do groupby and join followed by reindex

d2 = d1.groupby(['day']).total_bill.sum() d3 = d1.groupby(['restaurant', 'day'])[['total_bill']].sum() d3.join(d2, lsuffix='_x', rsuffix='_y').reindex(mux).reset_index() restaurant day total_bill_x total_bill_y 0 DINER A Sat 1778.40 3556.80 1 DINER A Thur 1096.33 2192.66 2 DINER A Fri 325.88 651.76 3 DINER A Sun NaN NaN 4 DINER B Sat 1778.40 3556.80 5 DINER B Thur 1096.33 2192.66 6 DINER B Fri 325.88 651.76 7 DINER B Sun 1627.16 1627.16