Data
import pandas as pd

# Example data: two categories ("A" and "B"), each split into two
# sub-categories that carry three observations apiece (12 rows in total).
df = pd.DataFrame(
    {
        "Category": ["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"],
        "SubCategory": [
            "A1", "A1", "A1", "A2", "A2", "A2",
            "B1", "B1", "B1", "B2", "B2", "B2",
        ],
        "Value": [1, 2, 3, 2, 3, 5, 1, 6, 7, 3, 6, 7],
    }
)
Solution
# Build one correlation matrix per Category and stack them into one table.
#
# The earlier version called ``scipy.corrcoef``, which was only a re-export
# of NumPy's function; SciPy deprecated those top-level aliases and recent
# releases no longer provide them. pandas' own ``DataFrame.corr`` computes
# the same Pearson coefficients and needs no extra import.

# Holds one DataFrame (a correlation matrix) per Category, in groupby order.
correlations = []
for _, group in df.groupby("Category"):
    # Lay the sub-categories out side by side, one column each. Resetting the
    # index pairs the observations by their position within the sub-category
    # (first with first, second with second, ...).
    wide = pd.DataFrame(
        {
            sub_category: values.reset_index(drop=True)
            for sub_category, values in group.groupby("SubCategory")["Value"]
        }
    )
    # Pearson correlation between every pair of sub-category columns. If the
    # sub-categories have different numbers of observations, the shorter
    # columns are padded with NaN and corr() ignores the incomplete pairs.
    correlations.append(wide.corr())

# Stack the per-category matrices. Sub-categories that belong to different
# categories have no common block, so those cells come out as NaN.
pd.concat(correlations, sort=False)
Output
          A1        A2        B1        B2
________________________________________________
A1  1.000000  0.981981       NaN       NaN
A2  0.981981  1.000000       NaN       NaN
B1       NaN       NaN  1.000000  0.996271
B2       NaN       NaN  0.996271  1.000000
Update
This solution was tested with the Python and pandas versions shown below; older versions may not work:
# Report the interpreter and pandas versions the solution was tested with.
from platform import python_version

import pandas as pd

print('python version:', python_version())
print('pandas version:', pd.__version__)

# Output recorded when the solution was tested:
# python version: 3.7.0
# pandas version: 0.23.4