import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def _load_pickle(path):
    """Return the object unpickled from the file at *path*.

    The file is opened in a ``with`` block so the handle is closed as soon
    as loading finishes, instead of being left open for the garbage
    collector to reclaim.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Each pickle is described by the original author as a 2-D array; it is used
# below as rows of (town, job count).
java = _load_pickle('JavaSafe.p')
python = _load_pickle('PythonSafe.p')

javaFrame = pd.DataFrame(java, columns=['Town', 'Java Jobs'])
pythonFrame = pd.DataFrame(python, columns=['Town', 'Python Jobs'])

# Order each frame from the most job postings to the fewest.
javaFrame = javaFrame.sort_values(by='Java Jobs', ascending=False)
pythonFrame = pythonFrame.sort_values(by='Python Jobs', ascending=False)

print(javaFrame, "\n", pythonFrame)

# This code comes out with the following:
Town Java Jobs 435 York,NY 3593 212 NewYork,NY 3585 584 Seattle,WA 2080 624 Chicago,IL 1920 301 Boston,MA 1571 ... 79 Holland,MI 5 38 Manhattan,KS 5 497 Vernon,IL 5 30 Clayton,MO 5 90 Waukegan,IL 5 [653 rows x 2 columns] Town Python Jobs 160 NewYork,NY 2949 11 York,NY 2938 349 Seattle,WA 1321 91 Chicago,IL 1312 167 Boston,MA 1117 383 Hanover,NH 5 209 Bulverde,TX 5 203 Salisbury,NC 5 67 Rockford,IL 5 256 Ventura,CA 5 [416 rows x 2 columns] I want to make a new DataFrame that uses the town names as its index and has one column each for Java and Python. However, some of the towns only have results for one of the two languages.
# Outer-join the two frames on 'Town' so that towns present in only one of
# them are kept; the job-count column with no match for such a town is NaN.
# The shared 'Town' column then becomes the index of the combined frame.
result = pd.merge(pythonFrame, javaFrame, on='Town', how='outer').set_index('Town')