I have several distributors that supply me with CSV files of their inventory levels. Some of these distributors have as many as 3 files with info. These files are pretty large, with 170,000+ rows of data.
What I'm trying to do is write a program that allows me to reorganize this data into a new CSV file so every distributor would have one file organized the way I wish.
Here's a little run down on what I'm doing with the code without going too technical:
step 1 Open file1 for row1 in file1 Grab partnumber from row[1] step 2 open file2 for row2 in file2 if partnumber == row2[2] grab data from row[4] break Step 2 is then repeated for every piece of data I want to pull. The problem I'm seeing is the program runs very quickly until it gets further into the data, since it still reads every line of data even if I've already collected from the line of text. I've even thought well what if I start deleting the line of data when I'm finished with it but I thought maybe there was an alternative I don't know about. Any help would be great.
def PartSearch(): Partexists = "N" global SelectedObject with open(eval("file"+str(SelectedFile))) as f: r2 = csv.reader(f, delimiter = eval("file"+str(SelectedFile)+"Del")) for row2 in r2: if int(SelectedFile) == 1: if str(row2[int(file1PartNumber)]) == str(PartNumberobject): Partexists= "Y" SelectedObject = row2[int(SelectedCol)] break if int(SelectedFile) == 2: if row2[int(file2PartNumber)] == PartNumberobject: Partexists= "Y" SelectedObject = row2[int(SelectedCol)] break if int(SelectedFile) == 3: if row2[int(file3PartNumber)] == PartNumberobject: Partexists= "Y" SelectedObject = row2[int(SelectedCol)] break if int(SelectedFile) == 4: if row2[int(file4PartNumber)] == PartNumberobject: Partexists= "Y" SelectedObject = row2[int(SelectedCol)] break if Partexists != "Y": SelectedObject = "X" with open("C:\\Python34\\Python34\\Distributors\\ListOfDistributors.txt") as f: r3 = csv.reader(f, delimiter = "\t") for row3 in r3: distributor = row3[0] with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f: r4 = csv.reader(f, delimiter = "\t") totalRows = sum(1 for _ in f) i = totalRows if totalRows == 1: with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f: r4 = csv.reader(f, delimiter = "\t") for row4 in r4: file1 = row4[1] file1Del = row4[2] file1titles = row4[3] file1titles = row4[3] file1PartNumber = row4[4] if totalRows == 2: with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f: r4 = csv.reader(f, delimiter = "\t") for row4 in r4: if i == 2: file1 = row4[1] file1Del = row4[2] file1titles = row4[3] file1titles = row4[3] file1PartNumber = row4[4] if i == 1: file2 = row4[1] file2Del = row4[2] file2titles = row4[3] file2titles = row4[3] file2PartNumber = row4[4] i = i-1 if totalRows == 3: with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f: r4 = csv.reader(f, delimiter = "\t") for row4 in r4: if i == 3: file1 = row4[1] file1Del = row4[2] file1titles = row4[3] file1titles = row4[3] file1PartNumber = row4[4] if i == 2: file2 = row4[1] file2Del = row4[2] file2titles = row4[3] file2titles = row4[3] file2PartNumber = row4[4] if i == 1: file3 = row4[1] file3Del = row4[2] file3titles = row4[3] file3titles = row4[3] file3PartNumber = row4[4] i = i-1 if totalRows == 4: with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f: r4 = csv.reader(f, delimiter = "\t") for row4 in r4: if i == 4: file1 = row4[1] file1Del = row4[2] file1titles = row4[3] file1titles = row4[3] file1PartNumber = row4[4] if i == 3: file2 = row4[1] file2Del = row4[2] file2titles = row4[3] file2titles = row4[3] file2PartNumber = row4[4] if i == 2: file3 = row4[1] file3Del = row4[2] file3titles = row4[3] file3titles = row4[3] file3PartNumber = row4[4] if i == 1: file4 = row4[1] file4Del = row4[2] file4titles = row4[3] file4titles = row4[3] file4PartNumber = row4[4] i = i-1 with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"structure.txt") as f: r5 = csv.reader(f, delimiter = "\t") i=1 for row5 in r5: if i == 1: DistributorName = row5[0] PartNumberFile = row5[2] PartNumberCol = row5[3] AltPartNumberFile = row5[5] AltPartNumberCol = row5[6] VendorPartNumberFile = row5[8] VendorPartNumberCol = row5[9] AltVendorPartNumberFile = row5[11] AltVendorPartNumberCol = row5[12] DescriptionFile = row5[14] DescriptionCol = row5[15] BrandFile = row5[17] BrandCol = row5[18] CostFile = row5[20] CostCol = row5[21] RetailFile = row5[23] RetailCol = row5[24] StatusFile = row5[26] StatusCol = row5[27] WeightFile = row5[29] WeightCol = row5[30] if i == 2: if row5[2] == 0: NumofOnedaywarehouse = row5[2] if row5[2] == 1: NumofOnedaywarehouse = row5[2] Oneday1WarehouseFile = row5[4] Oneday1WarehouseCol = row5[5] if row5[2] == 2: NumofOnedaywarehouse = row5[2] Oneday1WarehouseFile = row5[4] Oneday1WarehouseCol = row5[5] Oneday2WarehouseFile = row5[7] Oneday2WarehouseCol = row5[8] if row5[2] == 3: NumofOnedaywarehouse = row5[2] Oneday1WarehouseFile = row5[4] Oneday1WarehouseCol = row5[5] Oneday2WarehouseFile = row5[7] Oneday2WarehouseCol = row5[8] Oneday3WarehouseFile = row5[10] Oneday3WarehouseCol = row5[11] if i == 3: if row5[2] == 0: NumofTwodaywarehouse = row5[2] if row5[2] == 1: NumofTwodaywarehouse = row5[2] Twoday1WarehouseFile = row5[4] Twoday1WarehouseCol = row5[5] if row5[2] == 2: NumofTwodaywarehouse = row5[2] Twoday1WarehouseFile = row5[4] Twoday1WarehouseCol = row5[5] Twoday2WarehouseFile = row5[7] Twoday2WarehouseCol = row5[8] if row5[2] == 3: NumofTwodaywarehouse = row5[2] Twoday1WarehouseFile = row5[4] Twoday1WarehouseCol = row5[5] Twoday2WarehouseFile = row5[7] Twoday2WarehouseCol = row5[8] Twoday3WarehouseFile = row5[10] Twoday3WarehouseCol = row5[11] if i == 4: if row5[2] == 0: NumofThreedaywarehouse = row5[2] if row5[2] == 1: NumofThreedaywarehouse = row5[2] Threeday1WarehouseFile = row5[4] Threeday1WarehouseCol = row5[5] if row5[2] == 2: NumofThreedaywarehouse = row5[2] Threeday1WarehouseFile = row5[4] Threeday1WarehouseCol = row5[5] Threeday2WarehouseFile = row5[7] Threeday2WarehouseCol = row5[8] if row5[2] == 3: NumofThreedaywarehouse = row5[2] Threeday1WarehouseFile = row5[4] Threeday1WarehouseCol = row5[5] Threeday2WarehouseFile = row5[7] Threeday2WarehouseCol = row5[8] Threeday3WarehouseFile = row5[10] Threeday3WarehouseCol = row5[11] if i == 5: if row5[2] == 0: NumofFourdaywarehouse = row5[2] if row5[2] == 1: NumofFourdaywarehouse = row5[2] Fourday1WarehouseFile = row5[4] Threeday1WarehouseCol = row5[5] if row5[2] == 2: NumofFourdaywarehouse = row5[2] Fourday1WarehouseFile = row5[4] Fourday1WarehouseCol = row5[5] Fourday2WarehouseFile = row5[7] Fourday2WarehouseCol = row5[8] if row5[2] == 3: NumofFourdaywarehouse = row5[2] Fourday1WarehouseFile = row5[4] Fourday1WarehouseCol = row5[5] Fourday2WarehouseFile = row5[7] Fourday2WarehouseCol = row5[8] Fourday3WarehouseFile = row5[10] Fourday3WarehouseCol = row5[11] if i == 6: if row5[2] == 0: NumofFivedaywarehouse = row5[2] if row5[2] == 1: NumofFivedaywarehouse = row5[2] Fiveday1WarehouseFile = row5[4] Fiveday1WarehouseCol = row5[5] if row5[2] == 2: NumofFivedaywarehouse = row5[2] Fiveday1WarehouseFile = row5[4] Fiveday1WarehouseCol = row5[5] Fiveday2WarehouseFile = row5[7] Fiveday2WarehouseCol = row5[8] if row5[2] == 3: NumofFivedaywarehouse = row5[2] Fiveday1WarehouseFile = row5[4] Fiveday1WarehouseCol = row5[5] Fiveday2WarehouseFile = row5[7] Fiveday2WarehouseCol = row5[8] Fiveday3WarehouseFile = row5[10] Fiveday3WarehouseCol = row5[11] i = i+1 """print(file1Del) PartNumberFile = 1 PartNumberCol = 1 CostFile = 2 CostCol = 2 SelectedFile = PartNumberFile SelectedCol = PartNumberCol number = 1""" #Program to grab Part Number with open(file1) as f: r = csv.reader(f, delimiter = file1Del) if file1titles == "Y": file=r.__next__() for row in r: PartNumberobject = row[int(file1PartNumber)] "start of data collection, save variables as SelectedFile and SelectedCol. Run PartSearch() then save Variable SelectedObject" SelectedFile = PartNumberFile SelectedCol = PartNumberCol PartSearch() FPartNumber = SelectedObject SelectedFile = AltPartNumberFile SelectedCol = AltPartNumberCol PartSearch() FAltPartNumber = SelectedObject SelectedFile = VendorPartNumberFile SelectedCol = VendorPartNumberCol PartSearch() FVendorPartNumber = SelectedObject SelectedFile = AltVendorPartNumberFile SelectedCol = AltVendorPartNumberCol PartSearch() FAltVendorPartNumber = SelectedObject SelectedFile = DescriptionFile SelectedCol = DescriptionCol PartSearch() FDescription = SelectedObject SelectedFile = BrandFile SelectedCol = BrandCol PartSearch() FBrand = SelectedObject SelectedFile = CostFile SelectedCol = CostCol PartSearch() FCost = SelectedObject SelectedFile = RetailFile SelectedCol = RetailCol PartSearch() FRetail = SelectedObject SelectedFile = StatusFile SelectedCol = StatusCol PartSearch() FStatus = SelectedObject SelectedFile = WeightFile SelectedCol = WeightCol PartSearch() FWeight = SelectedObject print(DistributorName, PartNumberobject, FAltPartNumber, FVendorPartNumber, FAltVendorPartNumber, FDescription, FBrand, FCost, FRetail, FStatus, FWeight)
file2, i.e. convertfile2after loading into a dictionary/set so you can testif partnumber in file2inO(1)time. If you need to copyfile2then do it, 400Mb of additional memory is still nothing nowadays. That should be more then enough.with open('file1', 'r') as f1, open('file2', 'r') as f2, open('file3', 'r') as f3, open('newfile', 'w') as newfile:and nest the rest of your code inside.