I have a Python module, written for me by someone else, that downloads and parses data from Google's patent listings. The code works well until I point it at any file from before 2005. I have no knowledge of Python beyond how to run the module. How do I fix it?
The traceback I receive is:
Traceback (most recent call last):
  File "C:\Users\John\Desktop\FINAL BART ALL INFO-Magic Bullet.py", line 46, in <module>
    assert xml_file is not None
AssertionError

And this is the code I'm using:
# Download one patent grant full-text archive, parse the first documents in it
# with lxml, and write four pipe-delimited text files (general information,
# US classifications, citations, inventors).
#
# This is a Python 2 script (it relies on urllib2 and on Python 2 str/unicode
# behaviour).
#
# NOTE(review): the XPath expressions below use element names such as
# "us-bibliographic-data-grant" and the comments mention 2005-2008 and 2009+
# layouts; archives from before 2005 presumably use a different schema, so the
# queries may need adapting for those years - confirm against a sample file.

#Ignore all this information
import os
import urllib2
import zipfile

from lxml import etree

#-------------------------------------------------------------------------------
#Ignore all this information
def xmlSplitter(data, separator=lambda x: x.startswith('<?xml')):
    """Split a stream of concatenated XML documents into single documents.

    data      -- iterable of text lines (for example an open file).
    separator -- predicate that returns True for the first line of a new
                 document; by default a line starting with '<?xml'.

    Yields each document as one string. Nothing is yielded for empty input.
    """
    buff = []
    for line in data:
        if separator(line):
            if buff:
                yield ''.join(buff)
                buff[:] = []
        buff.append(line)
    # Flush the last document; skip it when the input was empty so the caller
    # is never handed an empty string to parse.
    if buff:
        yield ''.join(buff)


def first(seq, default=None):
    """Return the first item from sequence, seq or the default(None) value"""
    for item in seq:
        return item
    return default


def _replace_all(text, pairs):
    """Apply each (old, new) replacement to text, in the order given."""
    for old, new in pairs:
        text = text.replace(old, new)
    return text


#-------------------------------------------------------------------------------
#Replacement tables. ORDER MATTERS in every table below: the pairs are applied
#top to bottom and later pairs act on the output of earlier ones.

#Document-number clean-up. "PP0" is listed twice as in the original script; the
#second pass strips a second leading zero.
_DOCID_FIXES = [
    ("D0", "D"),
    ("H000", "H"),
    ("PP0", "PP"),
    ("PP0", "PP"),
    ("RE0", "RE"),
    ("~0", "~"),
    ("US~", ""),
]

#Here is the start of the patent connectors- in the patents they exist at the
#end. They are replaced with a temporary "$@" marker that later becomes a pipe |
#To add another kind code, add a line like ("~QQ~", "$@").
_CITATION_KIND_MARKERS = [
    ("~A~", "$@"),
    ("~S~", "$@"),
    ("~S1~", "$@"),
    ("~B1~", "$@"),
    ("~B2~", "$@"),
    ("~A1~", "$@"),
    ("~H~", "$@"),
    ("~E~", "$@"),
]

#Make unique citation changes here - for example when "US" or "DE" is embedded
#in a citation number. The "$" and "!" placeholders protect the text from the
#country-code replacements and are turned back into "S" and "E" at the end.
_CITATION_EMBEDDED_FIXES = [
    ("05225US~", "05225U$|"),
    ("063106 DE", "063106D!"),
    ("US~US~", "US~"),
    ("PCT/US", "PCT/U$"),
    ("PCTUS", "PCTU$"),
    ("WO US", "WO U$"),
    ("WO~US", "WO~ U$"),
    #fixes for cites without pipes -DONT TOUCH THESE
    ("US~cited by examiner", "||cited by examiner"),
    ("US~cited by other", "||cited by other"),
]

#These are just end of citation fixes-DONT TOUCH THESE
_CITED_BY_LONG_PASS = [
    ("cited by examiner~cited by other~cited by other", "cited by examiner"),
    ("cited by other~cited by examiner~cited by examiner", "cited by other"),
    ("cited by other~cited by other~cited by other~cited by other", "cited by other"),
    ("cited by examiner~cited by examiner~cited by examiner~cited by examiner", "cited by examiner"),
]
_CITED_BY_SHORT_PASS = [
    ("cited by other~cited by other", "cited by other"),
    ("cited by examiner~cited by examiner", "cited by examiner"),
    ("cited by other~cited by examiner", "cited by other"),
    ("cited by examiner~cited by other", "cited by examiner"),
]
#Same sequence as the original chain of replace calls: one very long run, then
#the long+short passes three times, then the short pass once more.
_CITED_BY_FIXES = (
    [("cited by other~cited by other~cited by other~cited by other~cited by other~cited by other~cited by other~cited by other~cited by other~cited by other", "cited by other")]
    + (_CITED_BY_LONG_PASS + _CITED_BY_SHORT_PASS) * 3
    + _CITED_BY_SHORT_PASS
)

#make unique post-processing citation changes here-If needed for the end of the scripts
_CITATION_FINAL_FIXES = [
    ("~", "|"),
    ("US", "||"),
    ("CA|", "Foreign -CA-"),
    ("EP|", "Foreign -EP-"),
    ("CN|", "Foreign -CN-"),
    ("$", "S"),
    ("D!", "DE"),
]

#For files after 2009 use this to replace State errors in the Excel- If the
#output is short then add the country code here to insert a None value for State
_OMITTED_STATE_COUNTRIES = [
    "KR", "GB", "IT", "JP", "FR", "BR", "NO", "HK", "CA", "TW", "SE", "CH",
    "DE", "SG", "IN", "IL", "CN", "FI", "ZA", "NL", "AT", "AU", "BE", "CZ",
    "RU", "IE", "AR", "MY", "SK", "ES", "NZ", "HU", "UA", "DK", "TH", "MX",
]

#For the 2005-2008 files: countries that appear as ~XX~XX~XX and need a None State
_TRIPLE_STATE_COUNTRIES = [
    "NO", "NZ", "RU", "RO", "SE", "SG", "SI", "TH", "TR", "TW", "VE", "ZA",
    "AN", "AR", "BA", "PH", "HR", "LT", "EE", "BJ", "CR", "PL", "CO", "UA",
    "KW", "CL", "CY", "LI", "SA",
]

#For lines that don't return (2009-): countries that end an inventor row
_OMITTED_ROW_BREAK_COUNTRIES = [
    "US", "FR", "DK", "KR", "JP", "GB", "IT", "CH", "SG", "DE", "IN", "TW", "CN",
]

#For 2005-2008: countries that end an inventor row
_TRIPLE_ROW_BREAK_COUNTRIES = [
    "AT", "AN", "AR", "AU", "AZ", "BA", "BE", "BR", "BS", "CA", "CH", "CN",
]

#special case fixes- these are for strange names in the data that may not
#create the correct amount of columns.
_INVENTOR_SPECIAL_FIXES = [
    ('~None~None~NO~', '~None~NO~'),
    ('Ramandeep~Chandigarh', 'Ramandeep|None~Chandigarh'),
    ('Esk~eh~r', 'Eskehr'),
    ('Baychar~Eastport', 'Baychar~None~Eastport'),
    ('US~1', '||||||'),
    ('~', '|'),
]

#-------------------------------------------------------------------------------
#This is where you change the internet source file- Use the file extensions from the sheet provided.
datasrc = "http://storage.googleapis.com/patents/grant_full_text/2003/pg030107.zip"
#http://commondatastorage.googleapis.com/patents/grant_full_text/2012/ipg120117.zip
filename = datasrc.split('/')[-1]

#-------------------------------------------------------------------------------
#Ignore all this information
if not os.path.exists(filename):
    # Fetch the whole archive before creating the local file, so a failed
    # download cannot leave behind an empty file that later runs would mistake
    # for a finished download.
    response = urllib2.urlopen(datasrc)
    try:
        payload = response.read()
    finally:
        response.close()
    with open(filename, 'wb') as file_write:
        file_write.write(payload)

zf = zipfile.ZipFile(filename)
# Match the extension case-insensitively and fail with a message that lists
# what the archive really contains, instead of a bare AssertionError.
xml_file = first([x for x in zf.namelist() if x.lower().endswith('.xml')])
if xml_file is None:
    raise IOError(
        "No XML document found in {0}; the archive contains: {1}".format(
            filename, ", ".join(zf.namelist())))

#-------------------------------------------------------------------------------
#output set your folder location here, keep double \\ between
outFolder = "C:\\PatentFiles\\"
outFilename = os.path.splitext(filename)[0]

#-------------------------------------------------------------------------------
#These outputs are the names of the files-Ignore all this information
output = outFolder + outFilename + "_general.txt"
output2 = outFolder + outFilename + "_USCL.txt"
output3 = outFolder + outFilename + "_citation.txt"
output4 = outFolder + outFilename + "_inventor.txt"

#-------------------------------------------------------------------------------
#Here is the count- adjust this each time you run the program for the first time.
#Run at 10 for the 1st run then 5500 afterward.
maxDocs = 10  #5500

#Open files
outFile = open(output, "w")
outFile2 = open(output2, "w")
outFile3 = open(output3, "w")
outFile4 = open(output4, "w")
try:
    #write the headers
    outFile.write("Patent No.|GrantDate|Application Date|Number of Claims|Examiners|US Primary Main Classification|Assignee|Assignee Address City_State_Country|First Inventor|First Inventor Address City_State_Country| \n")
    outFile2.write("Patent No.|Primary|U.S Classification| \n")
    outFile3.write("Patent No.|Citation|Citation Date|Who Cited This| \n")
    outFile4.write("Patent No.|Inventor Last Name|First Name|City|State|Country|Nationality Country|Residence Country|\n")

    count = 0
    for item in xmlSplitter(zf.open(xml_file)):
        count += 1
        if count > maxDocs:
            break
        doc = etree.XML(item)

        #-----------------------------------------------------------------------
        #This is where the python starts parsing the information.
        #This is the Start of the General Information file.
        docID = "~".join(doc.xpath(
            '//publication-reference/document-id/country/text()'
            '|//publication-reference/document-id/doc-number/text()'))
        docID = _replace_all(docID, _DOCID_FIXES)
        # Text that ends the current output row and starts a new one for the
        # same patent number.
        newRow = "|" + "\n" + str(docID) + "|"

        grantdate = first(doc.xpath('//publication-reference/document-id/date/text()'))
        applicationdate = first(doc.xpath('//application-reference/document-id/date/text()'))
        claimsNum = first(doc.xpath('//number-of-claims/text()'))

        assignee1 = "-".join(doc.xpath(
            '//assignees/assignee/addressbook/orgname/text()'
            '|//assignees/assignee/addressbook/last-name/text()'
            '|//assignees/assignee/addressbook/first-name/text()'))
        assignee1 = assignee1.replace('-', ', ')
        assignee2 = "_".join(doc.xpath('//assignee/addressbook/address/*/text()'))
        assignees = str(assignee1.encode("UTF-8")) + "|" + str(assignee2.encode("UTF-8"))

        inventors1 = first(doc.xpath('//applicants/applicant/addressbook/last-name/text()'))
        inventor2 = first(doc.xpath('//applicants/applicant/addressbook/first-name/text()'))
        inventor3 = first(doc.xpath('//applicants/applicant/addressbook/address/city/text()'))
        inventor4 = first(doc.xpath('//applicants/applicant/addressbook/address/state/text()'))
        inventor5 = first(doc.xpath('//applicants/applicant/addressbook/address/country/text()'))
        # Missing values stay None and are written out as the text "None".
        inventor = str(inventor2.encode("UTF-8") if inventor2 else inventor2) + " " + str(inventors1.encode("UTF-8") if inventors1 else inventors1)
        inventors2 = str(inventor3.encode("UTF-8") if inventor3 else inventor3) + "_" + str(inventor4) + "_" + str(inventor5)
        inventors = str(inventor) + "|" + str(inventors2)

        examiners = "~".join(doc.xpath(
            '//examiners/primary-examiner/first-name/text()'
            '|//examiners/primary-examiner/last-name/text()'))
        examiners = examiners.replace("~", ", ")
        uscl1 = first(doc.xpath('//classification-national/main-classification/text()'))
        #END FIRST TEXT FILE

        #-----------------------------------------------------------------------
        #This begins the USCL file
        # The publication country is reused as a flag: "US" becomes "1" for
        # the primary classification row and "0" for every other row.
        pubCountry = first(doc.xpath('//publication-reference/document-id/country/text()'))
        notprimary = pubCountry.replace("US", "0")
        primary1 = pubCountry.replace("US", "1")
        uscl2 = "~".join(doc.xpath(
            '//us-bibliographic-data-grant/classification-national/*/text()'
            '|//sequence-cwu/publication-reference/document-id/country/text()'))
        uscl2 = uscl2.replace("US~", str(primary1) + "|")
        uscl2 = uscl2.replace("~", "|" + "\n" + str(docID) + "|" + str(notprimary) + "|")
        uscl2 = uscl2.replace("US", "|")
        #END SECOND TEXT FILE

        #-----------------------------------------------------------------------
        #Begin the Citation file
        citation = '~'.join(doc.xpath(
            '//publication-reference/document-id/country/text()'
            '|//references-cited/citation/patcit/document-id/country/text()'
            '|//references-cited/citation/patcit/document-id/doc-number/text()'
            '|//references-cited/citation/patcit/document-id/kind/text()'
            '|//references-cited/citation/patcit/document-id/date/text()'
            '|//references-cited/citation/category/text()'))
        citation = _replace_all(citation, _CITATION_KIND_MARKERS)
        citation = _replace_all(citation, _CITATION_EMBEDDED_FIXES)
        #Here are the changes to return each citation into a unique row.
        #If a country is only listed in the columns in Excel it needs a fix
        #like this; if KR is alone then add a pair such as
        #("KR~", "Foreign -KR-").
        #To add another country, add ("~QQ~", newRow + "Foreign -QQ-").
        citation = _replace_all(citation, [
            ("$@", "|"),
            ("~US~", newRow),
            ("US~", ""),
            ("~JP~", newRow + "Foreign -JP-"),
            ("JP~", "Foreign -JP-"),
            ("~GB~", newRow + "Foreign -GB-"),
            ("GB~", "Foreign -GB-"),
            ("~WO~", newRow + "Foreign -WO-"),
            ("WO~", "Foreign -WO-"),
            ("~CA~", newRow + "Foreign -CA-"),
            ("~DE~EP~", "~DE~ EP-"),
            ("~DE~", newRow + "Foreign -DE-"),
            ("DE~", "Foreign -DE-"),
            ("~KR~", newRow + "Foreign -KR-"),
            ("KR~", "Foreign -KR-"),
            ("~EM~", newRow + "Foreign -EM-"),
            ("~CH~", newRow + "Foreign -CH-"),
            ("~DE~", newRow + "Foreign -DE-"),
            ("~SE~", newRow + "Foreign -SE-"),
            ("~FR~", newRow + "Foreign -FR-"),
            ("~FR~EP~", "~FR~ EP-"),
            ("FR~", "Foreign -FR-"),
            ("~CN~", newRow + "Foreign -CN-"),
            ("~TW~", newRow + "Foreign -TW-"),
            ("~TW", newRow + "Foreign -TW-"),
            ("TW~", "Foreign -TW-"),
            ("~NL~", newRow + "Foreign -NL-"),
            ("~BR~", newRow + "Foreign -BR-"),
            ("~AU~", newRow + "Foreign -AU-"),
            ("~ES~", newRow + "Foreign -ES-"),
            ("~IT~", newRow + "Foreign -IT-"),
            ("~SU~", newRow + "Foreign -SU-"),
            ("~AT~", newRow + "Foreign -AT-"),
            ("~BE~", newRow + "Foreign -BE-"),
            ("~DK~", newRow + "Foreign -DK-"),
            ("~RU~", newRow + "Foreign -RU-"),
            ("RU~", "Foreign -RU-"),
        ])
        citation = _replace_all(citation, _CITED_BY_FIXES)
        citation = _replace_all(citation, _CITATION_FINAL_FIXES)
        #END CITATION FILE------------------------------------------------------

        #START the inventors file
        inventor1 = doc.xpath(
            '//applicants/applicant/addressbook/last-name/text()'
            '|//applicants/applicant/addressbook/first-name/text()'
            '|//applicants/applicant/addressbook/address/city/text()'
            '|//applicants/applicant/addressbook/address/state/text()'
            '|//applicants/applicant/addressbook/address/country/text()'
            '|//applicants/applicant/nationality/*/text()'
            '|//applicants/applicant/residence/*/text()'
            '|//sequence-cwu/publication-reference/document-id/country/text()'
            '|//sequence-cwu/number/text()')
        inventor1 = '~'.join(inventor1).replace('\n-', '')
        # Insert a "None" State column where the data has no state.
        for code in _OMITTED_STATE_COUNTRIES:
            inventor1 = inventor1.replace('~' + code + '~omitted', '~None~' + code + '~omitted')
        for code in _TRIPLE_STATE_COUNTRIES:
            triple = '~' + code + '~' + code + '~' + code
            inventor1 = inventor1.replace(triple, '~None' + triple)
        # Start a new output row after each inventor's last column.
        for code in _OMITTED_ROW_BREAK_COUNTRIES:
            inventor1 = inventor1.replace('omitted~' + code + '~', 'omitted~' + code + newRow)
        for code in _TRIPLE_ROW_BREAK_COUNTRIES:
            triple = code + '~' + code + '~' + code
            inventor1 = inventor1.replace(triple + '~', triple + newRow)
        inventor1 = _replace_all(inventor1, _INVENTOR_SPECIAL_FIXES)
        #End the inventor file

        #-----------------------------------------------------------------------
        #Here are the output print fields- you can change one if you want but
        #remember to comment out all but the one you wish to view.
        print("DocID: {0}\nGrantDate: {1}\nApplicationDate: {2}\nNumber of Claims: {3}\nExaminers: {4}\nAssignee: {5}\nInventor: {6}\nUS Cl.: {7}\n".format(docID,grantdate,applicationdate,claimsNum,examiners.encode("UTF-8"),assignees,inventors,uscl1))
        #print "DocID: {0}\nU.S Cl: {1}\nPrimary: {2}\n".format(docID,uscl2,primary1)
        #print "DocID: {0}\nCitation: {1}\n".format(docID,citation.encode("UTF-8"))
        #NOTE: the next line uses appID, which this script never defines.
        #print "DocID: {0}\nTitle: {1}\nInventors: {2}\n".format(docID,appID,inventor1.encode("UTF-8"))

        #-----------------------------------------------------------------------
        #IGNORE Everything else below this.
        #Output first general info bits
        outFile.write(str(docID) + "|" + str(grantdate) + "|" + str(applicationdate) + "|" + str(claimsNum) + "|" + str(examiners.encode("UTF-8")) + "|" + str(uscl1) + "|" + str(assignees) + "|" + str(inventors) + "|" + "\n")
        #Output Classifications only
        outFile2.write(str(docID) + "|" + str(uscl2) + "|" + "\n")
        #Output Citations only (encoded like the inventor row, so non-ASCII
        #citation text no longer raises UnicodeEncodeError)
        outFile3.write(str(docID) + "|" + str(citation.encode("UTF-8")) + "|" + "\n")
        #Output inventors only
        outFile4.write(str(docID) + "|" + str(inventor1.encode("UTF-8")) + "|" + "\n")
finally:
    # Always release the output files, even when a document fails to parse.
    outFile.close()
    outFile2.close()
    outFile3.close()
    outFile4.close()

print("output files complete")