"""Build a CSV of randomly combined words from several word-list files.

The script loads a set of word lists from disk, shuffles the "super" word
list, and writes one CSV row per super word. Each row combines the super
word with random picks from the other lists. The DDL kept at the bottom of
the file describes the table the rows appear to be destined for.
"""
__author__ = 'Mustafa'

import os
import sys
import cx_Oracle
import csv
import glob
from datetime import datetime
from random import randint, choice, shuffle
from timeit import default_timer as timer


# Not needed, but is more pythonic
def lazy_reader(path):
    """Yield the lines of the file at *path* one at a time.

    Only the trailing newline is removed from each line. If the file cannot
    be opened or read, an error is written to stderr and the process exits
    with status 2.
    """
    try:
        # Opening inside the ``with`` guarantees the handle is closed, and
        # avoids touching an unbound name when ``open`` itself fails.
        with open(path, 'r') as handle:
            for line in handle:
                yield line.rstrip('\n')  # "outputs" the line from the generator
    except IOError:
        sys.stderr.write("error while opening file at %s\n" % path)
        sys.exit(2)


def parse(generator):
    """Pass every item of *generator* through unchanged."""
    for line in generator:
        yield line


def _read_lines(path):
    """Return all lines of *path* (newlines kept) and close the file."""
    with open(path) as handle:
        return handle.readlines()


def _load_split_files(paths):
    """Read every file in *paths* via lazy_reader into one list.

    Each file name is printed before it is read.
    """
    collected = []
    for split_path in paths:
        print(split_path)
        collected.extend(parse(lazy_reader(split_path)))
    return collected


def _strip_all(lines):
    """Return *lines* with surrounding whitespace removed from each entry."""
    return [entry.strip() for entry in lines]


###############################################################
# Populate the list which will be used to create the CSV file #
###############################################################
# Raw strings keep the Windows backslashes literal; the resulting paths are
# identical to the ones the script has always used.
WORDLIST_DIR = r'F:\Projects\bulkInsert\wordlist'

fCustom = WORDLIST_DIR + r'\CustomWPASplit\*'
fCustomSplitFiles = glob.glob(fCustom)
fSuperWord = WORDLIST_DIR + r'\SuperWPASplit\*'
fSuperWordSplitFiles = glob.glob(fSuperWord)

listofcities = _read_lines(WORDLIST_DIR + r'\cities.txt')
listofwords = _read_lines(WORDLIST_DIR + r'\words.txt')
listofnames = _read_lines(WORDLIST_DIR + r'\GivenNames.txt')
listofnouns = _read_lines(WORDLIST_DIR + r'\Nouns.txt')
listofverbs = _read_lines(WORDLIST_DIR + r'\Verbs.txt')
listoftech = _read_lines(WORDLIST_DIR + r'\TechnicalManualWords.txt')
listofgre = _read_lines(WORDLIST_DIR + r'\GRE_WordList.txt')

readCustomStart = timer()
print("into listofcustom")
listofcustom = _load_split_files(fCustomSplitFiles)
readCustomEnd = timer()
lenCustom = len(listofcustom)
print(lenCustom)

readSuperStart = timer()
print("into listofsuper")
listofsuper = _load_split_files(fSuperWordSplitFiles)
readSuperEnd = timer()
lenSuper = len(listofsuper)
print(lenSuper)

lenStart = timer()
print("Done populating lists")

# removes \n newline from each element
listofcities = _strip_all(listofcities)
lenCities = len(listofcities)
listofwords = _strip_all(listofwords)
lenWords = len(listofwords)
listofnames = _strip_all(listofnames)
lenNames = len(listofnames)
listofnouns = _strip_all(listofnouns)
lenNouns = len(listofnouns)
listofverbs = _strip_all(listofverbs)
lenVerbs = len(listofverbs)
listoftech = _strip_all(listoftech)
lenTech = len(listoftech)
listofgre = _strip_all(listofgre)
lenGRE = len(listofgre)
lenEnd = timer()

wordid = ['10', '20', '30', '40', '50', '60', '70', '80', '90', '100',
          '110', '120', '130', '140', '150', '160', '170', '180', '190', '200']
subDataList = []

print("Starting to write to the csv file")
csvFile = r'F:\Projects\bulkInsert\data\dataInsert.csv'
# "wb" is the mode the Python 2 csv module expects for its output file.
with open(csvFile, "wb") as f:
    writer = csv.writer(f)

    s1 = timer()
    # Shuffle might take time depending on the size of the list
    shuffle(listofsuper)
    s1End = timer()

    # Started once, before the loop, so the summary below covers the whole
    # dataset rather than only the final row.
    s = timer()
    # One row per super word: ids run 1..lenSuper so every word is consumed.
    for i in xrange(1, lenSuper + 1):
        print("################################ START #########################################")
        start1 = timer()
        # pop() from the end of the shuffled list yields each word exactly once
        superWord = listofsuper.pop()
        end1 = timer() - start1
        print("Pop from super took %f seconds" % end1)

        start3 = timer()
        # NOTE(review): the column list below and the HUGEDATA DDL at the end
        # of the file name a DATE column between SUPER_WORD and WORDID, but no
        # date value is written here -- confirm whether the loader supplies it.
        subDataList = [
            # ID, RANDOM, WORD, CITIES, NAMES, NOUNS, VERBS,
            i,
            randint(100000000, 999999999),
            choice(listofwords),
            choice(listofcities),
            choice(listofnames),
            choice(listofnouns),
            choice(listofverbs),
            # TECH_WORDS, GRE_WORDS, CUSTOM_WORD, SUPER_WORD, DATE, WORDID
            choice(listoftech),
            choice(listofgre),
            choice(listofcustom),
            superWord,
            int(choice(wordid)),
        ]
        end3 = timer() - start3
        print(subDataList)
        print("Row {0} took {1} seconds ".format(i, end3))

        writer.writerow(subDataList)
        total = timer() - start1
        print("Total time taken %f seconds " % total)
        print("Total rows inserted {0} out of {1} rows".format(i, lenSuper))
        print("################################# END ##########################################\n")

# Every duration is reported as end - start so the figures are positive.
print("Total time taken to create dataset {0}".format(timer() - s))
print("Time taken to shuffle super {0}".format(s1End - s1))
print("Time taken to read Custom files into list {0}".format(readCustomEnd - readCustomStart))
print("Time taken to read Super files into list {0}".format(readSuperEnd - readSuperStart))
print("Total time taken to calculate length of lists {0}".format(lenEnd - lenStart))
print("End writing to csv file")
###############################################################

'''
CREATE TABLE "LEARN"."HUGEDATA"
  (
    "ID" NUMBER(*,0),
    "RANDOM" NUMBER,
    "WORD" VARCHAR2(150),
    "CITIES" VARCHAR2(30),
    "NAMES" VARCHAR2(50),
    "NOUNS" VARCHAR2(50),
    "VERBS" VARCHAR2(50),
    "TECH_WORDS" VARCHAR2(50),
    "GRE_WORDS" VARCHAR2(50),
    "CUSTOM_WORD" VARCHAR2(150),
    "SUPER_WORD" VARCHAR2(150),
    "DATE" DATE,
    "WORDID" NUMBER(*,0),
    PRIMARY KEY ("ID"),
    CONSTRAINT "FK_WORDID_HUGEDATA" FOREIGN KEY ("WORDID") REFERENCES "LEARN"."WORD" ("WORDID"));
'''

'''
create table word(
    wordid number,
    word varchar2(100),
    primary key (wordid)
)
'''