Skip to content

Commit bf6c31e

Browse files
authored
Bag of Visual Words
1 parent 9a89d38 commit bf6c31e

File tree

100 files changed

+19870
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+19870
-0
lines changed

BagOfVisualWord/1_mergecsv.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import glob
import csv

# Merge the per-image SIFT feature CSVs into one training file, prefixing
# every descriptor row with the 1-based id of the image it came from.
index = 0  # number of per-image CSV files actually read
with open('trainData.csv', 'a') as singleFile:
    for i in range(1, 1888 + 1):
        path = '/home/soumen/Desktop/cv_new1/A2_Data_CV/train_sift_features'+ '/' + str(i) + '_train_sift' + '.csv'
        print(path)
        for csvFile in glob.glob(path):
            index = index + 1
            # Use a context manager so each input handle is closed promptly
            # instead of leaking until garbage collection.
            with open(csvFile, 'r') as inFile:
                for line in inFile:
                    # Tag the row with its source image id.
                    line1 = str(i) + "," + line
                    singleFile.write(line1)

# print was a Python-2 statement here; the function form works on 2 and 3.
print("Number of CSV file Read is {}".format(index))

BagOfVisualWord/2_kmean.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from copy import deepcopy
2+
import numpy as np
3+
import pandas as pd
4+
from matplotlib import pyplot as plt
5+
from sklearn.cluster import KMeans
6+
import csv
7+
from scipy.spatial import distance
8+
9+
# Size of the visual vocabulary (number of k-means clusters).
nc = 128


# Load the merged SIFT descriptors. Column 0 holds the image id; the
# remaining columns hold the descriptor values.
train_data = pd.read_csv('trainData.csv', header = None)
print("Shape of the training data is {}".format(train_data.shape))
train_image_id = train_data.iloc[:, 0:1].values.ravel()


test_data = pd.read_csv('testData.csv', header = None)
print("Shape of the testing data is {}".format(test_data.shape))
test_image_id = test_data.iloc[:, 0:1].values.ravel()
24+
def kmean(train_X, test_X, nc):
    """Learn a visual vocabulary of `nc` clusters from train_X.

    Fits KMeans on the training descriptors, then assigns every row of
    both train_X and test_X to its nearest cluster.

    Returns:
        (train_labels, test_labels): per-row cluster indices.
    """
    model = KMeans(n_clusters=nc)
    model = model.fit(train_X)
    return model.predict(train_X), model.predict(test_X)
43+
44+
# Cluster on the descriptor columns (5 onward) and record, for every
# keypoint, which cluster it landed in alongside its image id.
train_image_label, test_image_label = kmean(train_data.iloc[:,5:], test_data.iloc[:,5:], nc)

train_img_clster_map = np.column_stack((train_image_id, train_image_label))
test_img_clster_map = np.column_stack((test_image_id, test_image_label))
for fname, mapping in (("train_image_claster_map_128.csv", train_img_clster_map),
                       ("test_image_claster_map_128.csv", test_img_clster_map)):
    np.savetxt(fname, mapping, delimiter=",")
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import pandas as pd
2+
import numpy as np
3+
import csv
4+
5+
# Build one bag-of-visual-words histogram per training image: count how
# many of each image's descriptors fell into each of the `number_claster`
# k-means clusters, append the image's class label, and write the row to
# train_feature_64.csv.
number_claster = 64

# Rows of input_data are (image_id, cluster_id), grouped by image_id.
input_data = pd.read_csv('train_image_claster_map_64.csv', header = None)
input_data = input_data.values
print("Shape of the input data is {}".format(input_data.shape))

label_data = pd.read_csv('train_labels.csv', header = None)
label_data = label_data.values
print("Shape of the label data is {}".format(label_data.shape))


def _write_feature_row(feature, label):
    """Append one histogram (plus its integer class label) as a CSV row."""
    row = feature.tolist()
    row.append(int(label))
    with open("train_feature_64.csv", "a") as fp:
        csv.writer(fp, dialect='excel').writerow(row)


temp_feature = np.zeros(number_claster, dtype = int)
index = 0  # position of the current image's label inside label_data
temp_feature[int(input_data[0, 1])] += 1
for i in range(1, input_data.shape[0]):
    if input_data[i - 1, 0] == input_data[i, 0]:
        # Same image as the previous row: keep accumulating its histogram.
        temp_feature[int(input_data[i, 1])] += 1
    else:
        # New image: flush the finished histogram, then start a fresh one
        # seeded with this row's cluster hit.
        # NOTE(review): labels are read as label_data[0, index], i.e. one
        # row of labels in train_labels.csv — confirm that layout.
        _write_feature_row(temp_feature, label_data[0, index])
        temp_feature = np.zeros(number_claster, dtype = int)
        temp_feature[int(input_data[i, 1])] += 1
        index = index + 1

# Flush the histogram of the final image.
_write_feature_row(temp_feature, label_data[0, index])
temp_feature = []

BagOfVisualWord/4_knn.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Example of kNN implemented from Scratch in Python
2+
from sklearn.metrics import confusion_matrix
3+
import csv
4+
import random
5+
import math
6+
import operator
7+
import pandas as pd
8+
9+
10+
# Number of clusters == length of each feature histogram row.
number_claster = 128


def _load_feature_csv(filename, destination):
    """Read one feature CSV, cast the first `number_claster` columns of
    every row to float (the trailing column is the class label, kept as
    a string), and append each row to `destination`."""
    # Text mode: csv.reader requires text files on Python 3 ('rb' was the
    # Python-2 convention and breaks on 3).
    with open(filename, 'r') as csvfile:
        for row in csv.reader(csvfile):
            for y in range(number_claster):
                row[y] = float(row[y])
            destination.append(row)


def loadDataset(train_filename, test_filename, trainingSet=None, testSet=None):
    """Load train and test feature CSVs into the given lists.

    Args:
        train_filename, test_filename: paths to the feature CSVs.
        trainingSet, testSet: lists to fill in place. Defaults are None
            (fresh lists) instead of the original mutable [] defaults,
            which were shared across calls.
    """
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []
    _load_feature_csv(train_filename, trainingSet)
    _load_feature_csv(test_filename, testSet)
28+
29+
30+
def euclideanDistance(instance1, instance2, length):
    """Euclidean distance over the first `length` components of the
    two instances (the trailing label column is excluded by callers)."""
    total = sum((instance1[i] - instance2[i]) ** 2 for i in range(length))
    return math.sqrt(total)
35+
36+
def getNeighbors(trainingSet, testInstance, k):
    """Return the k training rows closest to testInstance.

    The last element of testInstance is treated as the class label and
    excluded from the distance computation. Ties keep training order
    (sorted() is stable, matching the original tuple sort).
    """
    length = len(testInstance) - 1
    ranked = sorted(trainingSet,
                    key=lambda row: euclideanDistance(testInstance, row, length))
    return ranked[:k]
47+
48+
def getResponse(neighbors):
    """Majority-vote the class labels of the neighbour rows.

    The label is each row's last element; returns the label with the
    most votes (ties resolved by sort order, as before).
    """
    classVotes = {}
    for neighbor in neighbors:
        response = neighbor[-1]
        classVotes[response] = classVotes.get(response, 0) + 1
    # dict.iteritems() existed only on Python 2; items() works on 2 and 3.
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0]
58+
59+
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose true label (last element)
    equals the corresponding entry of `predictions`.

    The original also built a `temp_list` of labels that only fed a
    commented-out confusion matrix — dead work on every call, removed.
    """
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    print('The length of the test set is: ' + repr(len(testSet)))
    return (correct/float(len(testSet))) * 100.0
70+
71+
def main():
    """Classify bag-of-visual-words feature rows with k-nearest neighbours
    and print the resulting accuracy."""
    # prepare data
    trainingSet=[]
    testSet=[]
    loadDataset('train_feature_128.csv', 'test_feature_128.csv', trainingSet, testSet)
    # Python-2 print statements replaced with the function form.
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))
    # generate predictions
    predictions=[]
    k = 21  # number of neighbours to vote over
    for x in range(len(testSet)):
        neighbors = getNeighbors(trainingSet, testSet[x], k)
        result = getResponse(neighbors)
        predictions.append(result)
    accuracy = getAccuracy(testSet, predictions)
    print('K = '+repr(k) + ' Number of Features = '+repr(number_claster) + ' Accuracy: ' + repr(accuracy) + '%')

main()

BagOfVisualWord/5_svm.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Example of kNN implemented from Scratch in Python
2+
from sklearn.metrics import confusion_matrix
3+
import csv
4+
import random
5+
import math
6+
import operator
7+
import pandas as pd
8+
import numpy as np
9+
import matplotlib.pyplot as plt
10+
from matplotlib import style
11+
# Select ggplot styling for any matplotlib figures produced while running.
style.use("ggplot")
from sklearn import svm
13+
14+
# Number of clusters == length of each feature histogram row.
number_claster = 8


def _load_feature_csv(filename, destination):
    """Read one feature CSV, cast the first `number_claster` columns of
    every row to float (the trailing column is the class label, kept as
    a string), and append each row to `destination`."""
    # Text mode: csv.reader requires text files on Python 3 ('rb' was the
    # Python-2 convention and breaks on 3).
    with open(filename, 'r') as csvfile:
        for row in csv.reader(csvfile):
            for y in range(number_claster):
                row[y] = float(row[y])
            destination.append(row)


def loadDataset(train_filename, test_filename, trainingSet=None, testSet=None):
    """Load train and test feature CSVs into the given lists.

    Args:
        train_filename, test_filename: paths to the feature CSVs.
        trainingSet, testSet: lists to fill in place. Defaults are None
            (fresh lists) instead of the original mutable [] defaults,
            which were shared across calls.
    """
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []
    _load_feature_csv(train_filename, trainingSet)
    _load_feature_csv(test_filename, testSet)
33+
34+
35+
36+
37+
def getResponse(neighbors):
    """Majority-vote the class labels of the neighbour rows.

    The label is each row's last element; returns the label with the
    most votes (ties resolved by sort order, as before).
    """
    classVotes = {}
    for neighbor in neighbors:
        response = neighbor[-1]
        classVotes[response] = classVotes.get(response, 0) + 1
    # dict.iteritems() existed only on Python 2; items() works on 2 and 3.
    sortedVotes = sorted(classVotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0]
47+
48+
49+
def getAccuracy(testSet, predictions):
    """Print a confusion matrix over (testSet, predictions) and return the
    percentage of entries where the two agree.

    Here testSet is the flat list of true labels (unlike the kNN variant,
    which receives whole feature rows).
    """
    temp_list = list(testSet)
    correct = 0
    for x, actual in enumerate(testSet):
        if actual == predictions[x]:
            correct += 1
    print("The Confusion Matrix is:")
    print(confusion_matrix(temp_list, predictions))
    print(len(testSet))
    return (correct / float(len(testSet))) * 100.0
60+
61+
62+
def main():
    """Train a linear SVM on the bag-of-visual-words histograms and report
    per-sample predictions, overall accuracy, and fit/predict timing."""
    # prepare data
    trainingSet = []
    trainingSet1 = []
    testSet = []
    testSet1 = []
    testLabels = []
    trainLabels = []
    predictions = []
    loadDataset('train_feature_8.csv', 'test_feature_8.csv', trainingSet, testSet)
    # Python-2 print statements replaced with the function form.
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    print(len(trainingSet))

    # Split each row into its feature histogram and trailing class label.
    for x in range(len(trainingSet)):
        trainLabels.append(trainingSet[x][-1])
    for x in range(len(trainingSet)):
        trainingSet1.append(trainingSet[x][:-1])
    for x in range(len(testSet)):
        testLabels.append(testSet[x][-1])
    for x in range(len(testSet)):
        testSet1.append(testSet[x][:-1])

    clf=svm.SVC(kernel='linear',gamma=1,C=1.0,probability=True )
    import time
    t=time.time()
    clf.fit(trainingSet1,trainLabels)
    clf.score(trainingSet1,trainLabels)
    t1=time.time()
    t2=time.time()
    for x in range(len(testSet1)):
        # NOTE(review): clf.predict / decision_function are given a single
        # 1-D sample; modern scikit-learn requires a 2-D array — confirm
        # the installed version before upgrading.
        result = clf.predict(testSet1[x])
        result1=clf.decision_function(testSet1[x])
        print(result1)
        predictions.append(result)
        print('> predicted=' + repr(result[0]) + ', actual=' + repr(testLabels[x]))
    t3 = time.time()
    accuracy = getAccuracy(testLabels, predictions)

    print('Accuracy: ' + repr(accuracy) + '%')
    print("Training time:", (t1 - t))
    print("Testing Time:", (t3 - t2))

main()

0 commit comments

Comments
 (0)