Skip to content

Commit 6566025

Browse files
author
Yu Feng
committed
add multi-layer perceptron
1 parent a603a77 commit 6566025

File tree

4 files changed

+169
-3
lines changed

4 files changed

+169
-3
lines changed

dataflow_search.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,14 @@
5656
# for 3D DNN layer is
5757
# (width, height, disparity, in_channel, out_channel,
5858
# kernel_width, kernel_height, kernel_disp, stride, Deconv?)
59+
#
60+
# for MLP is
61+
# [in_channel, out_channel]
5962
def import_dnn(filename, ifmap_dim, ifmap3d_dim):
6063
# a list to store the dnn configuration
6164
dnn = []
6265
weight_dim = []
6366

64-
is_2d_layer = True
65-
6667
# The weight input format as follows:
6768
# [out_channel,kernel_width,kernel_height,stride,Deconv?]
6869
for line in open(filename):
@@ -92,6 +93,14 @@ def import_dnn(filename, ifmap_dim, ifmap3d_dim):
9293
ifmap_dim[1]/prev_layer["stride"], \
9394
prev_layer["out_channel"]]
9495

96+
elif len(ls) == 4:
97+
dnn.append({
98+
"ifmap" : [int(ls[0])*int(ls[1]), int(ls[2])],
99+
"out_channel" : int(ls[3]),
100+
"type" : "MLP",
101+
"Deconv?" : False,
102+
"stride" : 1
103+
})
95104
else:
96105
dnn.append({"ifmap" : ifmap3d_dim,
97106
"out_channel" : int(ls[0]),

dnn_optimizer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import layer_static_method
1414
import layer_exhaustive_searcher
1515
import deconv_exhaustive_searcher
16+
from multi_layer_perceptron import MultiLayerPerceptron
1617

1718
import layer3d_optimizer
1819
import layer3d_exhaustive_searcher
@@ -45,6 +46,9 @@ def setup(meta_data, hardware_constraints):
4546

4647
def single_layer_optimization(data, sys_info):
4748
global method, enable, buffer_partition
49+
if data["type"] == "MLP":
50+
return MultiLayerPerceptron(data, sys_info).optimize()
51+
4852
# if "static" option is enabled, it will be prioritized
4953
if enable["static"]:
5054
return layer_static_method.\

layer_base_method.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def buffer_utilization(self, x):
6565
+ self.Ci*(self.S*x[1]+self.K_h/2)*(self.S*x[2]+self.K_h/2))
6666

6767
def total_batch_number(self, h_0, w_0, c_0):
68-
return math.ceil(self.H*self.W*self.Co / (h_0*w_0*c_0))
68+
return math.ceil(float(self.H*self.W*self.Co) / (h_0*w_0*c_0))
6969

7070
# (ofmap + ifmap)*total_batch + (ofmap+weights)*Co/c_0
7171
def row_major_data_transfer(self, h_0, w_0, c_0):

multi_layer_perceptron.py

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
#!/usr/bin/python2.7
2+
3+
# public library
4+
import math
5+
import numpy as np
6+
7+
class MultiLayerPerceptron(object):
    """Optimizer for a single multi-layer-perceptron (fully-connected) layer.

    Given the layer shape (N input rows, Ci input channels, Co output
    channels) and the systolic-array hardware description, it searches for
    a row-tile size x_0 whose working set fits the on-chip buffer, then
    reports total data transfer, cycle count, utilizations and whether the
    layer is compute- or memory-bound.
    """

    # info for systolic array
    A = None  # systolic array dimension (A x A processing elements)

    # memory bandwidth, expressed in elements transferred per cycle
    B = None

    # total on-chip buffer size (in elements)
    buf_size = None

    # input layer dimension
    N = None   # number of input rows (NumberOfPoints x NumberOfFeature)
    Ci = None  # channels for ifmap
    Co = None  # channels for ofmap

    # per-tensor on-chip buffer budgets, computed in optimize()
    bufi_size = None
    bufo_size = None
    bufw_size = None

    def __init__(self, data, sys_info):
        """Store the layer description and derive hardware parameters.

        data     -- layer dict with keys "ifmap" ([N, Ci]) and "out_channel".
        sys_info -- hardware dict with keys "sa_size", "memory_bandwidth",
                    "bit_width" and "bufsize".
        """
        self.data = data
        self.sys_info = sys_info
        self.A = sys_info["sa_size"]
        # bytes/cycle divided by bytes/element -> elements/cycle
        self.B = sys_info["memory_bandwidth"]/(sys_info["bit_width"]/8)
        self.buf_size = sys_info["bufsize"]

    def init_setup(self):
        """Unpack the layer description into N / Ci / Co and size the weights."""
        layer_info = self.data

        # set up the new layer information
        [self.N, self.Ci] = layer_info["ifmap"]
        self.Co = layer_info["out_channel"]

        # the entire weight matrix (Ci x Co) is kept on-chip
        self.bufw_size = self.Co * self.Ci

    ###############################################################
    #                     general process                         #
    ###############################################################

    # compute buffer utilization
    def buffer_utilization(self, x):
        """Buffer occupancy (in elements) for a tile of x rows."""
        # buffer = ofmap + weights + ifmap
        return (x*self.Co + self.Ci*self.Co + x*self.Ci)

    # (ofmap + ifmap)*total_batch + weights
    def data_transfer(self, x):
        """Total off-chip traffic (in elements) when processing x rows per batch."""
        # calculate the total batch
        total_batch = math.ceil(float(self.N) / x)

        # ofmap, ifmap and kernel tile size
        ofmap_tile_size = self.Co * x
        ifmap_tile_size = self.Ci * x
        kernel_tile_size = self.Co*self.Ci

        # ofmap + ifmap transfer, streamed once per batch
        total_transfer = (ofmap_tile_size + ifmap_tile_size) * total_batch

        # the weights are loaded only once
        total_transfer += kernel_tile_size

        return total_transfer

    def systolic_array_utilization(self, x):
        """Fraction of the systolic array's peak throughput achieved for x rows."""
        A = self.A
        # effective width utilization after rounding Co up to multiples of A
        A_w_uiti = math.ceil(self.Co/math.ceil(float(self.Co)/A))

        total_usage = x * self.Co
        # BUGFIX: was math.ceil(float(x/A)); under Python 2 integer
        # division x/A truncates before the conversion, understating the
        # rounded-up value.  Convert first, consistent with the Co term.
        round_up_val = math.ceil(float(x)/A)*A \
                        * math.ceil(float(self.Co)/A)*A

        # the pct of extra delay due to output-stationary
        delay_pct = float(self.Ci)/(self.Ci+A_w_uiti)

        return delay_pct * total_usage / round_up_val

    def compute_bound_cycle(self, util_rate):
        """Cycles needed to compute the whole layer at the given utilization."""
        # total number of ops
        total_computation = (self.N*self.Ci*self.Co)

        # systolic array calculation capacity
        comp_cap = (self.A*self.A) * util_rate

        return total_computation / comp_cap

    def process_parameter(self, x):
        """Evaluate a candidate tile size x and return the resulting stats.

        Returns a dict with total_transfer, total_cycle, the two
        utilizations, the evened-out tile size x_0 and the bound ("C" for
        compute-bound, "M" for memory-bound).  Exits the process if the
        buffer budget is exceeded.
        """
        x = math.floor(x)
        bound = "C"
        # make the tile size even for every batch
        x_0 = min(self.N/math.ceil(self.N/round(x)), self.N)

        # (ofmap + ifmap)*total_batch + weights
        total_transfer = self.data_transfer(x_0)

        # compute the utilization of systolic array
        util_sys_arr = self.systolic_array_utilization(x_0)

        # compute the utilization of buffer
        util_buf = float(self.buffer_utilization(x_0))/self.buf_size

        # small tolerance for floating-point rounding
        if util_buf > 1.01:
            print("ERROR: the utilization of buffer is over 100%")
            exit()

        # the layer is bound by whichever of compute / memory takes longer
        if self.compute_bound_cycle(util_sys_arr) > total_transfer/self.B:
            bound = "C"
            total_cycle = self.compute_bound_cycle(util_sys_arr)
        else:
            bound = "M"
            total_cycle = total_transfer/self.B

        ret = {
            "total_transfer": round(total_transfer),
            "total_cycle": round(total_cycle),
            "systolic_array_utilization": util_sys_arr,
            "buffer_utilization": util_buf,
            "x_0": x_0,
            "Bound" : bound
        }

        return ret

    # optimize one layer
    def optimize(self):
        """Search for the largest tile size that fits the buffer and evaluate it."""
        self.init_setup()

        # if the weights alone exceed self.buf_size we should skip it.
        if self.bufw_size > self.buf_size:
            print("FAIL: the entire weight cannot be stored in buffer")
            exit()

        # split the remaining buffer between ifmap and ofmap,
        # proportionally to their channel counts
        self.bufi_size = (self.buf_size - self.bufw_size)*self.Ci/(self.Ci+self.Co)
        self.bufo_size = (self.buf_size - self.bufw_size)*self.Co/(self.Ci+self.Co)

        # set the initial guess;
        x0 = self.A

        # grow the tile in steps of A while the next step's ifmap
        # still fits into the ifmap buffer budget
        while x0 < self.N and (x0+self.A)*self.Ci < self.bufi_size:
            x0 = x0 + self.A

        return self.process_parameter(x0)

0 commit comments

Comments
 (0)