# Path: blob/master/modules/dnn/misc/quantize_face_detector.py
from __future__ import print_function1import sys2import argparse3import cv2 as cv4import tensorflow as tf5import numpy as np6import struct78if sys.version_info > (3,):9long = int1011from tensorflow.python.tools import optimize_for_inference_lib12from tensorflow.tools.graph_transforms import TransformGraph13from tensorflow.core.framework.node_def_pb2 import NodeDef14from google.protobuf import text_format1516parser = argparse.ArgumentParser(description="Use this script to create TensorFlow graph "17"with weights from OpenCV's face detection network. "18"Only backbone part of SSD model is converted this way. "19"Look for .pbtxt configuration file at "20"https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/opencv_face_detector.pbtxt")21parser.add_argument('--model', help='Path to .caffemodel weights', required=True)22parser.add_argument('--proto', help='Path to .prototxt Caffe model definition', required=True)23parser.add_argument('--pb', help='Path to output .pb TensorFlow model', required=True)24parser.add_argument('--pbtxt', help='Path to output .pbxt TensorFlow graph', required=True)25parser.add_argument('--quantize', help='Quantize weights to uint8', action='store_true')26parser.add_argument('--fp16', help='Convert weights to half precision floats', action='store_true')27args = parser.parse_args()2829assert(not args.quantize or not args.fp16)3031dtype = tf.float16 if args.fp16 else tf.float323233################################################################################34cvNet = cv.dnn.readNetFromCaffe(args.proto, args.model)3536def dnnLayer(name):37return cvNet.getLayer(long(cvNet.getLayerId(name)))3839def scale(x, name):40with tf.variable_scope(name):41layer = dnnLayer(name)42w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')43if len(layer.blobs) > 1:44b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='add')45return tf.nn.bias_add(tf.multiply(x, w), b)46else:47return tf.multiply(x, w, name)4849def conv(x, name, 
stride=1, pad='SAME', dilation=1, activ=None):50with tf.variable_scope(name):51layer = dnnLayer(name)52w = tf.Variable(layer.blobs[0].transpose(2, 3, 1, 0), dtype=dtype, name='weights')53if dilation == 1:54conv = tf.nn.conv2d(x, filter=w, strides=(1, stride, stride, 1), padding=pad)55else:56assert(stride == 1)57conv = tf.nn.atrous_conv2d(x, w, rate=dilation, padding=pad)5859if len(layer.blobs) > 1:60b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='bias')61conv = tf.nn.bias_add(conv, b)62return activ(conv) if activ else conv6364def batch_norm(x, name):65with tf.variable_scope(name):66# Unfortunately, TensorFlow's batch normalization layer doesn't work with fp16 input.67# Here we do a cast to fp32 but remove it in the frozen graph.68if x.dtype != tf.float32:69x = tf.cast(x, tf.float32)7071layer = dnnLayer(name)72assert(len(layer.blobs) >= 3)7374mean = layer.blobs[0].flatten()75std = layer.blobs[1].flatten()76scale = layer.blobs[2].flatten()7778eps = 1e-579hasBias = len(layer.blobs) > 380hasWeights = scale.shape != (1,)8182if not hasWeights and not hasBias:83mean /= scale[0]84std /= scale[0]8586mean = tf.Variable(mean, dtype=tf.float32, name='mean')87std = tf.Variable(std, dtype=tf.float32, name='std')88gamma = tf.Variable(scale if hasWeights else np.ones(mean.shape), dtype=tf.float32, name='gamma')89beta = tf.Variable(layer.blobs[3].flatten() if hasBias else np.zeros(mean.shape), dtype=tf.float32, name='beta')90bn = tf.nn.fused_batch_norm(x, gamma, beta, mean, std, eps,91is_training=False)[0]92if bn.dtype != dtype:93bn = tf.cast(bn, dtype)94return bn9596def l2norm(x, name):97with tf.variable_scope(name):98layer = dnnLayer(name)99w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')100return tf.nn.l2_normalize(x, 3, epsilon=1e-10) * w101102### Graph definition ###########################################################103inp = tf.placeholder(dtype, [1, 300, 300, 3], 'data')104data_bn = batch_norm(inp, 'data_bn')105data_scale = scale(data_bn, 
'data_scale')106107# Instead of tf.pad we use tf.space_to_batch_nd layers which override convolution's padding strategy to explicit numbers108# data_scale = tf.pad(data_scale, [[0, 0], [3, 3], [3, 3], [0, 0]])109data_scale = tf.space_to_batch_nd(data_scale, [1, 1], [[3, 3], [3, 3]], name='Pad')110conv1_h = conv(data_scale, stride=2, pad='VALID', name='conv1_h')111112conv1_bn_h = batch_norm(conv1_h, 'conv1_bn_h')113conv1_scale_h = scale(conv1_bn_h, 'conv1_scale_h')114conv1_relu = tf.nn.relu(conv1_scale_h)115conv1_pool = tf.layers.max_pooling2d(conv1_relu, pool_size=(3, 3), strides=(2, 2),116padding='SAME', name='conv1_pool')117118layer_64_1_conv1_h = conv(conv1_pool, 'layer_64_1_conv1_h')119layer_64_1_bn2_h = batch_norm(layer_64_1_conv1_h, 'layer_64_1_bn2_h')120layer_64_1_scale2_h = scale(layer_64_1_bn2_h, 'layer_64_1_scale2_h')121layer_64_1_relu2 = tf.nn.relu(layer_64_1_scale2_h)122layer_64_1_conv2_h = conv(layer_64_1_relu2, 'layer_64_1_conv2_h')123layer_64_1_sum = layer_64_1_conv2_h + conv1_pool124125layer_128_1_bn1_h = batch_norm(layer_64_1_sum, 'layer_128_1_bn1_h')126layer_128_1_scale1_h = scale(layer_128_1_bn1_h, 'layer_128_1_scale1_h')127layer_128_1_relu1 = tf.nn.relu(layer_128_1_scale1_h)128layer_128_1_conv1_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv1_h')129layer_128_1_bn2 = batch_norm(layer_128_1_conv1_h, 'layer_128_1_bn2')130layer_128_1_scale2 = scale(layer_128_1_bn2, 'layer_128_1_scale2')131layer_128_1_relu2 = tf.nn.relu(layer_128_1_scale2)132layer_128_1_conv2 = conv(layer_128_1_relu2, 'layer_128_1_conv2')133layer_128_1_conv_expand_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv_expand_h')134layer_128_1_sum = layer_128_1_conv2 + layer_128_1_conv_expand_h135136layer_256_1_bn1 = batch_norm(layer_128_1_sum, 'layer_256_1_bn1')137layer_256_1_scale1 = scale(layer_256_1_bn1, 'layer_256_1_scale1')138layer_256_1_relu1 = tf.nn.relu(layer_256_1_scale1)139140# layer_256_1_conv1 = tf.pad(layer_256_1_relu1, [[0, 0], [1, 1], [1, 1], [0, 
0]])141layer_256_1_conv1 = tf.space_to_batch_nd(layer_256_1_relu1, [1, 1], [[1, 1], [1, 1]], name='Pad_1')142layer_256_1_conv1 = conv(layer_256_1_conv1, stride=2, pad='VALID', name='layer_256_1_conv1')143144layer_256_1_bn2 = batch_norm(layer_256_1_conv1, 'layer_256_1_bn2')145layer_256_1_scale2 = scale(layer_256_1_bn2, 'layer_256_1_scale2')146layer_256_1_relu2 = tf.nn.relu(layer_256_1_scale2)147layer_256_1_conv2 = conv(layer_256_1_relu2, 'layer_256_1_conv2')148layer_256_1_conv_expand = conv(layer_256_1_relu1, stride=2, name='layer_256_1_conv_expand')149layer_256_1_sum = layer_256_1_conv2 + layer_256_1_conv_expand150151layer_512_1_bn1 = batch_norm(layer_256_1_sum, 'layer_512_1_bn1')152layer_512_1_scale1 = scale(layer_512_1_bn1, 'layer_512_1_scale1')153layer_512_1_relu1 = tf.nn.relu(layer_512_1_scale1)154layer_512_1_conv1_h = conv(layer_512_1_relu1, 'layer_512_1_conv1_h')155layer_512_1_bn2_h = batch_norm(layer_512_1_conv1_h, 'layer_512_1_bn2_h')156layer_512_1_scale2_h = scale(layer_512_1_bn2_h, 'layer_512_1_scale2_h')157layer_512_1_relu2 = tf.nn.relu(layer_512_1_scale2_h)158layer_512_1_conv2_h = conv(layer_512_1_relu2, dilation=2, name='layer_512_1_conv2_h')159layer_512_1_conv_expand_h = conv(layer_512_1_relu1, 'layer_512_1_conv_expand_h')160layer_512_1_sum = layer_512_1_conv2_h + layer_512_1_conv_expand_h161162last_bn_h = batch_norm(layer_512_1_sum, 'last_bn_h')163last_scale_h = scale(last_bn_h, 'last_scale_h')164fc7 = tf.nn.relu(last_scale_h, name='last_relu')165166conv6_1_h = conv(fc7, 'conv6_1_h', activ=tf.nn.relu)167conv6_2_h = conv(conv6_1_h, stride=2, name='conv6_2_h', activ=tf.nn.relu)168conv7_1_h = conv(conv6_2_h, 'conv7_1_h', activ=tf.nn.relu)169170# conv7_2_h = tf.pad(conv7_1_h, [[0, 0], [1, 1], [1, 1], [0, 0]])171conv7_2_h = tf.space_to_batch_nd(conv7_1_h, [1, 1], [[1, 1], [1, 1]], name='Pad_2')172conv7_2_h = conv(conv7_2_h, stride=2, pad='VALID', name='conv7_2_h', activ=tf.nn.relu)173174conv8_1_h = conv(conv7_2_h, pad='SAME', name='conv8_1_h', 
activ=tf.nn.relu)175conv8_2_h = conv(conv8_1_h, pad='SAME', name='conv8_2_h', activ=tf.nn.relu)176conv9_1_h = conv(conv8_2_h, 'conv9_1_h', activ=tf.nn.relu)177conv9_2_h = conv(conv9_1_h, pad='SAME', name='conv9_2_h', activ=tf.nn.relu)178179conv4_3_norm = l2norm(layer_256_1_relu1, 'conv4_3_norm')180181### Locations and confidences ##################################################182locations = []183confidences = []184flattenLayersNames = [] # Collect all reshape layers names that should be replaced to flattens.185for top, suffix in zip([locations, confidences], ['_mbox_loc', '_mbox_conf']):186for bottom, name in zip([conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h],187['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']):188name += suffix189flat = tf.layers.flatten(conv(bottom, name))190flattenLayersNames.append(flat.name[:flat.name.find(':')])191top.append(flat)192193mbox_loc = tf.concat(locations, axis=-1, name='mbox_loc')194mbox_conf = tf.concat(confidences, axis=-1, name='mbox_conf')195196total = int(np.prod(mbox_conf.shape[1:]))197mbox_conf_reshape = tf.reshape(mbox_conf, [-1, 2], name='mbox_conf_reshape')198mbox_conf_softmax = tf.nn.softmax(mbox_conf_reshape, name='mbox_conf_softmax')199mbox_conf_flatten = tf.reshape(mbox_conf_softmax, [-1, total], name='mbox_conf_flatten')200flattenLayersNames.append('mbox_conf_flatten')201202with tf.Session() as sess:203sess.run(tf.global_variables_initializer())204205### Check correctness ######################################################206out_nodes = ['mbox_loc', 'mbox_conf_flatten']207inp_nodes = [inp.name[:inp.name.find(':')]]208209np.random.seed(2701)210inputData = np.random.standard_normal([1, 3, 300, 300]).astype(np.float32)211212cvNet.setInput(inputData)213cvNet.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)214outDNN = cvNet.forward(out_nodes)215216outTF = sess.run([mbox_loc, mbox_conf_flatten], feed_dict={inp: inputData.transpose(0, 2, 3, 1)})217print('Max diff @ locations: %e' % 
np.max(np.abs(outDNN[0] - outTF[0])))218print('Max diff @ confidence: %e' % np.max(np.abs(outDNN[1] - outTF[1])))219220# Save a graph221graph_def = sess.graph.as_graph_def()222223# Freeze graph. Replaces variables to constants.224graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, out_nodes)225# Optimize graph. Removes training-only ops, unused nodes.226graph_def = optimize_for_inference_lib.optimize_for_inference(graph_def, inp_nodes, out_nodes, dtype.as_datatype_enum)227# Fuse constant operations.228transforms = ["fold_constants(ignore_errors=True)"]229if args.quantize:230transforms += ["quantize_weights(minimum_size=0)"]231transforms += ["sort_by_execution_order"]232graph_def = TransformGraph(graph_def, inp_nodes, out_nodes, transforms)233234# By default, float16 weights are stored in repeated tensor's field called235# `half_val`. It has type int32 with leading zeros for unused bytes.236# This type is encoded by Variant that means only 7 bits are used for value237# representation but the last one is indicated the end of encoding. This way238# float16 might takes 1 or 2 or 3 bytes depends on value. 
To improve compression,239# we replace all `half_val` values to `tensor_content` using only 2 bytes for everyone.240for node in graph_def.node:241if 'value' in node.attr:242halfs = node.attr["value"].tensor.half_val243if not node.attr["value"].tensor.tensor_content and halfs:244node.attr["value"].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs)245node.attr["value"].tensor.ClearField('half_val')246247# Serialize248with tf.gfile.FastGFile(args.pb, 'wb') as f:249f.write(graph_def.SerializeToString())250251252################################################################################253# Write a text graph representation254################################################################################255def tensorMsg(values):256msg = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values)257for value in values:258msg += 'float_val: %f ' % value259return msg + '}'260261# Remove Const nodes and unused attributes.262for i in reversed(range(len(graph_def.node))):263if graph_def.node[i].op in ['Const', 'Dequantize']:264del graph_def.node[i]265for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim',266'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training',267'Tpaddings', 'Tblock_shape', 'Tcrops']:268if attr in graph_def.node[i].attr:269del graph_def.node[i].attr[attr]270271# Append prior box generators272min_sizes = [30, 60, 111, 162, 213, 264]273max_sizes = [60, 111, 162, 213, 264, 315]274steps = [8, 16, 32, 64, 100, 300]275aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]276layers = [conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h]277for i in range(6):278priorBox = NodeDef()279priorBox.name = 'PriorBox_%d' % i280priorBox.op = 'PriorBox'281priorBox.input.append(layers[i].name[:layers[i].name.find(':')])282priorBox.input.append(inp_nodes[0]) # data283284text_format.Merge('i: %d' % min_sizes[i], priorBox.attr["min_size"])285text_format.Merge('i: %d' % max_sizes[i], priorBox.attr["max_size"])286text_format.Merge('b: 
true', priorBox.attr["flip"])287text_format.Merge('b: false', priorBox.attr["clip"])288text_format.Merge(tensorMsg(aspect_ratios[i]), priorBox.attr["aspect_ratio"])289text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])290text_format.Merge('f: %f' % steps[i], priorBox.attr["step"])291text_format.Merge('f: 0.5', priorBox.attr["offset"])292graph_def.node.extend([priorBox])293294# Concatenate prior boxes295concat = NodeDef()296concat.name = 'mbox_priorbox'297concat.op = 'ConcatV2'298for i in range(6):299concat.input.append('PriorBox_%d' % i)300concat.input.append('mbox_loc/axis')301graph_def.node.extend([concat])302303# DetectionOutput layer304detectionOut = NodeDef()305detectionOut.name = 'detection_out'306detectionOut.op = 'DetectionOutput'307308detectionOut.input.append('mbox_loc')309detectionOut.input.append('mbox_conf_flatten')310detectionOut.input.append('mbox_priorbox')311312text_format.Merge('i: 2', detectionOut.attr['num_classes'])313text_format.Merge('b: true', detectionOut.attr['share_location'])314text_format.Merge('i: 0', detectionOut.attr['background_label_id'])315text_format.Merge('f: 0.45', detectionOut.attr['nms_threshold'])316text_format.Merge('i: 400', detectionOut.attr['top_k'])317text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])318text_format.Merge('i: 200', detectionOut.attr['keep_top_k'])319text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])320321graph_def.node.extend([detectionOut])322323# Replace L2Normalization subgraph onto a single node.324for i in reversed(range(len(graph_def.node))):325if graph_def.node[i].name in ['conv4_3_norm/l2_normalize/Square',326'conv4_3_norm/l2_normalize/Sum',327'conv4_3_norm/l2_normalize/Maximum',328'conv4_3_norm/l2_normalize/Rsqrt']:329del graph_def.node[i]330for node in graph_def.node:331if node.name == 'conv4_3_norm/l2_normalize':332node.op = 
'L2Normalize'333node.input.pop()334node.input.pop()335node.input.append(layer_256_1_relu1.name)336node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')337break338339softmaxShape = NodeDef()340softmaxShape.name = 'reshape_before_softmax'341softmaxShape.op = 'Const'342text_format.Merge(343'tensor {'344' dtype: DT_INT32'345' tensor_shape { dim { size: 3 } }'346' int_val: 0'347' int_val: -1'348' int_val: 2'349'}', softmaxShape.attr["value"])350graph_def.node.extend([softmaxShape])351352for node in graph_def.node:353if node.name == 'mbox_conf_reshape':354node.input[1] = softmaxShape.name355elif node.name == 'mbox_conf_softmax':356text_format.Merge('i: 2', node.attr['axis'])357elif node.name in flattenLayersNames:358node.op = 'Flatten'359inpName = node.input[0]360node.input.pop()361node.input.pop()362node.input.append(inpName)363364tf.train.write_graph(graph_def, "", args.pbtxt, as_text=True)365366367