# Path: blob/master/modules/dnn/misc/quantize_face_detector.py
from __future__ import print_function1import sys2import argparse3import cv2 as cv4import tensorflow as tf5import numpy as np6import struct78if sys.version_info > (3,):9long = int1011from tensorflow.python.tools import optimize_for_inference_lib12from tensorflow.tools.graph_transforms import TransformGraph13from tensorflow.core.framework.node_def_pb2 import NodeDef14from google.protobuf import text_format1516parser = argparse.ArgumentParser(description="Use this script to create TensorFlow graph "17"with weights from OpenCV's face detection network. "18"Only backbone part of SSD model is converted this way. "19"Look for .pbtxt configuration file at "20"https://github.com/opencv/opencv_extra/tree/master/testdata/dnn/opencv_face_detector.pbtxt")21parser.add_argument('--model', help='Path to .caffemodel weights', required=True)22parser.add_argument('--proto', help='Path to .prototxt Caffe model definition', required=True)23parser.add_argument('--pb', help='Path to output .pb TensorFlow model', required=True)24parser.add_argument('--pbtxt', help='Path to output .pbxt TensorFlow graph', required=True)25parser.add_argument('--quantize', help='Quantize weights to uint8', action='store_true')26parser.add_argument('--fp16', help='Convert weights to half precision floats', action='store_true')27args = parser.parse_args()2829assert(not args.quantize or not args.fp16)3031dtype = tf.float16 if args.fp16 else tf.float323233################################################################################34cvNet = cv.dnn.readNetFromCaffe(args.proto, args.model)3536def dnnLayer(name):37return cvNet.getLayer(long(cvNet.getLayerId(name)))3839def scale(x, name):40with tf.variable_scope(name):41layer = dnnLayer(name)42w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')43if len(layer.blobs) > 1:44b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='add')45return tf.nn.bias_add(tf.multiply(x, w), b)46else:47return tf.multiply(x, w, name)4849def conv(x, name, 
stride=1, pad='SAME', dilation=1, activ=None):50with tf.variable_scope(name):51layer = dnnLayer(name)52w = tf.Variable(layer.blobs[0].transpose(2, 3, 1, 0), dtype=dtype, name='weights')53if dilation == 1:54conv = tf.nn.conv2d(x, filter=w, strides=(1, stride, stride, 1), padding=pad)55else:56assert(stride == 1)57conv = tf.nn.atrous_conv2d(x, w, rate=dilation, padding=pad)5859if len(layer.blobs) > 1:60b = tf.Variable(layer.blobs[1].flatten(), dtype=dtype, name='bias')61conv = tf.nn.bias_add(conv, b)62return activ(conv) if activ else conv6364def batch_norm(x, name):65with tf.variable_scope(name):66# Unfortunately, TensorFlow's batch normalization layer doesn't work with fp16 input.67# Here we do a cast to fp32 but remove it in the frozen graph.68if x.dtype != tf.float32:69x = tf.cast(x, tf.float32)7071layer = dnnLayer(name)72assert(len(layer.blobs) >= 3)7374mean = layer.blobs[0].flatten()75std = layer.blobs[1].flatten()76scale = layer.blobs[2].flatten()7778eps = 1e-579hasBias = len(layer.blobs) > 380hasWeights = scale.shape != (1,)8182if not hasWeights and not hasBias:83mean /= scale[0]84std /= scale[0]8586mean = tf.Variable(mean, dtype=tf.float32, name='mean')87std = tf.Variable(std, dtype=tf.float32, name='std')88gamma = tf.Variable(scale if hasWeights else np.ones(mean.shape), dtype=tf.float32, name='gamma')89beta = tf.Variable(layer.blobs[3].flatten() if hasBias else np.zeros(mean.shape), dtype=tf.float32, name='beta')90bn = tf.nn.fused_batch_norm(x, gamma, beta, mean, std, eps,91is_training=False)[0]92if bn.dtype != dtype:93bn = tf.cast(bn, dtype)94return bn9596def l2norm(x, name):97with tf.variable_scope(name):98layer = dnnLayer(name)99w = tf.Variable(layer.blobs[0].flatten(), dtype=dtype, name='mul')100return tf.nn.l2_normalize(x, 3, epsilon=1e-10) * w101102### Graph definition ###########################################################103inp = tf.placeholder(dtype, [1, 300, 300, 3], 'data')104data_bn = batch_norm(inp, 'data_bn')105data_scale = scale(data_bn, 
'data_scale')106107# Instead of tf.pad we use tf.space_to_batch_nd layers which override convolution's padding strategy to explicit numbers108# data_scale = tf.pad(data_scale, [[0, 0], [3, 3], [3, 3], [0, 0]])109data_scale = tf.space_to_batch_nd(data_scale, [1, 1], [[3, 3], [3, 3]], name='Pad')110conv1_h = conv(data_scale, stride=2, pad='VALID', name='conv1_h')111112conv1_bn_h = batch_norm(conv1_h, 'conv1_bn_h')113conv1_scale_h = scale(conv1_bn_h, 'conv1_scale_h')114conv1_relu = tf.nn.relu(conv1_scale_h)115conv1_pool = tf.layers.max_pooling2d(conv1_relu, pool_size=(3, 3), strides=(2, 2),116padding='SAME', name='conv1_pool')117118layer_64_1_conv1_h = conv(conv1_pool, 'layer_64_1_conv1_h')119layer_64_1_bn2_h = batch_norm(layer_64_1_conv1_h, 'layer_64_1_bn2_h')120layer_64_1_scale2_h = scale(layer_64_1_bn2_h, 'layer_64_1_scale2_h')121layer_64_1_relu2 = tf.nn.relu(layer_64_1_scale2_h)122layer_64_1_conv2_h = conv(layer_64_1_relu2, 'layer_64_1_conv2_h')123layer_64_1_sum = layer_64_1_conv2_h + conv1_pool124125layer_128_1_bn1_h = batch_norm(layer_64_1_sum, 'layer_128_1_bn1_h')126layer_128_1_scale1_h = scale(layer_128_1_bn1_h, 'layer_128_1_scale1_h')127layer_128_1_relu1 = tf.nn.relu(layer_128_1_scale1_h)128layer_128_1_conv1_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv1_h')129layer_128_1_bn2 = batch_norm(layer_128_1_conv1_h, 'layer_128_1_bn2')130layer_128_1_scale2 = scale(layer_128_1_bn2, 'layer_128_1_scale2')131layer_128_1_relu2 = tf.nn.relu(layer_128_1_scale2)132layer_128_1_conv2 = conv(layer_128_1_relu2, 'layer_128_1_conv2')133layer_128_1_conv_expand_h = conv(layer_128_1_relu1, stride=2, name='layer_128_1_conv_expand_h')134layer_128_1_sum = layer_128_1_conv2 + layer_128_1_conv_expand_h135136layer_256_1_bn1 = batch_norm(layer_128_1_sum, 'layer_256_1_bn1')137layer_256_1_scale1 = scale(layer_256_1_bn1, 'layer_256_1_scale1')138layer_256_1_relu1 = tf.nn.relu(layer_256_1_scale1)139140# layer_256_1_conv1 = tf.pad(layer_256_1_relu1, [[0, 0], [1, 1], [1, 1], [0, 
0]])141layer_256_1_conv1 = tf.space_to_batch_nd(layer_256_1_relu1, [1, 1], [[1, 1], [1, 1]], name='Pad_1')142layer_256_1_conv1 = conv(layer_256_1_conv1, stride=2, pad='VALID', name='layer_256_1_conv1')143144layer_256_1_bn2 = batch_norm(layer_256_1_conv1, 'layer_256_1_bn2')145layer_256_1_scale2 = scale(layer_256_1_bn2, 'layer_256_1_scale2')146layer_256_1_relu2 = tf.nn.relu(layer_256_1_scale2)147layer_256_1_conv2 = conv(layer_256_1_relu2, 'layer_256_1_conv2')148layer_256_1_conv_expand = conv(layer_256_1_relu1, stride=2, name='layer_256_1_conv_expand')149layer_256_1_sum = layer_256_1_conv2 + layer_256_1_conv_expand150151layer_512_1_bn1 = batch_norm(layer_256_1_sum, 'layer_512_1_bn1')152layer_512_1_scale1 = scale(layer_512_1_bn1, 'layer_512_1_scale1')153layer_512_1_relu1 = tf.nn.relu(layer_512_1_scale1)154layer_512_1_conv1_h = conv(layer_512_1_relu1, 'layer_512_1_conv1_h')155layer_512_1_bn2_h = batch_norm(layer_512_1_conv1_h, 'layer_512_1_bn2_h')156layer_512_1_scale2_h = scale(layer_512_1_bn2_h, 'layer_512_1_scale2_h')157layer_512_1_relu2 = tf.nn.relu(layer_512_1_scale2_h)158layer_512_1_conv2_h = conv(layer_512_1_relu2, dilation=2, name='layer_512_1_conv2_h')159layer_512_1_conv_expand_h = conv(layer_512_1_relu1, 'layer_512_1_conv_expand_h')160layer_512_1_sum = layer_512_1_conv2_h + layer_512_1_conv_expand_h161162last_bn_h = batch_norm(layer_512_1_sum, 'last_bn_h')163last_scale_h = scale(last_bn_h, 'last_scale_h')164fc7 = tf.nn.relu(last_scale_h, name='last_relu')165166conv6_1_h = conv(fc7, 'conv6_1_h', activ=tf.nn.relu)167conv6_2_h = conv(conv6_1_h, stride=2, name='conv6_2_h', activ=tf.nn.relu)168conv7_1_h = conv(conv6_2_h, 'conv7_1_h', activ=tf.nn.relu)169170# conv7_2_h = tf.pad(conv7_1_h, [[0, 0], [1, 1], [1, 1], [0, 0]])171conv7_2_h = tf.space_to_batch_nd(conv7_1_h, [1, 1], [[1, 1], [1, 1]], name='Pad_2')172conv7_2_h = conv(conv7_2_h, stride=2, pad='VALID', name='conv7_2_h', activ=tf.nn.relu)173174conv8_1_h = conv(conv7_2_h, pad='SAME', name='conv8_1_h', 
activ=tf.nn.relu)175conv8_2_h = conv(conv8_1_h, pad='SAME', name='conv8_2_h', activ=tf.nn.relu)176conv9_1_h = conv(conv8_2_h, 'conv9_1_h', activ=tf.nn.relu)177conv9_2_h = conv(conv9_1_h, pad='SAME', name='conv9_2_h', activ=tf.nn.relu)178179conv4_3_norm = l2norm(layer_256_1_relu1, 'conv4_3_norm')180181### Locations and confidences ##################################################182locations = []183confidences = []184flattenLayersNames = [] # Collect all reshape layers names that should be replaced to flattens.185for top, suffix in zip([locations, confidences], ['_mbox_loc', '_mbox_conf']):186for bottom, name in zip([conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h],187['conv4_3_norm', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']):188name += suffix189flat = tf.layers.flatten(conv(bottom, name))190flattenLayersNames.append(flat.name[:flat.name.find(':')])191top.append(flat)192193mbox_loc = tf.concat(locations, axis=-1, name='mbox_loc')194mbox_conf = tf.concat(confidences, axis=-1, name='mbox_conf')195196total = int(np.prod(mbox_conf.shape[1:]))197mbox_conf_reshape = tf.reshape(mbox_conf, [-1, 2], name='mbox_conf_reshape')198mbox_conf_softmax = tf.nn.softmax(mbox_conf_reshape, name='mbox_conf_softmax')199mbox_conf_flatten = tf.reshape(mbox_conf_softmax, [-1, total], name='mbox_conf_flatten')200flattenLayersNames.append('mbox_conf_flatten')201202with tf.Session() as sess:203sess.run(tf.global_variables_initializer())204205### Check correctness ######################################################206out_nodes = ['mbox_loc', 'mbox_conf_flatten']207inp_nodes = [inp.name[:inp.name.find(':')]]208209np.random.seed(2701)210inputData = np.random.standard_normal([1, 3, 300, 300]).astype(np.float32)211212cvNet.setInput(inputData)213cvNet.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)214outDNN = cvNet.forward(out_nodes)215216outTF = sess.run([mbox_loc, mbox_conf_flatten], feed_dict={inp: inputData.transpose(0, 2, 3, 1)})217print('Max diff @ locations: %e' % 
np.max(np.abs(outDNN[0] - outTF[0])))218print('Max diff @ confidence: %e' % np.max(np.abs(outDNN[1] - outTF[1])))219220# Save a graph221graph_def = sess.graph.as_graph_def()222223# Freeze graph. Replaces variables to constants.224graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, out_nodes)225# Optimize graph. Removes training-only ops, unused nodes.226graph_def = optimize_for_inference_lib.optimize_for_inference(graph_def, inp_nodes, out_nodes, dtype.as_datatype_enum)227# Fuse constant operations.228transforms = ["fold_constants(ignore_errors=True)"]229if args.quantize:230transforms += ["quantize_weights(minimum_size=0)"]231transforms += ["sort_by_execution_order"]232graph_def = TransformGraph(graph_def, inp_nodes, out_nodes, transforms)233234# By default, float16 weights are stored in repeated tensor's field called235# `half_val`. It has type int32 with leading zeros for unused bytes.236# This type is encoded by Variant that means only 7 bits are used for value237# representation but the last one is indicated the end of encoding. This way238# float16 might takes 1 or 2 or 3 bytes depends on value. 
To improve compression,239# we replace all `half_val` values to `tensor_content` using only 2 bytes for everyone.240for node in graph_def.node:241if 'value' in node.attr:242halfs = node.attr["value"].tensor.half_val243if not node.attr["value"].tensor.tensor_content and halfs:244node.attr["value"].tensor.tensor_content = struct.pack('H' * len(halfs), *halfs)245node.attr["value"].tensor.ClearField('half_val')246247# Serialize248with tf.gfile.FastGFile(args.pb, 'wb') as f:249f.write(graph_def.SerializeToString())250251252################################################################################253# Write a text graph representation254################################################################################255def tensorMsg(values):256msg = 'tensor { dtype: DT_FLOAT tensor_shape { dim { size: %d } }' % len(values)257for value in values:258msg += 'float_val: %f ' % value259return msg + '}'260261# Remove Const nodes and unused attributes.262for i in reversed(range(len(graph_def.node))):263if graph_def.node[i].op in ['Const', 'Dequantize']:264del graph_def.node[i]265for attr in ['T', 'data_format', 'Tshape', 'N', 'Tidx', 'Tdim',266'use_cudnn_on_gpu', 'Index', 'Tperm', 'is_training',267'Tpaddings', 'Tblock_shape', 'Tcrops']:268if attr in graph_def.node[i].attr:269del graph_def.node[i].attr[attr]270271# Append prior box generators272min_sizes = [30, 60, 111, 162, 213, 264]273max_sizes = [60, 111, 162, 213, 264, 315]274steps = [8, 16, 32, 64, 100, 300]275aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]276layers = [conv4_3_norm, fc7, conv6_2_h, conv7_2_h, conv8_2_h, conv9_2_h]277for i in range(6):278priorBox = NodeDef()279priorBox.name = 'PriorBox_%d' % i280priorBox.op = 'PriorBox'281priorBox.input.append(layers[i].name[:layers[i].name.find(':')])282priorBox.input.append(inp_nodes[0]) # data283284text_format.Merge('i: %d' % min_sizes[i], priorBox.attr["min_size"])285text_format.Merge('i: %d' % max_sizes[i], priorBox.attr["max_size"])286text_format.Merge('b: 
true', priorBox.attr["flip"])287text_format.Merge('b: false', priorBox.attr["clip"])288text_format.Merge(tensorMsg(aspect_ratios[i]), priorBox.attr["aspect_ratio"])289text_format.Merge(tensorMsg([0.1, 0.1, 0.2, 0.2]), priorBox.attr["variance"])290text_format.Merge('f: %f' % steps[i], priorBox.attr["step"])291text_format.Merge('f: 0.5', priorBox.attr["offset"])292graph_def.node.extend([priorBox])293294# Concatenate prior boxes295concat = NodeDef()296concat.name = 'mbox_priorbox'297concat.op = 'ConcatV2'298for i in range(6):299concat.input.append('PriorBox_%d' % i)300concat.input.append('mbox_loc/axis')301graph_def.node.extend([concat])302303# DetectionOutput layer304detectionOut = NodeDef()305detectionOut.name = 'detection_out'306detectionOut.op = 'DetectionOutput'307308detectionOut.input.append('mbox_loc')309detectionOut.input.append('mbox_conf_flatten')310detectionOut.input.append('mbox_priorbox')311312text_format.Merge('i: 2', detectionOut.attr['num_classes'])313text_format.Merge('b: true', detectionOut.attr['share_location'])314text_format.Merge('i: 0', detectionOut.attr['background_label_id'])315text_format.Merge('f: 0.45', detectionOut.attr['nms_threshold'])316text_format.Merge('i: 400', detectionOut.attr['top_k'])317text_format.Merge('s: "CENTER_SIZE"', detectionOut.attr['code_type'])318text_format.Merge('i: 200', detectionOut.attr['keep_top_k'])319text_format.Merge('f: 0.01', detectionOut.attr['confidence_threshold'])320321graph_def.node.extend([detectionOut])322323# Replace L2Normalization subgraph onto a single node.324for i in reversed(range(len(graph_def.node))):325if graph_def.node[i].name in ['conv4_3_norm/l2_normalize/Square',326'conv4_3_norm/l2_normalize/Sum',327'conv4_3_norm/l2_normalize/Maximum',328'conv4_3_norm/l2_normalize/Rsqrt']:329del graph_def.node[i]330for node in graph_def.node:331if node.name == 'conv4_3_norm/l2_normalize':332node.op = 
'L2Normalize'333node.input.pop()334node.input.pop()335node.input.append(layer_256_1_relu1.name)336node.input.append('conv4_3_norm/l2_normalize/Sum/reduction_indices')337break338339softmaxShape = NodeDef()340softmaxShape.name = 'reshape_before_softmax'341softmaxShape.op = 'Const'342text_format.Merge(343'tensor {'344' dtype: DT_INT32'345' tensor_shape { dim { size: 3 } }'346' int_val: 0'347' int_val: -1'348' int_val: 2'349'}', softmaxShape.attr["value"])350graph_def.node.extend([softmaxShape])351352for node in graph_def.node:353if node.name == 'mbox_conf_reshape':354node.input[1] = softmaxShape.name355elif node.name == 'mbox_conf_softmax':356text_format.Merge('i: 2', node.attr['axis'])357elif node.name in flattenLayersNames:358node.op = 'Flatten'359inpName = node.input[0]360node.input.pop()361node.input.pop()362node.input.append(inpName)363364tf.train.write_graph(graph_def, "", args.pbtxt, as_text=True)365366367