使用Tensorflow对模型进行量化

本文旨在将迁移学习训练好的模型基于tensorflow工具进行量化。
环境配置及迁移学习部分可参考博文[https://www.cnblogs.com/hayley111/p/12887853.html]。

首先使用如下workflow理解模型部署的过程，本文主要描述的是quant这一步。

1. 环境准备：

安装bazel
bazel是一个开源的构造和测试工具，在EIQ中指定用tf配套版本的bazel进行构建。参照如下官方指导链接
[https://docs.bazel.build/versions/3.2.0/install-ubuntu.html#step-1-add-bazel-distribution-uri-as-a-package-source]
注意这里使用oracle的JDK，在官方指南中安装open JDK的部分替换参考如下博文安装oracle JDK：
[https://www.cnblogs.com/hayley111/p/13024148.html]
安装完毕后使用我们将使用bazel对已完成训练的模型进行构建。
在tensorflow文件夹下执行

bazel build tensorflow/python/tools:freeze_graph

出现以下信息表示bazel-bin编译成功：

2.使用transform_graph tool，对模型量化参数进行配置：

使用bazel对已完成训练的model进行配置，参考EIQ指南：

bazel build tensorflow/tools/graph_transforms:transform_graph

这一步可能会需要几分钟，出现如下表示信息表示成功：

接下来对input和output参数进行配置，参考指南，路径设置为自己存放model的路径：

bazel-bin/tensorflow/tools/graph_transforms/transform_graph 
--in_graph="frozen_inference_graph.pb" 
--out_graph="frozen_inference_graph_ssd_part.pb" 
--inputs="Preprocessor/sub" 
--outputs="concat,concat_1" 
--transforms='strip_unused_nodes(type=float, shape="1,300,300,3") remove_nodes(op=Identity,op=CheckNumerics) fold_constants(ignore_errors=true)'

出现以下信息表示配置完成：

3.使用tf toco对模型量化进行优化配置：

这里使用tensorflow lite工具，相关材料详见tf官网.
注意The toco target has been moved from //tensorflow/contrib/lite/toco to //tensorflow/lite/toco, 根据tf版本确认好你的toco文件夹位置。

bazel build tensorflow/lite/toco:toco

bazel run -c opt //tensorflow/lite/toco:toco --  
--input_file=$/mnt/d/tensorflow-1.14.0/frozen_inference_graph_ssd_part.pb 
--input_format=TENSORFLOW_GRAPHDEF  
--output_format=TENSORFLOW_GRAPHDEF 
--output_file=frozen_inference_graph_ssd_part_float.pb 
--input_arrays=Preprocessor/sub  
--output_arrays=concat,concat_1 
--drop_control_dependency

执行成功出现以下信息：

4.最后使用eiq的脚本完成模型的量化。

首先完成环境配置：

export PYTHONPATH=$PYTHONPATH:/your/path/to/s32v234_sdk/tools

将以上步骤中得到的frozen_inference_graph.pb和frozen_inference_graph_ssd_part_float.pb放入对应的文件夹中：
cp frozen_inference_graph.pb your_path/s32v234_sdk/tools/eiq_auto_data/models/tf/mssdv2deployment

EIQ已经写好了量化的py脚本。按照eIQ Auto User Guide P12中的脚本进行量化：

cd eiq_auto_data/models/tf/mssdv2/workspace
python quantize_graph.py

执行成功返回如下信息：

回到eiq_auto_data/models/tf/mssdv2/deployment下能够看到量化的模型已经生成：

实际上NXP根据自己的芯片特性，使用tf的lite工具完成了量化脚本的编写（即本步用到的quantize_graph）。
下面看看这个脚本都做了哪些工作:

import os
import copy
import math
import numpy as np 
from eiq_auto.analyze import runner 
import tensorflow as tf
from .quantize import Quantize, minmax_scale, MIN_SUFFIX, MAX_SUFFIX
from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import graph_pb2
from tensorflow.core.framework import node_def_pb2
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_util
from abc import ABC, abstractmethod

OPS = ['Conv2D', 'DepthwiseConv2dNative', 'Relu6', 'BiasAdd', 'Relu', 'FusedBatchNorm', 'Add', 'MaxPool', 'AvgPool','BatchNormWithGlobalNormalization', 'MatMul', 'Conv2DBackpropInput', 'Mul']

def create_node(op, name, inputs):
    new_node = node_def_pb2.NodeDef()
    new_node.op = op
    new_node.name = name
    for input_name in inputs:
        new_node.input.extend([input_name])
    return new_node

def create_constant_node(name, value, dtype, shape=None):
    node = create_node("Const", name, [])
    set_attr_dtype(node, "dtype", dtype)
    set_attr_tensor(node, "value", value, dtype, shape)
    return node

def set_attr_bool(node, key, value):
    try:
        node.attr[key].CopyFrom(attr_value_pb2.AttrValue(b=value))
    except KeyError:
        pass

def set_attr_int(node, key, value):
    try:
        node.attr[key].CopyFrom(attr_value_pb2.AttrValue(i=value))
    except KeyError:
        pass

def set_attr_tensor(node, key, value, dtype, shape=None):
    try:
        node.attr[key].CopyFrom(
            attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto(
                value, dtype=dtype, shape=shape)))
    except KeyError:
        pass

def set_attr_dtype(node, key, value):
    try:
        node.attr[key].CopyFrom(
            attr_value_pb2.AttrValue(type=value.as_datatype_enum))
    except KeyError:
        pass

class TfNodeQuantize(ABC):
  @abstractmethod
  def match(self, node):
    pass

  @abstractmethod
  def get_minmax(self, node, graph, graph_def):
    pass

class TfConcatQuantize(TfNodeQuantize):
  def match(self, node):
    if node.op == 'ConcatV2':
      return True 
    return False

  def get_minmax(self, node, graph, graph_def):
    res = []
    op = graph.get_operation_by_name(node.name)
    for input_tensor in op.inputs:
      if input_tensor.op.type == 'Const' and 'axis' not in input_tensor.op.name:
        const_node = next((n for n in graph_def.node if n.name == input_tensor.op.name), None)
        res.append((const_node, -1, 0))
    return res

class TfBoxPredictorQuantize(TfNodeQuantize):
  def match(self, node):
    if 'BoxPredictor' in node.name and node.op in OPS:
      return True 
    return False

  def get_minmax(self, node, graph, graph_def):
    res = []
    res.append((node, 0, 16))
    return res

CustomNodeQuantize = [] 
CustomNodeQuantize.append(TfConcatQuantize())
CustomNodeQuantize.append(TfBoxPredictorQuantize())

class TfQuantize(Quantize):
  def __init__(self, model_file):
    super(TfQuantize, self).__init__(model_file)

  def annotate_minmax(self, input_data, **kwargs):
    helper_graph = runner.make_res_gen(self.model_file)

    graph_def = helper_graph.get_runner().graph_def
    graph = helper_graph.get_runner().graph
 
    # nodes to be monitored
    probe_points = [] 
    tempBN = None
    for node in graph_def.node:
      if node.op in OPS:
        #add fake quant node after fusedbatchnorm if there is no relu that follows
        if node.op == "FusedBatchNorm":
          tempBN = node
          continue
        if tempBN != None and node.op != "Relu" and node.op != "Relu6":
          probe_points.append(tempBN.name)
        if node.op == "Relu" or node.op == "Relu6":
          tempBN = None
        
        probe_points.append(node.name)
    helper_graph.set_intermediate_result_probes(probe_points)
    
    # run inference
    raw_results = helper_graph.run(input_data, **kwargs)

    # create new model adding FakeQuant nodes
    self.output_graph_def = graph_pb2.GraphDef()
    node_added = []
    for node in graph_def.node:
      # special cases
      for c in CustomNodeQuantize:
        if c.match(node):
          for tup in c.get_minmax(node, graph, graph_def):
            node_added.append(tup[0].name)
            self._create_quant_node(*tup)

      # default case
      if node.name in probe_points and node.name not in node_added:
        node_added.append(node.name)
        calc_result = raw_results[helper_graph.get_output_names().index(node.name + ':0')]
        min_value = np.min(calc_result)
        max_value = np.max(calc_result)

        self._create_quant_node(node, min_value, max_value)

    for node in graph_def.node:
      if node.name not in node_added:
        output_node = node_def_pb2.NodeDef()
        output_node.CopyFrom(node)
        self.output_graph_def.node.extend([output_node])

    # return original output results
    return raw_results[:1]

  def _create_quant_node(self, node, min_value, max_value):
    original_node = node_def_pb2.NodeDef()
    original_node.CopyFrom(node)
    original_node.name = node.name + "_original"

    min_name = node.name + MIN_SUFFIX 
    max_name = node.name + MAX_SUFFIX 

    min_value, max_value = minmax_scale(min_value, max_value)
    amplitude = max(abs(min_value), abs(max_value))
    amplitude = pow(2, math.ceil(math.log(amplitude, 2)))
    min_value = -amplitude
    max_value = amplitude-1

    if 'dtype' in node.attr:
      datatype = dtypes.as_dtype(node.attr["dtype"].type)
    else:
      datatype = dtypes.as_dtype(node.attr["T"].type)

    min_const_node = create_constant_node(min_name, min_value, datatype, None)
    max_const_node = create_constant_node(max_name, max_value, datatype, None)
    # Add a downsteam node to attach min/max quant information.
    downstream_node = create_node("FakeQuantWithMinMaxVars", node.name, [
      original_node.name, min_const_node.name,
      max_const_node.name ])
            
    set_attr_int(downstream_node, "num_bits", 8)
    set_attr_bool(downstream_node, "narrow_range", False)
                    
    self.output_graph_def.node.extend([original_node, min_const_node, max_const_node, downstream_node])
    

  def export_model(self, dir_path = os.curdir, basename = None):
    if not os.path.isdir(dir_path):
      print(dir_path + " does not exist!\n")
      dir_path = os.curdir 
    if basename == None:
      basename = basename 
    model_path = dir_path + os.sep + basename  + "_quant.pb"
    with tf.gfile.GFile(model_path, "wb") as f:
        f.write(self.output_graph_def.SerializeToString())
    print("Model exported: " + model_path)