# ------------------------------------------------------------------------------
# Patch metadata (kept for provenance; the payload below is the new-file content)
#
# diff --git a/.gitignore b/.gitignore
# index 0e57cb0b4c..2450ee8fc5 100644
# --- a/.gitignore
# +++ b/.gitignore
# @@ -36,3 +36,4 @@
#  /lcov/
#  /src
#  /mapfile
# +/tools/python/__pycache__/
#
# diff --git a/tools/python/convert.py b/tools/python/convert.py
# new file mode 100644
# index 0000000000..662b429066
# --- /dev/null
# +++ b/tools/python/convert.py
# ------------------------------------------------------------------------------
# Copyright (c) 2019 Guo Yejun
#
# This file is part of FFmpeg.
#
# FFmpeg is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# FFmpeg is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with FFmpeg; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# ==============================================================================

# verified with Python 3.5.2 on Ubuntu 16.04
import argparse
import os
import sys

# convert_from_tensorflow.py exports exactly this one name through __all__,
# so the explicit import is equivalent to the former wildcard import.
from convert_from_tensorflow import convert_from_tensorflow


def get_arguments():
    """Parse the command line.

    Returns the argparse namespace with three attributes:
    ``infile`` (positional, path to the model), ``outdir`` (default ``./``)
    and ``infmt`` (default ``tensorflow``).
    """
    parser = argparse.ArgumentParser(description='generate native mode model with weights from deep learning model')
    parser.add_argument('--outdir', type=str, default='./', help='where to put generated files')
    parser.add_argument('--infmt', type=str, default='tensorflow', help='format of the deep learning model')
    parser.add_argument('infile', help='path to the deep learning model with weights')

    return parser.parse_args()


def main():
    """Convert the model named on the command line into ``<outdir>/<basename>.model``.

    Exits with status 1 when the input file does not exist or when the
    requested input format has no converter.
    """
    args = get_arguments()

    # one converter per supported --infmt value
    converters = {'tensorflow': convert_from_tensorflow}

    if not os.path.isfile(args.infile):
        print('the specified input file %s does not exist' % args.infile, file=sys.stderr)
        # sys.exit instead of the bare exit(): the latter is injected by the
        # site module and is not available in every interpreter setup.
        sys.exit(1)

    # Reject an unknown format before touching the file system; previously
    # this case fell through silently and the script "succeeded" with no output.
    if args.infmt not in converters:
        print('the specified input format %s is not supported' % args.infmt, file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(args.outdir):
        print('create output directory %s' % args.outdir)
        # makedirs also creates missing parents (os.mkdir fails on "a/b/c");
        # exist_ok tolerates another process creating it in the meantime.
        os.makedirs(args.outdir, exist_ok=True)

    # <outdir>/<input file name without directory and extension>.model
    basefile = os.path.split(args.infile)[1]
    basefile = os.path.splitext(basefile)[0]
    outfile = os.path.join(args.outdir, basefile) + '.model'

    converters[args.infmt](args.infile, outfile)


if __name__ == '__main__':
    main()

# ------------------------------------------------------------------------------
# Patch metadata (kept for provenance; the payload below is the new-file content)
#
# diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
# new file mode 100644
# index 0000000000..37049e58df
# --- /dev/null
# +++ b/tools/python/convert_from_tensorflow.py
# ------------------------------------------------------------------------------
# Copyright (c) 2019 Guo Yejun
#
# This file is part of FFmpeg.
#
# FFmpeg is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# FFmpeg is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with FFmpeg; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
# ==============================================================================

import tensorflow as tf
import numpy as np
import struct
import sys

__all__ = ['convert_from_tensorflow']

# This is a first step, aimed only at compatibility with vf_sr; it is not a
# general converter yet and will be refined step by step.

class TFConverter:
    """Serialize the nodes of a TensorFlow GraphDef into the native model file.

    Only ``Conv2D`` and ``DepthToSpace`` nodes produce layers. A ``BiasAdd``
    and an activation that directly consume a ``Conv2D`` are folded into that
    convolution's record. Every other op is skipped.

    File layout, as written by this class (all integers are native-endian
    uint32, all weights native-endian float32):

        layer count
        per Conv2D:       op code, dilation, padding code, activation code,
                          in_channels, out_channels, kernel size,
                          kernel (out, height, width, in), bias (out)
        per DepthToSpace: op code, block size
    """

    def __init__(self, graph_def, nodes, outfile):
        """Store the graph and set up the lookup tables.

        graph_def -- the parsed GraphDef (only used by dump_for_tensorboard)
        nodes     -- mutable sequence of graph nodes; Identity nodes are
                     removed from it in place by remove_identity()
        outfile   -- path of the model file to create
        """
        self.graph_def = graph_def
        self.nodes = nodes
        self.outfile = outfile
        self.layer_number = 0
        self.output_names = []
        self.name_node_dict = {}
        # producer name -> list of nodes that take it as an input
        self.edges = {}
        # 'None' is the label get_conv2d_params() returns when no activation
        # follows the convolution; without an entry here every such layer
        # failed with KeyError. 3 is the slot the table left unused.
        # NOTE(review): confirm 3 is the "no activation" code in the native loader.
        self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'None':3, 'LeakyRelu':4}
        self.conv_paddings = {'VALID':2, 'SAME':1}
        # names of nodes already written (or folded into a written layer)
        self.converted_nodes = set()
        self.op2code = {'Conv2D':1, 'DepthToSpace':2}


    def dump_for_tensorboard(self, logdir='/tmp/graph'):
        """Debug helper: import the graph and write it to *logdir* for TensorBoard.

        View it with: tensorboard --logdir=<logdir>
        """
        graph = tf.get_default_graph()
        tf.import_graph_def(self.graph_def, name="")
        writer = tf.summary.FileWriter(logdir, graph)
        writer.close()


    def _first_consumer(self, name):
        """Return the first node that takes *name* as an input, or None."""
        consumers = self.edges.get(name)
        return consumers[0] if consumers else None


    @staticmethod
    def _tensor_values(tensor, count):
        """Return the float32 payload of a constant tensor as a flat array.

        Values come from ``tensor_content`` when it is non-empty, otherwise
        from ``float_val``. A ``float_val`` shorter than *count* is padded by
        repeating its last value (how TensorFlow expands compactly stored
        constants).

        Raises ValueError when the tensor carries no float data at all.
        """
        if tensor.tensor_content:
            return np.frombuffer(tensor.tensor_content, dtype=np.float32)
        values = np.array(list(tensor.float_val), dtype=np.float32)
        if values.size == 0:
            raise ValueError('constant tensor has neither tensor_content nor float_val')
        if values.size < count:
            filler = np.full(count - values.size, values[-1], dtype=np.float32)
            values = np.concatenate([values, filler])
        return values


    def get_conv2d_params(self, node):
        """Collect what belongs to the convolution *node*.

        Returns (kernel_node, bias_node_or_None, activation_name).
        A BiasAdd directly consuming the convolution, and an activation
        directly consuming that (or the convolution itself), are marked as
        converted so they are not visited again. activation_name is 'None'
        when no known activation follows.
        """
        knode = self.name_node_dict[node.input[1]]
        bnode = None
        activation = 'None'
        # A node without consumers (e.g. the graph output) has no entry in
        # self.edges, hence the None-tolerant lookup.
        consumer = self._first_consumer(node.name)
        if consumer is not None and consumer.op == 'BiasAdd':
            self.converted_nodes.add(consumer.name)
            bnode = self.name_node_dict[consumer.input[1]]
            consumer = self._first_consumer(consumer.name)
        if consumer is not None and consumer.op in self.conv_activations:
            self.converted_nodes.add(consumer.name)
            activation = consumer.op
        return knode, bnode, activation


    def dump_conv2d_to_file(self, node, f):
        """Write one convolution record (header, kernel, bias) to *f*.

        *f* is any binary file-like object with a write() method.
        Raises ValueError for a non-square kernel, because the header stores
        a single kernel size.
        """
        assert node.op == 'Conv2D'
        self.layer_number += 1
        self.converted_nodes.add(node.name)
        knode, bnode, activation = self.get_conv2d_params(node)

        # NOTE(review): element 0 of 'dilations' is read here; for an NHWC
        # Conv2D that is presumably the batch entry - confirm the intended index.
        dilation = node.attr['dilations'].list.i[0]
        padding = self.conv_paddings[node.attr['padding'].s.decode("utf-8")]

        ktensor = knode.attr['value'].tensor
        filter_height = ktensor.tensor_shape.dim[0].size
        filter_width = ktensor.tensor_shape.dim[1].size
        in_channels = ktensor.tensor_shape.dim[2].size
        out_channels = ktensor.tensor_shape.dim[3].size
        if filter_height != filter_width:
            # only filter_height goes into the header, so a reader could not
            # recover the kernel extent of a non-square filter
            raise ValueError('Conv2D %s: non-square kernel %dx%d is not supported'
                             % (node.name, filter_height, filter_width))

        kernel = self._tensor_values(ktensor, filter_height * filter_width * in_channels * out_channels)
        kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
        # (height, width, in, out) -> (out, height, width, in)
        kernel = np.transpose(kernel, [3, 0, 1, 2])

        header = np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation],
                           in_channels, out_channels, filter_height], dtype=np.uint32)
        # tobytes() yields the same C-order bytes tofile() would, but does
        # not require *f* to be a real OS-level file.
        f.write(header.tobytes())
        f.write(kernel.tobytes())

        if bnode is None:
            # convolution without BiasAdd: keep the record layout by writing
            # one zero per output channel
            bias = np.zeros(out_channels, dtype=np.float32)
        else:
            btensor = bnode.attr['value'].tensor
            bias = self._tensor_values(btensor, btensor.tensor_shape.dim[0].size)
        f.write(bias.tobytes())


    def dump_depth2space_to_file(self, node, f):
        """Write one DepthToSpace record (op code, block size) to *f*."""
        assert node.op == 'DepthToSpace'
        self.layer_number += 1
        block_size = node.attr['block_size'].i
        f.write(np.array([self.op2code[node.op], block_size], dtype=np.uint32).tobytes())
        self.converted_nodes.add(node.name)


    def generate_layer_number(self):
        """Set self.layer_number to the number of layers the graph yields.

        The layer count is the first value in the model file, but it is only
        known after walking the graph. So the layers are first dumped into a
        throw-away in-memory buffer (no temporary file on disk).
        """
        import io

        self.layer_number = 0
        self.converted_nodes.clear()
        self.dump_layers_to_file(io.BytesIO())
        self.converted_nodes.clear()


    def dump_layers_to_file(self, f):
        """Write every not-yet-converted Conv2D / DepthToSpace node to *f*."""
        for node in self.nodes:
            if node.name in self.converted_nodes:
                continue
            if node.op == 'Conv2D':
                self.dump_conv2d_to_file(node, f)
            elif node.op == 'DepthToSpace':
                self.dump_depth2space_to_file(node, f)


    def dump_to_file(self):
        """Create self.outfile: the layer count followed by all layer records."""
        self.generate_layer_number()
        layer_count = self.layer_number
        # the real pass counts the layers again; restart from zero so the
        # attribute ends up as the true count instead of twice that
        self.layer_number = 0
        with open(self.outfile, 'wb') as f:
            f.write(np.array([layer_count], dtype=np.uint32).tobytes())
            self.dump_layers_to_file(f)


    def generate_name_node_dict(self):
        """Fill self.name_node_dict with a name -> node entry for every node."""
        for node in self.nodes:
            self.name_node_dict[node.name] = node


    def generate_output_names(self):
        """Append to self.output_names every node that no other node consumes."""
        # a set keeps the membership test below O(1) per node
        used_names = {name for node in self.nodes for name in node.input}
        for node in self.nodes:
            if node.name not in used_names:
                self.output_names.append(node.name)


    def remove_identity(self):
        """Drop all Identity nodes from self.nodes and reconnect around them.

        An Identity that is a graph output hands its name to the node feeding
        it, so the output name stays the same. Any other Identity is bypassed:
        consumers are rewired to the node it forwards, following chains of
        consecutive Identity nodes to the real producer.
        """
        id_nodes = []
        id_dict = {}
        for node in self.nodes:
            if node.op != 'Identity':
                continue
            name = node.name
            source = node.input[0]
            id_nodes.append(node)
            if name in self.output_names:
                # do not change the output name
                producer = self.name_node_dict.pop(source)
                producer.name = name
                self.name_node_dict[name] = producer
            else:
                id_dict[name] = source

        for idnode in id_nodes:
            self.nodes.remove(idnode)

        for node in self.nodes:
            # iterate over a snapshot, the inputs are rewritten in place
            for i, source in enumerate(list(node.input)):
                resolved = source
                while resolved in id_dict:
                    resolved = id_dict[resolved]
                if resolved != source:
                    node.input[i] = resolved


    def generate_edges(self):
        """Fill self.edges: for each input name, the nodes that consume it."""
        for node in self.nodes:
            for source in node.input:
                self.edges.setdefault(source, []).append(node)


    def run(self):
        """Analyse the graph and write the model file."""
        self.generate_name_node_dict()
        self.generate_output_names()
        self.remove_identity()
        self.generate_edges()

        # to inspect the graph by eye in tensorboard:
        #self.dump_for_tensorboard()

        self.dump_to_file()


def convert_from_tensorflow(infile, outfile):
    """Convert the serialized GraphDef in *infile* into the model file *outfile*."""
    with open(infile, 'rb') as f:
        # the file holds a GraphDef in binary protobuf form
        serialized = f.read()

    graph_def = tf.GraphDef()
    graph_def.ParseFromString(serialized)
    nodes = graph_def.node

    converter = TFConverter(graph_def, nodes, outfile)
    converter.run()