libavfilter: Add on-the-fly generation of default DNN models for the TensorFlow backend instead of storing a binary model.

Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
Sergey Lavrushkin authored 7 years ago, committed by Pedro Arthur
parent 243ecadad5
commit 4eb63efbda
5 changed files:

  libavfilter/dnn_backend_native.c |    12
  libavfilter/dnn_backend_tf.c     |   271
  libavfilter/dnn_espcn.h          | 17981
  libavfilter/dnn_srcnn.h          |  7013
  libavfilter/vf_sr.c              |    24

@@ -334,17 +334,17 @@ DNNModel* ff_dnn_load_default_model_native(DNNDefaultModel model_type)
     switch (model_type){
     case DNN_SRCNN:
-        if (set_up_conv_layer(network->layers + 1, srcnn_conv1_kernel, srcnn_conv1_biases, RELU, 1, 64, 9) != DNN_SUCCESS ||
-            set_up_conv_layer(network->layers + 2, srcnn_conv2_kernel, srcnn_conv2_biases, RELU, 64, 32, 1) != DNN_SUCCESS ||
-            set_up_conv_layer(network->layers + 3, srcnn_conv3_kernel, srcnn_conv3_biases, RELU, 32, 1, 5) != DNN_SUCCESS){
+        if (set_up_conv_layer(network->layers + 1, srcnn_conv1_kernel, srcnn_conv1_bias, RELU, 1, 64, 9) != DNN_SUCCESS ||
+            set_up_conv_layer(network->layers + 2, srcnn_conv2_kernel, srcnn_conv2_bias, RELU, 64, 32, 1) != DNN_SUCCESS ||
+            set_up_conv_layer(network->layers + 3, srcnn_conv3_kernel, srcnn_conv3_bias, RELU, 32, 1, 5) != DNN_SUCCESS){
             ff_dnn_free_model_native(&model);
             return NULL;
         }
         break;
     case DNN_ESPCN:
-        if (set_up_conv_layer(network->layers + 1, espcn_conv1_kernel, espcn_conv1_biases, TANH, 1, 64, 5) != DNN_SUCCESS ||
-            set_up_conv_layer(network->layers + 2, espcn_conv2_kernel, espcn_conv2_biases, TANH, 64, 32, 3) != DNN_SUCCESS ||
-            set_up_conv_layer(network->layers + 3, espcn_conv3_kernel, espcn_conv3_biases, SIGMOID, 32, 4, 3) != DNN_SUCCESS){
+        if (set_up_conv_layer(network->layers + 1, espcn_conv1_kernel, espcn_conv1_bias, TANH, 1, 64, 5) != DNN_SUCCESS ||
+            set_up_conv_layer(network->layers + 2, espcn_conv2_kernel, espcn_conv2_bias, TANH, 64, 32, 3) != DNN_SUCCESS ||
+            set_up_conv_layer(network->layers + 3, espcn_conv3_kernel, espcn_conv3_bias, SIGMOID, 32, 4, 3) != DNN_SUCCESS){
             ff_dnn_free_model_native(&model);
             return NULL;
         }
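
From the calls above, set_up_conv_layer() apparently takes (kernel, biases, activation, input channels, output channels, kernel size), so the two built-in networks have the following shapes (a summary inferred from this hunk, for reference):

    /* SRCNN: 9x9 conv 1->64 (ReLU), 1x1 conv 64->32 (ReLU), 5x5 conv 32->1 (ReLU)
     * ESPCN: 5x5 conv 1->64 (tanh), 3x3 conv 64->32 (tanh), 3x3 conv 32->4 (sigmoid),
     *        whose 4 output channels feed the 2x depth-to-space step added in the
     *        TensorFlow graph below. */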

@@ -203,52 +203,191 @@ DNNModel* ff_dnn_load_model_tf(const char* model_filename)
     return model;
 }
 
-DNNModel* ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
+static TF_Operation* add_pad_op(TFModel* tf_model, TF_Operation* input_op, int32_t pad)
 {
-    DNNModel* model = NULL;
-    TFModel* tf_model = NULL;
-    TF_Buffer* graph_def;
-    unsigned char* graph_data = NULL;
-    TF_ImportGraphDefOptions* graph_opts;
+    TF_OperationDescription* op_desc;
+    TF_Operation* op;
+    TF_Tensor* tensor;
+    TF_Output input;
+    int32_t* pads;
+    int64_t pads_shape[] = {4, 2};
+
+    op_desc = TF_NewOperation(tf_model->graph, "Const", "pads");
+    TF_SetAttrType(op_desc, "dtype", TF_INT32);
+    tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t));
+    pads = (int32_t*)TF_TensorData(tensor);
+    pads[0] = 0;   pads[1] = 0;
+    pads[2] = pad; pads[3] = pad;
+    pads[4] = pad; pads[5] = pad;
+    pads[6] = 0;   pads[7] = 0;
+    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return NULL;
+    }
+    op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return NULL;
+    }
+
-    graph_def = TF_NewBuffer();
-    switch (model_type){
-    case DNN_SRCNN:
-        graph_data = av_malloc(srcnn_tf_size);
-        if (!graph_data){
-            TF_DeleteBuffer(graph_def);
+    op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad");
+    input.oper = input_op;
+    input.index = 0;
+    TF_AddInput(op_desc, input);
+    input.oper = op;
+    TF_AddInput(op_desc, input);
+    TF_SetAttrType(op_desc, "T", TF_FLOAT);
+    TF_SetAttrType(op_desc, "Tpaddings", TF_INT32);
+    TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9);
+    op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return NULL;
+    }
+
+    return op;
+}
+
+static TF_Operation* add_const_op(TFModel* tf_model, const float* values, const int64_t* dims, int dims_len, const char* name)
+{
+    int dim;
+    TF_OperationDescription* op_desc;
+    TF_Tensor* tensor;
+    size_t len;
+
+    op_desc = TF_NewOperation(tf_model->graph, "Const", name);
+    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
+    len = sizeof(float);
+    for (dim = 0; dim < dims_len; ++dim){
+        len *= dims[dim];
+    }
+    tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, len);
+    memcpy(TF_TensorData(tensor), values, len);
+    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return NULL;
+    }
+
+    return TF_FinishOperation(op_desc, tf_model->status);
+}
+
+static TF_Operation* add_conv_layers(TFModel* tf_model, const float** consts, const int64_t** consts_dims,
+                                     const int* consts_dims_len, const char** activations,
+                                     TF_Operation* input_op, int layers_num)
+{
+    int i;
+    TF_OperationDescription* op_desc;
+    TF_Operation* op;
+    TF_Operation* transpose_op;
+    TF_Output input;
+    int64_t strides[] = {1, 1, 1, 1};
+    int32_t* transpose_perm;
+    TF_Tensor* tensor;
+    int64_t transpose_perm_shape[] = {4};
+#define NAME_BUFF_SIZE 256
+    char name_buffer[NAME_BUFF_SIZE];
+
+    op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm");
+    TF_SetAttrType(op_desc, "dtype", TF_INT32);
+    tensor = TF_AllocateTensor(TF_INT32, transpose_perm_shape, 1, 4 * sizeof(int32_t));
+    transpose_perm = (int32_t*)TF_TensorData(tensor);
+    transpose_perm[0] = 1;
+    transpose_perm[1] = 2;
+    transpose_perm[2] = 3;
+    transpose_perm[3] = 0;
+    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return NULL;
+    }
+    transpose_op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return NULL;
+    }
+
+    input.index = 0;
+    for (i = 0; i < layers_num; ++i){
+        snprintf(name_buffer, NAME_BUFF_SIZE, "conv_kernel%d", i);
+        op = add_const_op(tf_model, consts[i << 1], consts_dims[i << 1], consts_dims_len[i << 1], name_buffer);
+        if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
+            return NULL;
+        }
-        memcpy(graph_data, srcnn_tf_model, srcnn_tf_size);
-        graph_def->data = (void*)graph_data;
-        graph_def->length = srcnn_tf_size;
-        graph_def->data_deallocator = free_buffer;
-        break;
-    case DNN_ESPCN:
-        graph_data = av_malloc(espcn_tf_size);
-        if (!graph_data){
-            TF_DeleteBuffer(graph_def);
+
+        snprintf(name_buffer, NAME_BUFF_SIZE, "transpose%d", i);
+        op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer);
+        input.oper = op;
+        TF_AddInput(op_desc, input);
+        input.oper = transpose_op;
+        TF_AddInput(op_desc, input);
+        TF_SetAttrType(op_desc, "T", TF_FLOAT);
+        TF_SetAttrType(op_desc, "Tperm", TF_INT32);
+        op = TF_FinishOperation(op_desc, tf_model->status);
+        if (TF_GetCode(tf_model->status) != TF_OK){
+            return NULL;
+        }
+
+        snprintf(name_buffer, NAME_BUFF_SIZE, "conv2d%d", i);
+        op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer);
+        input.oper = input_op;
+        TF_AddInput(op_desc, input);
+        input.oper = op;
+        TF_AddInput(op_desc, input);
+        TF_SetAttrType(op_desc, "T", TF_FLOAT);
+        TF_SetAttrIntList(op_desc, "strides", strides, 4);
+        TF_SetAttrString(op_desc, "padding", "VALID", 5);
+        input_op = TF_FinishOperation(op_desc, tf_model->status);
+        if (TF_GetCode(tf_model->status) != TF_OK){
+            return NULL;
+        }
+
+        snprintf(name_buffer, NAME_BUFF_SIZE, "conv_biases%d", i);
+        op = add_const_op(tf_model, consts[(i << 1) + 1], consts_dims[(i << 1) + 1], consts_dims_len[(i << 1) + 1], name_buffer);
+        if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){
+            return NULL;
+        }
+
+        snprintf(name_buffer, NAME_BUFF_SIZE, "bias_add%d", i);
+        op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer);
+        input.oper = input_op;
+        TF_AddInput(op_desc, input);
+        input.oper = op;
+        TF_AddInput(op_desc, input);
+        TF_SetAttrType(op_desc, "T", TF_FLOAT);
+        input_op = TF_FinishOperation(op_desc, tf_model->status);
+        if (TF_GetCode(tf_model->status) != TF_OK){
+            return NULL;
+        }
+
+        snprintf(name_buffer, NAME_BUFF_SIZE, "activation%d", i);
+        op_desc = TF_NewOperation(tf_model->graph, activations[i], name_buffer);
+        input.oper = input_op;
+        TF_AddInput(op_desc, input);
+        TF_SetAttrType(op_desc, "T", TF_FLOAT);
+        input_op = TF_FinishOperation(op_desc, tf_model->status);
+        if (TF_GetCode(tf_model->status) != TF_OK){
+            return NULL;
+        }
-        memcpy(graph_data, espcn_tf_model, espcn_tf_size);
-        graph_def->data = (void*)graph_data;
-        graph_def->length = espcn_tf_size;
-        graph_def->data_deallocator = free_buffer;
-        break;
-    default:
-        TF_DeleteBuffer(graph_def);
-        return NULL;
     }
+
+    return input_op;
+}
+
+DNNModel* ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
+{
+    DNNModel* model = NULL;
+    TFModel* tf_model = NULL;
+    TF_OperationDescription* op_desc;
+    TF_Operation* op;
+    TF_Operation* const_ops_buffer[6];
+    TF_Output input;
+    int64_t input_shape[] = {1, -1, -1, 1};
+
+    input.index = 0;
+
     model = av_malloc(sizeof(DNNModel));
     if (!model){
-        TF_DeleteBuffer(graph_def);
         return NULL;
     }
 
     tf_model = av_malloc(sizeof(TFModel));
     if (!tf_model){
-        TF_DeleteBuffer(graph_def);
         av_freep(&model);
         return NULL;
     }
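
All Conv2D nodes created by add_conv_layers() use "VALID" padding, so each k x k layer trims k - 1 pixels from the height and width; the MirrorPad node compensates for that up front. A hypothetical helper (not part of the patch) makes the pad arithmetic explicit:

    /* Sum of per-layer half-kernel widths: with "VALID" convolutions, mirror
     * padding by this amount keeps the output the same size as the input. */
    static int32_t total_pad(const int *kernel_sizes, int layers_num)
    {
        int32_t pad = 0;
        for (int i = 0; i < layers_num; ++i)
            pad += (kernel_sizes[i] - 1) / 2;
        return pad; /* SRCNN {9, 1, 5} -> 6, ESPCN {5, 3, 3} -> 4 */
    }

These are exactly the pad values 6 and 4 passed to add_pad_op() in the next hunk.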
@@ -258,16 +397,68 @@ DNNModel* ff_dnn_load_default_model_tf(DNNDefaultModel model_type)
     tf_model->graph = TF_NewGraph();
     tf_model->status = TF_NewStatus();
 
-    graph_opts = TF_NewImportGraphDefOptions();
-    TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status);
-    TF_DeleteImportGraphDefOptions(graph_opts);
-    TF_DeleteBuffer(graph_def);
+#define CLEANUP_ON_ERROR(tf_model, model) { \
+    TF_DeleteGraph(tf_model->graph); \
+    TF_DeleteStatus(tf_model->status); \
+    av_freep(&tf_model); \
+    av_freep(&model); \
+    return NULL; \
+}
+
+    op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x");
+    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
+    TF_SetAttrShape(op_desc, "shape", input_shape, 4);
+    op = TF_FinishOperation(op_desc, tf_model->status);
     if (TF_GetCode(tf_model->status) != TF_OK){
-        TF_DeleteGraph(tf_model->graph);
-        TF_DeleteStatus(tf_model->status);
-        av_freep(&tf_model);
-        av_freep(&model);
-        return NULL;
+        CLEANUP_ON_ERROR(tf_model, model);
     }
+
+    switch (model_type){
+    case DNN_SRCNN:
+        op = add_pad_op(tf_model, op, 6);
+        if (!op){
+            CLEANUP_ON_ERROR(tf_model, model);
+        }
+        op = add_conv_layers(tf_model, srcnn_consts,
+                             srcnn_consts_dims, srcnn_consts_dims_len,
+                             srcnn_activations, op, 3);
+        if (!op){
+            CLEANUP_ON_ERROR(tf_model, model);
+        }
+        break;
+    case DNN_ESPCN:
+        op = add_pad_op(tf_model, op, 4);
+        if (!op){
+            CLEANUP_ON_ERROR(tf_model, model);
+        }
+        op = add_conv_layers(tf_model, espcn_consts,
+                             espcn_consts_dims, espcn_consts_dims_len,
+                             espcn_activations, op, 3);
+        if (!op){
+            CLEANUP_ON_ERROR(tf_model, model);
+        }
+
+        op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", "depth_to_space");
+        input.oper = op;
+        TF_AddInput(op_desc, input);
+        TF_SetAttrType(op_desc, "T", TF_FLOAT);
+        TF_SetAttrInt(op_desc, "block_size", 2);
+        op = TF_FinishOperation(op_desc, tf_model->status);
+        if (TF_GetCode(tf_model->status) != TF_OK){
+            CLEANUP_ON_ERROR(tf_model, model);
+        }
+        break;
+    default:
+        CLEANUP_ON_ERROR(tf_model, model);
+    }
+
+    op_desc = TF_NewOperation(tf_model->graph, "Identity", "y");
+    input.oper = op;
+    TF_AddInput(op_desc, input);
+    TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        CLEANUP_ON_ERROR(tf_model, model);
+    }
+
     model->model = (void*)tf_model;
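
The generated graph is bracketed by the "x" placeholder and the "y" identity node, so callers only need those two names to run inference. A minimal sketch of driving such a graph with the TensorFlow C API (assuming tf_model->session has already been created with TF_NewSession, and input_tensor is a hypothetical 1 x H x W x 1 float tensor; error handling elided):

    TF_Output in  = { TF_GraphOperationByName(tf_model->graph, "x"), 0 };
    TF_Output out = { TF_GraphOperationByName(tf_model->graph, "y"), 0 };
    TF_Tensor* result = NULL;
    /* Feed "x", fetch "y"; no run options, target ops, or metadata. */
    TF_SessionRun(tf_model->session, NULL,
                  &in, &input_tensor, 1,
                  &out, &result, 1,
                  NULL, 0, NULL, tf_model->status);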

libavfilter/dnn_espcn.h: file diff suppressed because it is too large.

libavfilter/dnn_srcnn.h: file diff suppressed because it is too large.

@@ -162,26 +162,26 @@ static int config_props(AVFilterLink* inlink)
     switch (inlink->format){
     case AV_PIX_FMT_YUV420P:
-        sws_src_h = (sws_src_h >> 1) + (sws_src_h % 2 != 0 ? 1 : 0);
-        sws_src_w = (sws_src_w >> 1) + (sws_src_w % 2 != 0 ? 1 : 0);
-        sws_dst_h = (sws_dst_h >> 1) + (sws_dst_h % 2 != 0 ? 1 : 0);
-        sws_dst_w = (sws_dst_w >> 1) + (sws_dst_w % 2 != 0 ? 1 : 0);
+        sws_src_h = AV_CEIL_RSHIFT(sws_src_h, 1);
+        sws_src_w = AV_CEIL_RSHIFT(sws_src_w, 1);
+        sws_dst_h = AV_CEIL_RSHIFT(sws_dst_h, 1);
+        sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 1);
         break;
     case AV_PIX_FMT_YUV422P:
-        sws_src_w = (sws_src_w >> 1) + (sws_src_w % 2 != 0 ? 1 : 0);
-        sws_dst_w = (sws_dst_w >> 1) + (sws_dst_w % 2 != 0 ? 1 : 0);
+        sws_src_w = AV_CEIL_RSHIFT(sws_src_w, 1);
+        sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 1);
         break;
     case AV_PIX_FMT_YUV444P:
         break;
     case AV_PIX_FMT_YUV410P:
-        sws_src_h = (sws_src_h >> 2) + (sws_src_h % 4 != 0 ? 1 : 0);
-        sws_src_w = (sws_src_w >> 2) + (sws_src_w % 4 != 0 ? 1 : 0);
-        sws_dst_h = (sws_dst_h >> 2) + (sws_dst_h % 4 != 0 ? 1 : 0);
-        sws_dst_w = (sws_dst_w >> 2) + (sws_dst_w % 4 != 0 ? 1 : 0);
+        sws_src_h = AV_CEIL_RSHIFT(sws_src_h, 2);
+        sws_src_w = AV_CEIL_RSHIFT(sws_src_w, 2);
+        sws_dst_h = AV_CEIL_RSHIFT(sws_dst_h, 2);
+        sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 2);
         break;
     case AV_PIX_FMT_YUV411P:
-        sws_src_w = (sws_src_w >> 2) + (sws_src_w % 4 != 0 ? 1 : 0);
-        sws_dst_w = (sws_dst_w >> 2) + (sws_dst_w % 4 != 0 ? 1 : 0);
+        sws_src_w = AV_CEIL_RSHIFT(sws_src_w, 2);
+        sws_dst_w = AV_CEIL_RSHIFT(sws_dst_w, 2);
         break;
     default:
         av_log(context, AV_LOG_ERROR, "could not create SwsContext for input pixel format");
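
AV_CEIL_RSHIFT() from libavutil/common.h is a rounding-up (ceiling) right shift, so these changes are behavior-preserving. For example, for an odd luma width with 4:2:0 chroma subsampling:

    #include "libavutil/common.h"

    /* old: (1921 >> 1) + (1921 % 2 != 0 ? 1 : 0) == 961
     * new: AV_CEIL_RSHIFT(1921, 1)               == 961 */
    int chroma_w = AV_CEIL_RSHIFT(1921, 1);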
