@@ -33,12 +33,11 @@ typedef struct thread_common_param{
     const void *parameters;
     NativeContext *ctx;
     float *output_data;
     int thread_num;
 } thread_common_param;
 
 typedef struct thread_param{
     thread_common_param *thread_common_param;
-    int thread_index;
+    int thread_start, thread_end;
 } thread_param;
 
 int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
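For orientation, the sketch below (not part of the patch) shows how the two parameter structs read once the hunk above is applied. Members of thread_common_param that the diff context does not show are elided, and the comments are added here only to describe how each field is used later in the file.

/* Sketch, not part of the patch: the structs after this hunk is applied. */
typedef struct thread_common_param{
    /* ...members elided by the diff context... */
    const void *parameters;   // layer parameters, cast to ConvolutionalParams by the workers
    NativeContext *ctx;       // backend context (e.g. ctx->options.conv2d_threads)
    float *output_data;       // base pointer of the shared output buffer
    int thread_num;           // number of worker threads
} thread_common_param;

typedef struct thread_param{
    thread_common_param *thread_common_param;  // shared, read-only per-layer state
    int thread_start, thread_end;              // half-open row range [thread_start, thread_end) for this worker
} thread_param;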
@@ -125,16 +124,12 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
     int filter_size = conv_params->kernel_size * filter_linesize;
     int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
 
-    int thread_stride = (height - pad_size * 2) / thread_common_param->thread_num;
-    int thread_start = thread_stride * thread_param->thread_index + pad_size;
-    int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
-
     float *output = thread_common_param->output_data;
-    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
+    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_param->thread_start - pad_size);
 
     av_assert0(channel == conv_params->input_num);
 
-    for (int y = thread_start; y < thread_end; ++y) {
+    for (int y = thread_param->thread_start; y < thread_param->thread_end; ++y) {
         for (int x = pad_size; x < width - pad_size; ++x) {
             for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
                 if (conv_params->has_bias)
@@ -193,16 +188,19 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
         ? (av_cpu_count() + 1) : (ctx->options.conv2d_threads);
 #if HAVE_PTHREAD_CANCEL
     pthread_t *thread_id = av_malloc(thread_num * sizeof(pthread_t));
+    int thread_stride;
 #endif
     thread_param **thread_param = av_malloc(thread_num * sizeof(*thread_param));
     thread_common_param thread_common_param;
     const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(parameters);
+    int height = operands[input_operand_indexes[0]].dims[1];
+    int width = operands[input_operand_indexes[0]].dims[2];
     int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
     DnnOperand *output_operand = &operands[output_operand_index];
 
     output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
-    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] - pad_size * 2;
-    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] - pad_size * 2;
+    output_operand->dims[1] = height - pad_size * 2;
+    output_operand->dims[2] = width - pad_size * 2;
     output_operand->dims[3] = conv_params->output_num;
     output_operand->data_type = operands[input_operand_indexes[0]].data_type;
     output_operand->length = calculate_operand_data_length(output_operand);
@@ -223,13 +221,13 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     thread_common_param.ctx = ctx;
 
 #if HAVE_PTHREAD_CANCEL
     thread_common_param.thread_num = thread_num;
-
+    thread_stride = (height - pad_size * 2) / thread_num;
     //create threads
     for (int i = 0; i < thread_num; i++){
         thread_param[i] = av_malloc(sizeof(**thread_param));
         thread_param[i]->thread_common_param = &thread_common_param;
-        thread_param[i]->thread_index = i;
+        thread_param[i]->thread_start = thread_stride * i + pad_size;
+        thread_param[i]->thread_end = (i == thread_num - 1) ? (height - pad_size) : (thread_param[i]->thread_start + thread_stride);
         pthread_create(&thread_id[i], NULL, dnn_execute_layer_conv2d_thread, (void *)thread_param[i]);
     }
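The row split computed in this loop is the heart of the patch: rows [pad_size, height - pad_size) are divided evenly across the workers and the last thread absorbs the remainder. The helper below is only an illustration of that arithmetic (the name conv2d_row_range and the function itself are not part of the patch), with a worked example in the trailing comment.

/* Illustration only, not part of the patch. */
static void conv2d_row_range(int i, int thread_num, int height, int pad_size,
                             int *start, int *end)
{
    int thread_stride = (height - pad_size * 2) / thread_num;
    *start = thread_stride * i + pad_size;
    *end   = (i == thread_num - 1) ? (height - pad_size)
                                   : (*start + thread_stride);
}
/* Example: height = 12, pad_size = 1, thread_num = 3  =>  thread_stride = 3
 *   i = 0: rows [1, 4)
 *   i = 1: rows [4, 7)
 *   i = 2: rows [7, 11)   <- the last thread also takes the remainder row 10
 */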
@@ -245,10 +243,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
         av_free(thread_param[i]);
     }
 #else
     thread_common_param.thread_num = 1;
-    thread_param[0] = av_malloc(sizeof(thread_param));
+    thread_param[0] = av_malloc(sizeof(**thread_param));
     thread_param[0]->thread_common_param = &thread_common_param;
-    thread_param[0]->thread_index = 0;
+    thread_param[0]->thread_start = 0;
+    thread_param[0]->thread_end = height - pad_size;
     dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
     av_free(thread_param[0]);
 #endif
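Taken together, the two branches follow a plain create/join/free pattern. The toy program below is a self-contained sketch of that pattern (illustrative only, not FFmpeg code; it assumes plain POSIX pthreads, whereas the patch guards its threaded path with HAVE_PTHREAD_CANCEL). It starts thread 0 at pad_size, as the threaded branch above does; the single-thread fallback in the hunk above starts at row 0 instead.

/* Toy sketch of the create/join pattern (not FFmpeg code). Build with: cc demo.c -pthread */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct row_range { int start, end; } row_range;

static void *worker(void *arg)
{
    const row_range *r = arg;
    printf("worker handles rows [%d, %d)\n", r->start, r->end);  /* real code would convolve these rows */
    return NULL;
}

int main(void)
{
    int height = 12, pad_size = 1, thread_num = 3;
    int thread_stride = (height - pad_size * 2) / thread_num;
    pthread_t *thread_id = malloc(thread_num * sizeof(*thread_id));
    row_range *param = malloc(thread_num * sizeof(*param));

    for (int i = 0; i < thread_num; i++) {
        param[i].start = thread_stride * i + pad_size;
        param[i].end   = (i == thread_num - 1) ? (height - pad_size)
                                               : (param[i].start + thread_stride);
        pthread_create(&thread_id[i], NULL, worker, &param[i]);
    }
    for (int i = 0; i < thread_num; i++)
        pthread_join(thread_id[i], NULL);

    free(param);
    free(thread_id);
    return 0;
}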