|
|
|
@ -248,39 +248,38 @@ convolve_simd( |
|
|
|
|
|
|
|
|
|
int curr_y = or * STRIDE_Y; |
|
|
|
|
int curr_x = oc * STRIDE_X + lid; |
|
|
|
|
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 |
|
|
|
|
int saved_y = curr_y; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
int in_addr = input_batch_offset |
|
|
|
|
+ (curr_y - INPUT_PAD_H) * INPUT_WIDTH // y tile offset |
|
|
|
|
+ curr_x - INPUT_PAD_W; // x tile offset |
|
|
|
|
|
|
|
|
|
const int in_limit = (get_global_size(2) / ALIGNED_NUM_FILTERS) * TOTAL_INPUT_DEPTH_SIZE * INPUT_PITCH - 1; |
|
|
|
|
|
|
|
|
|
Dtype in_buf[INVEC_SIZE]; |
|
|
|
|
|
|
|
|
|
for(int kd = 0; kd < INPUT_DEPTH; kd++) |
|
|
|
|
{ |
|
|
|
|
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 |
|
|
|
|
const bool cx_out_of_range = !(curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W); |
|
|
|
|
int in_offset = in_addr; |
|
|
|
|
__attribute__((opencl_unroll_hint(INVEC_SIZE))) |
|
|
|
|
for (int reg = 0; reg < INVEC_SIZE; reg++) |
|
|
|
|
for (int reg = 0; reg < INVEC_SIZE; reg++, in_offset += INPUT_WIDTH) |
|
|
|
|
{ |
|
|
|
|
in_buf[reg] = inputs[in_offset]; |
|
|
|
|
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 |
|
|
|
|
if (!(curr_y >= INPUT_PAD_H && curr_y < INPUT_HEIGHT + INPUT_PAD_H && |
|
|
|
|
curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W)) |
|
|
|
|
{ |
|
|
|
|
in_buf[reg] = 0; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
curr_y += 1; |
|
|
|
|
in_offset += INPUT_WIDTH; |
|
|
|
|
Dtype input = inputs[clamp(in_offset, 0, in_limit)]; |
|
|
|
|
int cy = curr_y + reg; |
|
|
|
|
in_buf[reg] = (cx_out_of_range || cy < INPUT_PAD_H || cy >= INPUT_HEIGHT + INPUT_PAD_H) ? 0 : input; |
|
|
|
|
} |
|
|
|
|
#else |
|
|
|
|
int in_offset = in_addr; |
|
|
|
|
__attribute__((opencl_unroll_hint(INVEC_SIZE))) |
|
|
|
|
for (int reg = 0; reg < INVEC_SIZE; reg++, in_offset += INPUT_WIDTH) |
|
|
|
|
{ |
|
|
|
|
in_buf[reg] = inputs[min(in_offset, in_limit)]; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
in_addr += INPUT_PITCH; |
|
|
|
|
|
|
|
|
|
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 |
|
|
|
|
curr_y = saved_y; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
Dtype weight_buf[WEIGHT_PREF]; |
|
|
|
|
int w_idx=0; |
|
|
|
|
|
|
|
|
|