mirror of https://github.com/FFmpeg/FFmpeg.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
309 lines
12 KiB
309 lines
12 KiB
/* |
|
* Copyright (C) 2019 Philip Langdale <philipl@overt.org> |
|
* |
|
* This file is part of FFmpeg. |
|
* |
|
* FFmpeg is free software; you can redistribute it and/or |
|
* modify it under the terms of the GNU Lesser General Public |
|
* License as published by the Free Software Foundation; either |
|
* version 2.1 of the License, or (at your option) any later version. |
|
* |
|
* FFmpeg is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
* Lesser General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU Lesser General Public |
|
* License along with FFmpeg; if not, write to the Free Software |
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
*/ |
|
|
|
__device__ static const int coef_lf[2] = { 4309, 213 }; |
|
__device__ static const int coef_hf[3] = { 5570, 3801, 1016 }; |
|
__device__ static const int coef_sp[2] = { 5077, 981 }; |
|
|
|
template<typename T> |
|
__inline__ __device__ T max3(T a, T b, T c) |
|
{ |
|
T x = max(a, b); |
|
return max(x, c); |
|
} |
|
|
|
template<typename T> |
|
__inline__ __device__ T min3(T a, T b, T c) |
|
{ |
|
T x = min(a, b); |
|
return min(x, c); |
|
} |
|
|
|
template<typename T> |
|
__inline__ __device__ T clip(T a, T min, T max) |
|
{ |
|
if (a < min) { |
|
return min; |
|
} else if (a > max) { |
|
return max; |
|
} else { |
|
return a; |
|
} |
|
} |
|
|
|
template<typename T> |
|
__inline__ __device__ T filter_intra(T cur_prefs3, T cur_prefs, |
|
T cur_mrefs, T cur_mrefs3, |
|
int clip_max) |
|
{ |
|
int final = (coef_sp[0] * (cur_mrefs + cur_prefs) - |
|
coef_sp[1] * (cur_mrefs3 + cur_prefs3)) >> 13; |
|
return clip(final, 0, clip_max); |
|
} |
|
|
|
template<typename T> |
|
__inline__ __device__ T filter(T cur_prefs3, T cur_prefs, T cur_mrefs, T cur_mrefs3, |
|
T prev2_prefs4, T prev2_prefs2, T prev2_0, T prev2_mrefs2, T prev2_mrefs4, |
|
T prev_prefs, T prev_mrefs, T next_prefs, T next_mrefs, |
|
T next2_prefs4, T next2_prefs2, T next2_0, T next2_mrefs2, T next2_mrefs4, |
|
int clip_max) |
|
{ |
|
T final; |
|
|
|
int c = cur_mrefs; |
|
int d = (prev2_0 + next2_0) >> 1; |
|
int e = cur_prefs; |
|
|
|
int temporal_diff0 = abs(prev2_0 - next2_0); |
|
int temporal_diff1 = (abs(prev_mrefs - c) + abs(prev_prefs - e)) >> 1; |
|
int temporal_diff2 = (abs(next_mrefs - c) + abs(next_prefs - e)) >> 1; |
|
int diff = max3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); |
|
|
|
if (!diff) { |
|
final = d; |
|
} else { |
|
int b = ((prev2_mrefs2 + next2_mrefs2) >> 1) - c; |
|
int f = ((prev2_prefs2 + next2_prefs2) >> 1) - e; |
|
int dc = d - c; |
|
int de = d - e; |
|
int mmax = max3(de, dc, min(b, f)); |
|
int mmin = min3(de, dc, max(b, f)); |
|
diff = max3(diff, mmin, -mmax); |
|
|
|
int interpol; |
|
if (abs(c - e) > temporal_diff0) { |
|
interpol = (((coef_hf[0] * (prev2_0 + next2_0) |
|
- coef_hf[1] * (prev2_mrefs2 + next2_mrefs2 + prev2_prefs2 + next2_prefs2) |
|
+ coef_hf[2] * (prev2_mrefs4 + next2_mrefs4 + prev2_prefs4 + next2_mrefs4)) >> 2) |
|
+ coef_lf[0] * (c + e) - coef_lf[1] * (cur_mrefs3 + cur_prefs3)) >> 13; |
|
} else { |
|
interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur_mrefs3 + cur_prefs3)) >> 13; |
|
} |
|
|
|
if (interpol > d + diff) { |
|
interpol = d + diff; |
|
} else if (interpol < d - diff) { |
|
interpol = d - diff; |
|
} |
|
final = clip(interpol, 0, clip_max); |
|
} |
|
|
|
return final; |
|
} |
|
|
|
template<typename T> |
|
__inline__ __device__ void bwdif_single(T *dst, |
|
cudaTextureObject_t prev, |
|
cudaTextureObject_t cur, |
|
cudaTextureObject_t next, |
|
int dst_width, int dst_height, int dst_pitch, |
|
int src_width, int src_height, |
|
int parity, int tff, |
|
int is_field_end, int clip_max) |
|
{ |
|
// Identify location |
|
int xo = blockIdx.x * blockDim.x + threadIdx.x; |
|
int yo = blockIdx.y * blockDim.y + threadIdx.y; |
|
|
|
if (xo >= dst_width || yo >= dst_height) { |
|
return; |
|
} |
|
|
|
// Don't modify the primary field |
|
if (yo % 2 == parity) { |
|
dst[yo*dst_pitch+xo] = tex2D<T>(cur, xo, yo); |
|
return; |
|
} |
|
|
|
T cur_prefs3 = tex2D<T>(cur, xo, yo + 3); |
|
T cur_prefs = tex2D<T>(cur, xo, yo + 1); |
|
T cur_mrefs = tex2D<T>(cur, xo, yo - 1); |
|
T cur_mrefs3 = tex2D<T>(cur, xo, yo - 3); |
|
|
|
if (is_field_end) { |
|
dst[yo*dst_pitch+xo] = |
|
filter_intra(cur_prefs3, cur_prefs, cur_mrefs, cur_mrefs3, clip_max); |
|
return; |
|
} |
|
|
|
// Calculate temporal prediction |
|
int is_second_field = !(parity ^ tff); |
|
|
|
cudaTextureObject_t prev2 = prev; |
|
cudaTextureObject_t prev1 = is_second_field ? cur : prev; |
|
cudaTextureObject_t next1 = is_second_field ? next : cur; |
|
cudaTextureObject_t next2 = next; |
|
|
|
T prev2_prefs4 = tex2D<T>(prev2, xo, yo + 4); |
|
T prev2_prefs2 = tex2D<T>(prev2, xo, yo + 2); |
|
T prev2_0 = tex2D<T>(prev2, xo, yo + 0); |
|
T prev2_mrefs2 = tex2D<T>(prev2, xo, yo - 2); |
|
T prev2_mrefs4 = tex2D<T>(prev2, xo, yo - 4); |
|
T prev_prefs = tex2D<T>(prev1, xo, yo + 1); |
|
T prev_mrefs = tex2D<T>(prev1, xo, yo - 1); |
|
T next_prefs = tex2D<T>(next1, xo, yo + 1); |
|
T next_mrefs = tex2D<T>(next1, xo, yo - 1); |
|
T next2_prefs4 = tex2D<T>(next2, xo, yo + 4); |
|
T next2_prefs2 = tex2D<T>(next2, xo, yo + 2); |
|
T next2_0 = tex2D<T>(next2, xo, yo + 0); |
|
T next2_mrefs2 = tex2D<T>(next2, xo, yo - 2); |
|
T next2_mrefs4 = tex2D<T>(next2, xo, yo - 4); |
|
|
|
dst[yo*dst_pitch+xo] = filter(cur_prefs3, cur_prefs, cur_mrefs, cur_mrefs3, |
|
prev2_prefs4, prev2_prefs2, prev2_0, prev2_mrefs2, prev2_mrefs4, |
|
prev_prefs, prev_mrefs, next_prefs, next_mrefs, |
|
next2_prefs4, next2_prefs2, next2_0, next2_mrefs2, next2_mrefs4, |
|
clip_max); |
|
} |
|
|
|
template <typename T> |
|
__inline__ __device__ void bwdif_double(T *dst, |
|
cudaTextureObject_t prev, |
|
cudaTextureObject_t cur, |
|
cudaTextureObject_t next, |
|
int dst_width, int dst_height, int dst_pitch, |
|
int src_width, int src_height, |
|
int parity, int tff, |
|
int is_field_end, int clip_max) |
|
{ |
|
int xo = blockIdx.x * blockDim.x + threadIdx.x; |
|
int yo = blockIdx.y * blockDim.y + threadIdx.y; |
|
|
|
if (xo >= dst_width || yo >= dst_height) { |
|
return; |
|
} |
|
|
|
if (yo % 2 == parity) { |
|
// Don't modify the primary field |
|
dst[yo*dst_pitch+xo] = tex2D<T>(cur, xo, yo); |
|
return; |
|
} |
|
|
|
T cur_prefs3 = tex2D<T>(cur, xo, yo + 3); |
|
T cur_prefs = tex2D<T>(cur, xo, yo + 1); |
|
T cur_mrefs = tex2D<T>(cur, xo, yo - 1); |
|
T cur_mrefs3 = tex2D<T>(cur, xo, yo - 3); |
|
|
|
if (is_field_end) { |
|
T final; |
|
final.x = filter_intra(cur_prefs3.x, cur_prefs.x, cur_mrefs.x, cur_mrefs3.x, |
|
clip_max); |
|
final.y = filter_intra(cur_prefs3.y, cur_prefs.y, cur_mrefs.y, cur_mrefs3.y, |
|
clip_max); |
|
dst[yo*dst_pitch+xo] = final; |
|
return; |
|
} |
|
|
|
int is_second_field = !(parity ^ tff); |
|
|
|
cudaTextureObject_t prev2 = prev; |
|
cudaTextureObject_t prev1 = is_second_field ? cur : prev; |
|
cudaTextureObject_t next1 = is_second_field ? next : cur; |
|
cudaTextureObject_t next2 = next; |
|
|
|
T prev2_prefs4 = tex2D<T>(prev2, xo, yo + 4); |
|
T prev2_prefs2 = tex2D<T>(prev2, xo, yo + 2); |
|
T prev2_0 = tex2D<T>(prev2, xo, yo + 0); |
|
T prev2_mrefs2 = tex2D<T>(prev2, xo, yo - 2); |
|
T prev2_mrefs4 = tex2D<T>(prev2, xo, yo - 4); |
|
T prev_prefs = tex2D<T>(prev1, xo, yo + 1); |
|
T prev_mrefs = tex2D<T>(prev1, xo, yo - 1); |
|
T next_prefs = tex2D<T>(next1, xo, yo + 1); |
|
T next_mrefs = tex2D<T>(next1, xo, yo - 1); |
|
T next2_prefs4 = tex2D<T>(next2, xo, yo + 4); |
|
T next2_prefs2 = tex2D<T>(next2, xo, yo + 2); |
|
T next2_0 = tex2D<T>(next2, xo, yo + 0); |
|
T next2_mrefs2 = tex2D<T>(next2, xo, yo - 2); |
|
T next2_mrefs4 = tex2D<T>(next2, xo, yo - 4); |
|
|
|
T final; |
|
final.x = filter(cur_prefs3.x, cur_prefs.x, cur_mrefs.x, cur_mrefs3.x, |
|
prev2_prefs4.x, prev2_prefs2.x, prev2_0.x, prev2_mrefs2.x, prev2_mrefs4.x, |
|
prev_prefs.x, prev_mrefs.x, next_prefs.x, next_mrefs.x, |
|
next2_prefs4.x, next2_prefs2.x, next2_0.x, next2_mrefs2.x, next2_mrefs4.x, |
|
clip_max); |
|
final.y = filter(cur_prefs3.y, cur_prefs.y, cur_mrefs.y, cur_mrefs3.y, |
|
prev2_prefs4.y, prev2_prefs2.y, prev2_0.y, prev2_mrefs2.y, prev2_mrefs4.y, |
|
prev_prefs.y, prev_mrefs.y, next_prefs.y, next_mrefs.y, |
|
next2_prefs4.y, next2_prefs2.y, next2_0.y, next2_mrefs2.y, next2_mrefs4.y, |
|
clip_max); |
|
|
|
dst[yo*dst_pitch+xo] = final; |
|
} |
|
|
|
extern "C" { |
|
|
|
__global__ void bwdif_uchar(unsigned char *dst, |
|
cudaTextureObject_t prev, |
|
cudaTextureObject_t cur, |
|
cudaTextureObject_t next, |
|
int dst_width, int dst_height, int dst_pitch, |
|
int src_width, int src_height, |
|
int parity, int tff, int is_field_end, int clip_max) |
|
{ |
|
bwdif_single(dst, prev, cur, next, |
|
dst_width, dst_height, dst_pitch, |
|
src_width, src_height, |
|
parity, tff, is_field_end, clip_max); |
|
} |
|
|
|
__global__ void bwdif_ushort(unsigned short *dst, |
|
cudaTextureObject_t prev, |
|
cudaTextureObject_t cur, |
|
cudaTextureObject_t next, |
|
int dst_width, int dst_height, int dst_pitch, |
|
int src_width, int src_height, |
|
int parity, int tff, int is_field_end, int clip_max) |
|
{ |
|
bwdif_single(dst, prev, cur, next, |
|
dst_width, dst_height, dst_pitch, |
|
src_width, src_height, |
|
parity, tff, is_field_end, clip_max); |
|
} |
|
|
|
__global__ void bwdif_uchar2(uchar2 *dst, |
|
cudaTextureObject_t prev, |
|
cudaTextureObject_t cur, |
|
cudaTextureObject_t next, |
|
int dst_width, int dst_height, int dst_pitch, |
|
int src_width, int src_height, |
|
int parity, int tff, int is_field_end, int clip_max) |
|
{ |
|
bwdif_double(dst, prev, cur, next, |
|
dst_width, dst_height, dst_pitch, |
|
src_width, src_height, |
|
parity, tff, is_field_end, clip_max); |
|
} |
|
|
|
__global__ void bwdif_ushort2(ushort2 *dst, |
|
cudaTextureObject_t prev, |
|
cudaTextureObject_t cur, |
|
cudaTextureObject_t next, |
|
int dst_width, int dst_height, int dst_pitch, |
|
int src_width, int src_height, |
|
int parity, int tff, int is_field_end, int clip_max) |
|
{ |
|
bwdif_double(dst, prev, cur, next, |
|
dst_width, dst_height, dst_pitch, |
|
src_width, src_height, |
|
parity, tff, is_field_end, clip_max); |
|
} |
|
|
|
} /* extern "C" */
|
|
|