diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c
index 35f4ac5369..77a0b2e2fb 100644
--- a/libavfilter/af_silenceremove.c
+++ b/libavfilter/af_silenceremove.c
@@ -69,6 +69,8 @@ typedef struct SilenceRemoveContext {
     int64_t stop_silence_opt;
     int stop_mode;
 
+    int64_t window_duration_opt;
+
     AVFrame *start_holdoff;
     AVFrame *start_silence_hold;
     size_t start_holdoff_offset;
@@ -85,10 +87,9 @@ typedef struct SilenceRemoveContext {
     size_t stop_silence_end;
     int    stop_found_periods;
 
-    double window_ratio;
     AVFrame *window;
     int window_offset;
-    int window_size;
+    int64_t window_duration;
     double sum;
 
     int restart;
@@ -120,7 +121,7 @@ static const AVOption silenceremove_options[] = {
     { "detection",       "set how silence is detected",                        OFFSET(detection),           AV_OPT_TYPE_INT,      {.i64=D_RMS}, D_PEAK,D_RMS, AF, "detection" },
     {   "peak",          "use absolute values of samples",                     0,                           AV_OPT_TYPE_CONST,    {.i64=D_PEAK},0,         0, AF, "detection" },
     {   "rms",           "use squared values of samples",                      0,                           AV_OPT_TYPE_CONST,    {.i64=D_RMS}, 0,         0, AF, "detection" },
-    { "window",          "set duration of window in seconds",                  OFFSET(window_ratio),        AV_OPT_TYPE_DOUBLE,   {.dbl=0.02},  0,        10, AF },
+    { "window",          "set duration of window for silence detection",       OFFSET(window_duration_opt), AV_OPT_TYPE_DURATION, {.i64=20000}, 0, 100000000, AF },
     { NULL }
 };
 
@@ -178,7 +179,7 @@ static double compute_peak_double(SilenceRemoveContext *s, AVFrame *frame, int c
     new_sum -= wsample;
     new_sum += fabs(sample);
 
-    return new_sum / s->window_size;
+    return new_sum / s->window_duration;
 }
 
 static void update_peak_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
@@ -205,7 +206,7 @@ static double compute_peak_float(SilenceRemoveContext *s, AVFrame *frame, int ch
     new_sum -= wsample;
     new_sum += fabsf(sample);
 
-    return new_sum / s->window_size;
+    return new_sum / s->window_duration;
 }
 
 static void update_peak_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
@@ -232,7 +233,7 @@ static double compute_rms_double(SilenceRemoveContext *s, AVFrame *frame, int ch
     new_sum -= wsample;
     new_sum += sample * sample;
 
-    return sqrt(new_sum / s->window_size);
+    return sqrt(new_sum / s->window_duration);
 }
 
 static void update_rms_double(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
@@ -259,7 +260,7 @@ static double compute_rms_float(SilenceRemoveContext *s, AVFrame *frame, int ch,
     new_sum -= wsample;
     new_sum += sample * sample;
 
-    return sqrtf(new_sum / s->window_size);
+    return sqrtf(new_sum / s->window_duration);
 }
 
 static void update_rms_float(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
@@ -286,7 +287,7 @@ static double compute_peak_doublep(SilenceRemoveContext *s, AVFrame *frame, int
     new_sum -= wsample;
     new_sum += fabs(sample);
 
-    return new_sum / s->window_size;
+    return new_sum / s->window_duration;
 }
 
 static void update_peak_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
@@ -313,7 +314,7 @@ static double compute_peak_floatp(SilenceRemoveContext *s, AVFrame *frame, int c
     new_sum -= wsample;
     new_sum += fabsf(sample);
 
-    return new_sum / s->window_size;
+    return new_sum / s->window_duration;
 }
 
 static void update_peak_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
@@ -340,7 +341,7 @@ static double compute_rms_doublep(SilenceRemoveContext *s, AVFrame *frame, int c
     new_sum -= wsample;
     new_sum += sample * sample;
 
-    return sqrt(new_sum / s->window_size);
+    return sqrt(new_sum / s->window_duration);
 }
 
 static void update_rms_doublep(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
@@ -367,7 +368,7 @@ static double compute_rms_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch
     new_sum -= wsample;
     new_sum += sample * sample;
 
-    return sqrtf(new_sum / s->window_size);
+    return sqrtf(new_sum / s->window_duration);
 }
 
 static void update_rms_floatp(SilenceRemoveContext *s, AVFrame *frame, int ch, int offset)
@@ -396,7 +397,7 @@ static av_cold int init(AVFilterContext *ctx)
 
 static void clear_window(SilenceRemoveContext *s)
 {
-    av_samples_set_silence(s->window->extended_data, 0, s->window_size,
+    av_samples_set_silence(s->window->extended_data, 0, s->window_duration,
                            s->window->channels, s->window->format);
 
     s->window_offset = 0;
@@ -409,8 +410,10 @@ static int config_input(AVFilterLink *inlink)
     SilenceRemoveContext *s = ctx->priv;
 
     s->next_pts = AV_NOPTS_VALUE;
-    s->window_size = FFMAX((inlink->sample_rate * s->window_ratio), 1);
-    s->window = ff_get_audio_buffer(ctx->outputs[0], s->window_size);
+    s->window_duration = av_rescale(s->window_duration_opt, inlink->sample_rate,
+                                   AV_TIME_BASE);
+    s->window_duration = FFMAX(1, s->window_duration);
+    s->window = ff_get_audio_buffer(ctx->outputs[0], s->window_duration);
     if (!s->window)
         return AVERROR(ENOMEM);
 
@@ -615,7 +618,7 @@ silence_trim:
                 }
 
                 s->window_offset++;
-                if (s->window_offset >= s->window_size)
+                if (s->window_offset >= s->window_duration)
                     s->window_offset = 0;
                 s->start_holdoff_end++;
                 nb_samples_read++;
@@ -641,7 +644,7 @@ silence_trim:
                 }
 
                 s->window_offset++;
-                if (s->window_offset >= s->window_size)
+                if (s->window_offset >= s->window_duration)
                     s->window_offset = 0;
                 nb_samples_read++;
                 s->start_silence_offset++;
@@ -744,7 +747,7 @@ silence_copy:
                     }
 
                     s->window_offset++;
-                    if (s->window_offset >= s->window_size)
+                    if (s->window_offset >= s->window_duration)
                         s->window_offset = 0;
                     nb_samples_read++;
                     nb_samples_written++;
@@ -763,7 +766,7 @@ silence_copy:
                     }
 
                     s->window_offset++;
-                    if (s->window_offset >= s->window_size)
+                    if (s->window_offset >= s->window_duration)
                         s->window_offset = 0;
                     nb_samples_read++;
                     s->stop_holdoff_end++;