diff --git a/modules/videoio/src/cap_v4l.cpp b/modules/videoio/src/cap_v4l.cpp
index 4f00e43d51..fc9f6e06a8 100644
--- a/modules/videoio/src/cap_v4l.cpp
+++ b/modules/videoio/src/cap_v4l.cpp
@@ -294,6 +294,9 @@ typedef struct CvCaptureCAM_V4L
    int index;
    int width, height;
    __u32 fps;
+   bool convert_rgb;
+   bool frame_allocated;
+
    /* V4L2 variables */
    buffer buffers[MAX_V4L_BUFFERS + 1];
    struct v4l2_capability cap;
@@ -674,6 +677,43 @@ static int v4l2_set_fps(CvCaptureCAM_V4L* capture) {
     return ioctl (capture->deviceHandle, VIDIOC_S_PARM, &setfps);
 }
 
+static int v4l2_num_channels(__u32 palette) {
+    switch(palette) {
+    case V4L2_PIX_FMT_MJPEG:
+    case V4L2_PIX_FMT_JPEG:
+        return 1;
+    case V4L2_PIX_FMT_YUYV:
+    case V4L2_PIX_FMT_UYVY:
+        return 2;
+    default:
+        return 0;
+    }
+}
+
+static void v4l2_create_frame(CvCaptureCAM_V4L *capture) {
+    CvSize size(capture->form.fmt.pix.width, capture->form.fmt.pix.height);
+    int channels = 3;
+
+    if (!capture->convert_rgb) {
+        if (capture->palette == V4L2_PIX_FMT_MJPEG || capture->palette == V4L2_PIX_FMT_JPEG) {
+            size = CvSize(capture->buffers[capture->bufferIndex].length, 1);
+        }
+        channels = v4l2_num_channels(capture->palette);
+    }
+
+    /* Set up Image data */
+    cvInitImageHeader(&capture->frame, size, IPL_DEPTH_8U, channels);
+
+    /* Allocate space for pixelformat we convert to.
+     * If we do not convert frame is just points to the buffer
+     */
+    if(capture->convert_rgb) {
+        capture->frame.imageData = (char*)cvAlloc(capture->frame.imageSize);
+    }
+
+    capture->frame_allocated = capture->convert_rgb;
+}
+
 static int _capture_V4L2 (CvCaptureCAM_V4L *capture)
 {
    char deviceName[MAX_DEVICE_DRIVER_NAME];
@@ -829,13 +869,7 @@ static int _capture_V4L2 (CvCaptureCAM_V4L *capture)
        }
    }
 
-   /* Set up Image data */
-   cvInitImageHeader( &capture->frame,
-                      cvSize( capture->form.fmt.pix.width,
-                              capture->form.fmt.pix.height ),
-                      IPL_DEPTH_8U, 3, IPL_ORIGIN_TL, 4 );
-   /* Allocate space for RGBA data */
-   capture->frame.imageData = (char *)cvAlloc(capture->frame.imageSize);
+   v4l2_create_frame(capture);
 
    // reinitialize buffers
    capture->FirstCapture = 1;
@@ -1022,6 +1056,7 @@ static CvCaptureCAM_V4L * icvCaptureFromCAM_V4L (int index)
    capture->width = DEFAULT_V4L_WIDTH;
    capture->height = DEFAULT_V4L_HEIGHT;
    capture->fps = DEFAULT_V4L_FPS;
+   capture->convert_rgb = true;
 
    if (_capture_V4L2 (capture) == -1) {
        icvCloseCAM_V4L(capture);
@@ -1939,18 +1974,16 @@ static int sonix_decompress(int width, int height, unsigned char *inp, unsigned
 static IplImage* icvRetrieveFrameCAM_V4L( CvCaptureCAM_V4L* capture, int) {
 
 #ifdef HAVE_CAMV4L2
-  if (V4L2_SUPPORT == 0)
-#endif /* HAVE_CAMV4L2 */
 #ifdef HAVE_CAMV4L
+  if (V4L2_SUPPORT == 0)
   {
-
     /* [FD] this really belongs here */
     if (ioctl(capture->deviceHandle, VIDIOCSYNC, &capture->mmaps[capture->bufferIndex].frame) == -1) {
       fprintf( stderr, "VIDEOIO ERROR: V4L: Could not SYNC to video stream. %s\n", strerror(errno));
     }
-
   }
 #endif /* HAVE_CAMV4L */
+#endif /* HAVE_CAMV4L2 */
 
    /* Now get what has already been captured as a IplImage return */
 
@@ -1960,15 +1993,19 @@ static IplImage* icvRetrieveFrameCAM_V4L( CvCaptureCAM_V4L* capture, int) {
 
   if (V4L2_SUPPORT == 1)
   {
+    // we need memory iff convert_rgb is true
+    bool recreate_frame = capture->frame_allocated != capture->convert_rgb;
 
-    if(((unsigned long)capture->frame.width != capture->form.fmt.pix.width)
-       || ((unsigned long)capture->frame.height != capture->form.fmt.pix.height)) {
-        cvFree(&capture->frame.imageData);
-        cvInitImageHeader( &capture->frame,
-              cvSize( capture->form.fmt.pix.width,
-                  capture->form.fmt.pix.height ),
-              IPL_DEPTH_8U, 3, IPL_ORIGIN_TL, 4 );
-       capture->frame.imageData = (char *)cvAlloc(capture->frame.imageSize);
+    if (!capture->convert_rgb) {
+        // for mjpeg streams the size might change in between, so we have to change the header
+        recreate_frame += capture->frame.imageSize != (int)capture->buffers[capture->bufferIndex].length;
+    }
+
+    if(recreate_frame) {
+        // printf("realloc %d %zu\n", capture->frame.imageSize, capture->buffers[capture->bufferIndex].length);
+        if(capture->frame_allocated)
+            cvFree(&capture->frame.imageData);
+        v4l2_create_frame(capture);
     }
 
   }
@@ -1996,6 +2033,11 @@ static IplImage* icvRetrieveFrameCAM_V4L( CvCaptureCAM_V4L* capture, int) {
 
   if (V4L2_SUPPORT == 1)
   {
+    if(!capture->convert_rgb) {
+        capture->frame.imageData = (char*)capture->buffers[capture->bufferIndex].start;
+        return &capture->frame;
+    }
+
     switch (capture->palette)
     {
     case V4L2_PIX_FMT_BGR24:
@@ -2174,6 +2216,8 @@ static double icvGetPropertyCAM_V4L (CvCaptureCAM_V4L* capture,
           return capture->palette;
       case CV_CAP_PROP_FORMAT:
           return CV_8UC3;
+      case CV_CAP_PROP_CONVERT_RGB:
+          return capture->convert_rgb;
       }
 
       if(property_id == CV_CAP_PROP_FPS) {
@@ -2416,10 +2460,8 @@ static int icvSetControl (CvCaptureCAM_V4L* capture,
 static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
                                   int property_id, double value ){
     static int width = 0, height = 0;
-    int retval;
-
-    /* initialization */
-    retval = 0;
+    int retval = 0;
+    bool possible;
 
     /* two subsequent calls setting WIDTH and HEIGHT will change
        the video size */
@@ -2447,6 +2489,12 @@ static int icvSetPropertyCAM_V4L( CvCaptureCAM_V4L* capture,
         capture->fps = value;
         retval = v4l2_reset( capture);
         break;
+    case CV_CAP_PROP_CONVERT_RGB:
+        // returns "0" for formats we do not know how to map to IplImage
+        possible = v4l2_num_channels(capture->palette);
+        capture->convert_rgb = bool(value) && possible;
+        retval = !possible && bool(value) ? -1 : 0;
+        break;
     default:
         retval = icvSetControl(capture, property_id, value);
         break;
diff --git a/samples/python2/video_v4l2.py b/samples/python2/video_v4l2.py
new file mode 100644
index 0000000000..0f03e6bf00
--- /dev/null
+++ b/samples/python2/video_v4l2.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+'''
+VideoCapture sample showcasing  some features of the Video4Linux2 backend
+
+Sample shows how VideoCapture class can be used to control parameters
+of a webcam such as focus or framerate.
+Also the sample provides an example how to access raw images delivered
+by the hardware to get a grayscale image in a very efficient fashion.
+
+Keys:
+    ESC    - exit
+    g      - toggle optimized grayscale conversion
+
+'''
+
+import cv2
+
+def decode_fourcc(v):
+    v = int(v)
+    return "".join([chr((v >> 8 * i) & 0xFF) for i in range(4)])
+
+font = cv2.FONT_HERSHEY_SIMPLEX
+color = (0, 255, 0)
+
+cap = cv2.VideoCapture(0)
+cap.set(cv2.CAP_PROP_AUTOFOCUS, False)
+
+cv2.namedWindow("Video")
+
+convert_rgb = True
+fps = int(cap.get(cv2.CAP_PROP_FPS))
+focus = int(cap.get(cv2.CAP_PROP_FOCUS)) * 100
+
+cv2.createTrackbar("FPS", "Video", fps, 30, lambda v: cap.set(cv2.CAP_PROP_FPS, v))
+cv2.createTrackbar("Focus", "Video", focus, 100, lambda v: cap.set(cv2.CAP_PROP_FOCUS, v / 100))
+
+while True:
+    status, img = cap.read()
+
+    fourcc = decode_fourcc(cap.get(cv2.CAP_PROP_FOURCC))
+
+    fps = cap.get(cv2.CAP_PROP_FPS)
+
+    if not bool(cap.get(cv2.CAP_PROP_CONVERT_RGB)):
+        if fourcc == "MJPG":
+            img = cv2.imdecode(img, cv2.IMREAD_GRAYSCALE)
+        elif fourcc == "YUYV":
+            img = cv2.cvtColor(img, cv2.COLOR_YUV2GRAY_YUYV)
+        else:
+            print("unsupported format")
+            break
+
+    cv2.putText(img, "Mode: {}".format(fourcc), (15, 40), font, 1.0, color)
+    cv2.putText(img, "FPS: {}".format(fps), (15, 80), font, 1.0, color)
+    cv2.imshow("Video", img)
+
+    k = cv2.waitKey(1)
+
+    if k == 27:
+        break
+    elif k == ord("g"):
+        convert_rgb = not convert_rgb
+        cap.set(cv2.CAP_PROP_CONVERT_RGB, convert_rgb)