From 9a1cc06ebe8039ee5be508f4bade8e038c1a31e8 Mon Sep 17 00:00:00 2001 From: Leszek Swirski Date: Tue, 21 May 2013 17:53:36 +0100 Subject: [PATCH 01/27] Fix pixel value rendering for non-fixed-size QT windows --- modules/highgui/src/window_QT.cpp | 71 +++++++++++++++---------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp index 50f2b9e787..438c356f73 100644 --- a/modules/highgui/src/window_QT.cpp +++ b/modules/highgui/src/window_QT.cpp @@ -2651,17 +2651,16 @@ void DefaultViewPort::paintEvent(QPaintEvent* evnt) //Now disable matrixWorld for overlay display myPainter.setWorldMatrixEnabled(false); + //overlay pixel values if zoomed in far enough + if (param_matrixWorld.m11()*ratioX >= threshold_zoom_img_region && + param_matrixWorld.m11()*ratioY >= threshold_zoom_img_region) + { + drawImgRegion(&myPainter); + } + //in mode zoom/panning if (param_matrixWorld.m11() > 1) { - if (param_matrixWorld.m11() >= threshold_zoom_img_region) - { - if (centralWidget->param_flags == CV_WINDOW_NORMAL) - startDisplayInfo("WARNING: The values displayed are the resized image's values. 
If you want the original image's values, use CV_WINDOW_AUTOSIZE", 1000); - - drawImgRegion(&myPainter); - } - drawViewOverview(&myPainter); } @@ -2887,22 +2886,24 @@ void DefaultViewPort::drawStatusBar() //accept only CV_8UC1 and CV_8UC8 image for now void DefaultViewPort::drawImgRegion(QPainter *painter) { - if (nbChannelOriginImage!=CV_8UC1 && nbChannelOriginImage!=CV_8UC3) return; - qreal offsetX = param_matrixWorld.dx()/param_matrixWorld.m11(); + double pixel_width = param_matrixWorld.m11()*ratioX; + double pixel_height = param_matrixWorld.m11()*ratioY; + + qreal offsetX = param_matrixWorld.dx()/pixel_width; offsetX = offsetX - floor(offsetX); - qreal offsetY = param_matrixWorld.dy()/param_matrixWorld.m11(); + qreal offsetY = param_matrixWorld.dy()/pixel_height; offsetY = offsetY - floor(offsetY); QSize view = size(); QVarLengthArray linesX; - for (qreal _x = offsetX*param_matrixWorld.m11(); _x < view.width(); _x += param_matrixWorld.m11() ) + for (qreal _x = offsetX*pixel_width; _x < view.width(); _x += pixel_width ) linesX.append(QLineF(_x, 0, _x, view.height())); QVarLengthArray linesY; - for (qreal _y = offsetY*param_matrixWorld.m11(); _y < view.height(); _y += param_matrixWorld.m11() ) + for (qreal _y = offsetY*pixel_height; _y < view.height(); _y += pixel_height ) linesY.append(QLineF(0, _y, view.width(), _y)); @@ -2910,27 +2911,25 @@ void DefaultViewPort::drawImgRegion(QPainter *painter) int original_font_size = f.pointSize(); //change font size //f.setPointSize(4+(param_matrixWorld.m11()-threshold_zoom_img_region)/5); - f.setPixelSize(10+(param_matrixWorld.m11()-threshold_zoom_img_region)/5); + f.setPixelSize(10+(pixel_height-threshold_zoom_img_region)/5); painter->setFont(f); - QString val; - QRgb rgbValue; - QPointF point1;//sorry, I do not know how to name it - QPointF point2;//idem - for (int j=-1;j= 0 && point2.y() >= 0) - rgbValue = image2Draw_qt_resized.pixel(QPoint(point2.x(),point2.y())); + // Calculate top left of the pixel's position in the 
viewport (screen space) + QPointF pos_in_view((i+offsetX)*pixel_width, (j+offsetY)*pixel_height); + + // Calculate top left of the pixel's position in the image (image space) + QPointF pos_in_image = matrixWorld_inv.map(pos_in_view);// Top left of pixel in view + pos_in_image.rx() = pos_in_image.x()/ratioX; + pos_in_image.ry() = pos_in_image.y()/ratioY; + QPoint point_in_image(pos_in_image.x() + 0.5f,pos_in_image.y() + 0.5f);// Add 0.5 for rounding + + QRgb rgbValue; + if (image2Draw_qt.valid(point_in_image)) + rgbValue = image2Draw_qt.pixel(point_in_image); else rgbValue = qRgb(0,0,0); @@ -2943,29 +2942,29 @@ void DefaultViewPort::drawImgRegion(QPainter *painter) painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/2), Qt::AlignCenter, val); */ + QString val; val = tr("%1").arg(qRed(rgbValue)); painter->setPen(QPen(Qt::red, 1)); - painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height/3), Qt::AlignCenter, val); val = tr("%1").arg(qGreen(rgbValue)); painter->setPen(QPen(Qt::green, 1)); - painter->drawText(QRect(point1.x(),point1.y()+param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+pixel_height/3,pixel_width,pixel_height/3), Qt::AlignCenter, val); val = tr("%1").arg(qBlue(rgbValue)); painter->setPen(QPen(Qt::blue, 1)); - painter->drawText(QRect(point1.x(),point1.y()+2*param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+2*pixel_height/3,pixel_width,pixel_height/3), Qt::AlignCenter, val); } if (nbChannelOriginImage==CV_8UC1) { - - val = tr("%1").arg(qRed(rgbValue)); - painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()), + QString val = tr("%1").arg(qRed(rgbValue)); + 
painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height), Qt::AlignCenter, val); } } From 7d0f6b4d68b37234acdb0a399e2e95b9a7d39143 Mon Sep 17 00:00:00 2001 From: Leszek Swirski Date: Tue, 21 May 2013 17:54:58 +0100 Subject: [PATCH 02/27] Fix image saving from QT toolbar --- modules/highgui/src/window_QT.cpp | 18 ++++++++---------- modules/highgui/src/window_QT.h | 1 - 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp index 438c356f73..0c50c7070c 100644 --- a/modules/highgui/src/window_QT.cpp +++ b/modules/highgui/src/window_QT.cpp @@ -2473,35 +2473,33 @@ void DefaultViewPort::saveView() if (!fileName.isEmpty()) //save the picture { QString extension = fileName.right(3); - - // (no need anymore) create the image resized to receive the 'screenshot' - // image2Draw_qt_resized = QImage(viewport()->width(), viewport()->height(),QImage::Format_RGB888); - - QPainter saveimage(&image2Draw_qt_resized); - this->render(&saveimage); + + // Create a new pixmap to render the viewport into + QPixmap viewportPixmap(viewport()->size()); + viewport()->render(&viewportPixmap); // Save it.. 
if (QString::compare(extension, "png", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "PNG"); + viewportPixmap.save(fileName, "PNG"); return; } if (QString::compare(extension, "jpg", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "JPG"); + viewportPixmap.save(fileName, "JPG"); return; } if (QString::compare(extension, "bmp", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "BMP"); + viewportPixmap.save(fileName, "BMP"); return; } if (QString::compare(extension, "jpeg", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "JPEG"); + viewportPixmap.save(fileName, "JPEG"); return; } diff --git a/modules/highgui/src/window_QT.h b/modules/highgui/src/window_QT.h index 089997f514..a96a8c6e69 100644 --- a/modules/highgui/src/window_QT.h +++ b/modules/highgui/src/window_QT.h @@ -522,7 +522,6 @@ private: CvMat* image2Draw_mat; QImage image2Draw_qt; - QImage image2Draw_qt_resized; int nbChannelOriginImage; //for mouse callback From 0cee15eb7f8e10361e008b0428f70e9a781a75d6 Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Fri, 14 Jun 2013 15:10:25 +0400 Subject: [PATCH 03/27] Updated iOS camera. Added rotation flag. Added functions to lock/unlock focus, white balance and exposure. --- .../highgui/include/opencv2/highgui/cap_ios.h | 12 ++- .../highgui/src/cap_ios_abstract_camera.mm | 85 +++++++++++++++++++ modules/highgui/src/cap_ios_photo_camera.mm | 2 +- modules/highgui/src/cap_ios_video_camera.mm | 53 +++++++++--- 4 files changed, 138 insertions(+), 14 deletions(-) diff --git a/modules/highgui/include/opencv2/highgui/cap_ios.h b/modules/highgui/include/opencv2/highgui/cap_ios.h index 5bd5fe3c67..db3928f13b 100644 --- a/modules/highgui/include/opencv2/highgui/cap_ios.h +++ b/modules/highgui/include/opencv2/highgui/cap_ios.h @@ -1,6 +1,4 @@ -/* - * cap_ios.h - * For iOS video I/O +/* For iOS video I/O * by Eduard Feicho on 29/07/12 * Copyright 2012. All rights reserved. 
* @@ -90,6 +88,12 @@ - (void)createVideoPreviewLayer; - (void)updateOrientation; +- (void)lockFocus; +- (void)unlockFocus; +- (void)lockExposure; +- (void)unlockExposure; +- (void)lockBalance; +- (void)unlockBalance; @end @@ -116,6 +120,7 @@ BOOL grayscaleMode; BOOL recordVideo; + BOOL rotateVideo; AVAssetWriterInput* recordAssetWriterInput; AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor; AVAssetWriter* recordAssetWriter; @@ -128,6 +133,7 @@ @property (nonatomic, assign) BOOL grayscaleMode; @property (nonatomic, assign) BOOL recordVideo; +@property (nonatomic, assign) BOOL rotateVideo; @property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput; @property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor; @property (nonatomic, retain) AVAssetWriter* recordAssetWriter; diff --git a/modules/highgui/src/cap_ios_abstract_camera.mm b/modules/highgui/src/cap_ios_abstract_camera.mm index b6a7d944fa..a0e8f3e8b5 100644 --- a/modules/highgui/src/cap_ios_abstract_camera.mm +++ b/modules/highgui/src/cap_ios_abstract_camera.mm @@ -405,4 +405,89 @@ } } +- (void)lockFocus; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isFocusModeSupported:AVCaptureFocusModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.focusMode = AVCaptureFocusModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked focus configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockFocus; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.focusMode = AVCaptureFocusModeContinuousAutoFocus; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for autofocus configuration 
%@", [error localizedDescription]); + } + } +} + +- (void)lockExposure; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isExposureModeSupported:AVCaptureExposureModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.exposureMode = AVCaptureExposureModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked exposure configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockExposure; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isExposureModeSupported:AVCaptureExposureModeContinuousAutoExposure]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.exposureMode = AVCaptureExposureModeContinuousAutoExposure; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for autoexposure configuration %@", [error localizedDescription]); + } + } +} + +- (void)lockBalance; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.whiteBalanceMode = AVCaptureWhiteBalanceModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked exposure configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockBalance; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.whiteBalanceMode = AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for autoexposure configuration %@", [error 
localizedDescription]); + } + } +} + @end + diff --git a/modules/highgui/src/cap_ios_photo_camera.mm b/modules/highgui/src/cap_ios_photo_camera.mm index f05cfa5f87..f8891f2277 100644 --- a/modules/highgui/src/cap_ios_photo_camera.mm +++ b/modules/highgui/src/cap_ios_photo_camera.mm @@ -32,7 +32,7 @@ #import "opencv2/highgui/cap_ios.h" #include "precomp.hpp" -#pragma mark - Private Interface +#pragma mark - Private Interface mark - Private Interface @interface CvPhotoCamera () diff --git a/modules/highgui/src/cap_ios_video_camera.mm b/modules/highgui/src/cap_ios_video_camera.mm index 1f9ea14bf8..588adfc9cc 100644 --- a/modules/highgui/src/cap_ios_video_camera.mm +++ b/modules/highgui/src/cap_ios_video_camera.mm @@ -30,7 +30,6 @@ #import "opencv2/highgui/cap_ios.h" #include "precomp.hpp" - #import @@ -70,6 +69,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; @synthesize videoDataOutput; @synthesize recordVideo; +@synthesize rotateVideo; //@synthesize videoFileOutput; @synthesize recordAssetWriterInput; @synthesize recordPixelBufferAdaptor; @@ -85,6 +85,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; if (self) { self.useAVCaptureVideoPreviewLayer = NO; self.recordVideo = NO; + self.rotateVideo = NO; } return self; } @@ -269,13 +270,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } - - - #pragma mark - Private Interface - - - (void)createVideoDataOutput; { // Make a video data output @@ -389,6 +385,38 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; [self.parentView.layer addSublayer:self.customPreviewLayer]; } +- (CVPixelBufferRef) pixelBufferFromCGImage: (CGImageRef) image +{ + + CGSize frameSize = CGSizeMake(CGImageGetWidth(image), CGImageGetHeight(image)); + NSDictionary *options = [NSDictionary dictionaryWithObjectsAndKeys: + [NSNumber numberWithBool:NO], kCVPixelBufferCGImageCompatibilityKey, + [NSNumber 
numberWithBool:NO], kCVPixelBufferCGBitmapContextCompatibilityKey, + nil]; + CVPixelBufferRef pxbuffer = NULL; + CVReturn status = CVPixelBufferCreate(kCFAllocatorDefault, frameSize.width, + frameSize.height, kCVPixelFormatType_32ARGB, (CFDictionaryRef) CFBridgingRetain(options), + &pxbuffer); + NSParameterAssert(status == kCVReturnSuccess && pxbuffer != NULL); + + CVPixelBufferLockBaseAddress(pxbuffer, 0); + void *pxdata = CVPixelBufferGetBaseAddress(pxbuffer); + + + CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB(); + CGContextRef context = CGBitmapContextCreate(pxdata, frameSize.width, + frameSize.height, 8, 4*frameSize.width, rgbColorSpace, + kCGImageAlphaPremultipliedFirst); + + CGContextDrawImage(context, CGRectMake(0, 0, CGImageGetWidth(image), + CGImageGetHeight(image)), image); + CGColorSpaceRelease(rgbColorSpace); + CGContextRelease(context); + + CVPixelBufferUnlockBaseAddress(pxbuffer, 0); + + return pxbuffer; +} #pragma mark - Protocol AVCaptureVideoDataOutputSampleBufferDelegate @@ -522,7 +550,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } if (self.recordAssetWriterInput.readyForMoreMediaData) { - if (! [self.recordPixelBufferAdaptor appendPixelBuffer:imageBuffer + CVImageBufferRef pixelBuffer = [self pixelBufferFromCGImage:dstImage]; + if (! 
[self.recordPixelBufferAdaptor appendPixelBuffer:pixelBuffer withPresentationTime:lastSampleTime] ) { NSLog(@"Video Writing Error"); } @@ -543,9 +572,12 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; - (void)updateOrientation; { - NSLog(@"rotate.."); - self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height); - [self layoutPreviewLayer]; + if (self.rotateVideo == YES) + { + NSLog(@"rotate.."); + self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height); + [self layoutPreviewLayer]; + } } @@ -583,3 +615,4 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } @end + From fee81210405ce01bccc810be59c957b8f9d227dc Mon Sep 17 00:00:00 2001 From: Ivan Korolev Date: Fri, 14 Jun 2013 17:03:15 +0400 Subject: [PATCH 04/27] Added regression tests for SURF/SIFT (related to #2892) --- modules/nonfree/test/test_features2d.cpp | 73 ++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/modules/nonfree/test/test_features2d.cpp b/modules/nonfree/test/test_features2d.cpp index 001d628aaa..4cce77b9d5 100644 --- a/modules/nonfree/test/test_features2d.cpp +++ b/modules/nonfree/test/test_features2d.cpp @@ -1146,3 +1146,76 @@ protected: TEST(Features2d_SIFTHomographyTest, regression) { CV_DetectPlanarTest test("SIFT", 80); test.safe_run(); } TEST(Features2d_SURFHomographyTest, regression) { CV_DetectPlanarTest test("SURF", 80); test.safe_run(); } +class FeatureDetectorUsingMaskTest : public cvtest::BaseTest +{ +public: + FeatureDetectorUsingMaskTest(const Ptr& featureDetector) : + featureDetector_(featureDetector) + { + CV_Assert(!featureDetector_.empty()); + } + +protected: + + void run(int) + { + const int nStepX = 2; + const int nStepY = 2; + + const string imageFilename = string(ts->get_data_path()) + "/features2d/tsukuba.png"; + + Mat image = imread(imageFilename); + 
if(image.empty()) + { + ts->printf(cvtest::TS::LOG, "Image %s can not be read.\n", imageFilename.c_str()); + ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA); + return; + } + + Mat mask(image.size(), CV_8U); + + const int stepX = image.size().width / nStepX; + const int stepY = image.size().height / nStepY; + + vector keyPoints; + vector points; + for(int i=0; idetect(image, keyPoints, mask); + KeyPoint::convert(keyPoints, points); + + for(size_t k=0; kprintf(cvtest::TS::LOG, "The feature point is outside of the mask."); + ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT); + return; + } + } + } + + ts->set_failed_test_info( cvtest::TS::OK ); + } + + Ptr featureDetector_; +}; + +TEST(Features2d_SIFT_using_mask, regression) +{ + FeatureDetectorUsingMaskTest test(Algorithm::create("Feature2D.SIFT")); + test.safe_run(); +} + +TEST(DISABLED_Features2d_SURF_using_mask, regression) +{ + FeatureDetectorUsingMaskTest test(Algorithm::create("Feature2D.SURF")); + test.safe_run(); +} + From 5db08961cec08f309c3165fa086a0eb8e8e5d6ee Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Tue, 18 Jun 2013 06:59:52 +0400 Subject: [PATCH 05/27] fixed Kirill's comments --- modules/highgui/src/cap_ios_abstract_camera.mm | 4 ++-- modules/highgui/src/cap_ios_photo_camera.mm | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/highgui/src/cap_ios_abstract_camera.mm b/modules/highgui/src/cap_ios_abstract_camera.mm index a0e8f3e8b5..dc4faaaeff 100644 --- a/modules/highgui/src/cap_ios_abstract_camera.mm +++ b/modules/highgui/src/cap_ios_abstract_camera.mm @@ -470,7 +470,7 @@ device.whiteBalanceMode = AVCaptureWhiteBalanceModeLocked; [device unlockForConfiguration]; } else { - NSLog(@"unable to lock device for locked exposure configuration %@", [error localizedDescription]); + NSLog(@"unable to lock device for locked white balance configuration %@", [error localizedDescription]); } } } @@ -484,7 +484,7 @@ device.whiteBalanceMode = 
AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance; [device unlockForConfiguration]; } else { - NSLog(@"unable to lock device for autoexposure configuration %@", [error localizedDescription]); + NSLog(@"unable to lock device for auto white balance configuration %@", [error localizedDescription]); } } } diff --git a/modules/highgui/src/cap_ios_photo_camera.mm b/modules/highgui/src/cap_ios_photo_camera.mm index f8891f2277..f05cfa5f87 100644 --- a/modules/highgui/src/cap_ios_photo_camera.mm +++ b/modules/highgui/src/cap_ios_photo_camera.mm @@ -32,7 +32,7 @@ #import "opencv2/highgui/cap_ios.h" #include "precomp.hpp" -#pragma mark - Private Interface mark - Private Interface +#pragma mark - Private Interface @interface CvPhotoCamera () From 24fd2cc326db17a511eda02670dd64209b7b689a Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Tue, 18 Jun 2013 07:02:09 +0400 Subject: [PATCH 06/27] updated licenses --- modules/highgui/src/cap_ios_abstract_camera.mm | 1 + modules/highgui/src/cap_ios_video_camera.mm | 1 + 2 files changed, 2 insertions(+) diff --git a/modules/highgui/src/cap_ios_abstract_camera.mm b/modules/highgui/src/cap_ios_abstract_camera.mm index dc4faaaeff..38e1c12e68 100644 --- a/modules/highgui/src/cap_ios_abstract_camera.mm +++ b/modules/highgui/src/cap_ios_abstract_camera.mm @@ -2,6 +2,7 @@ * cap_ios_abstract_camera.mm * For iOS video I/O * by Eduard Feicho on 29/07/12 + * by Alexander Shishkov on 17/07/13 * Copyright 2012. All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/modules/highgui/src/cap_ios_video_camera.mm b/modules/highgui/src/cap_ios_video_camera.mm index 588adfc9cc..ac85f79ee5 100644 --- a/modules/highgui/src/cap_ios_video_camera.mm +++ b/modules/highgui/src/cap_ios_video_camera.mm @@ -2,6 +2,7 @@ * cap_ios_video_camera.mm * For iOS video I/O * by Eduard Feicho on 29/07/12 + * by Alexander Shishkov on 17/07/13 * Copyright 2012. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without From f003e29dc0e10fa7d28dd5c717fbec134b2bf67e Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 13 Jun 2013 12:22:12 +0400 Subject: [PATCH 07/27] Updated testlog_parser.py to the latest version from the private repo. --- modules/ts/misc/testlog_parser.py | 39 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/modules/ts/misc/testlog_parser.py b/modules/ts/misc/testlog_parser.py index 7ae6aa5980..8ab21417ca 100755 --- a/modules/ts/misc/testlog_parser.py +++ b/modules/ts/misc/testlog_parser.py @@ -100,34 +100,39 @@ class TestInfo(object): def dump(self, units="ms"): print "%s ->\t\033[1;31m%s\033[0m = \t%.2f%s" % (str(self), self.status, self.get("gmean", units), units) - def shortName(self): + + def getName(self): pos = self.name.find("/") if pos > 0: - name = self.name[:pos] - else: - name = self.name - if self.fixture.endswith(name): - fixture = self.fixture[:-len(name)] + return self.name[:pos] + return self.name + + + def getFixture(self): + if self.fixture.endswith(self.getName()): + fixture = self.fixture[:-len(self.getName())] else: fixture = self.fixture if fixture.endswith("_"): fixture = fixture[:-1] + return fixture + + + def param(self): + return '::'.join(filter(None, [self.type_param, self.value_param])) + + def shortName(self): + name = self.getName() + fixture = self.getFixture() return '::'.join(filter(None, [name, fixture])) + def __str__(self): - pos = self.name.find("/") - if pos > 0: - name = self.name[:pos] - else: - name = self.name - if self.fixture.endswith(name): - fixture = self.fixture[:-len(name)] - else: - fixture = self.fixture - if fixture.endswith("_"): - fixture = fixture[:-1] + name = self.getName() + fixture = self.getFixture() return '::'.join(filter(None, [name, fixture, self.type_param, self.value_param])) + def __cmp__(self, other): r = cmp(self.fixture, other.fixture); if r != 0: From 
6ff207b53a6379933018c88167ee11b5b1a62e2d Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 14 Jun 2013 14:53:02 +0400 Subject: [PATCH 08/27] Added a new and improved version of the XLS report generator. --- modules/ts/misc/xls-report.py | 171 ++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100755 modules/ts/misc/xls-report.py diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py new file mode 100755 index 0000000000..fb6cfd0960 --- /dev/null +++ b/modules/ts/misc/xls-report.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +from __future__ import division + +import ast +import logging +import os, os.path +import re + +from argparse import ArgumentParser +from glob import glob +from itertools import ifilter + +import xlwt + +from testlog_parser import parseLogFile + +# To build XLS report you neet to put your xmls (OpenCV tests output) in the +# following way: +# +# "root" --- folder, representing the whole XLS document. It contains several +# subfolders --- sheet-paths of the XLS document. Each sheet-path contains it's +# subfolders --- config-paths. Config-paths are columns of the sheet and +# they contains xmls files --- output of OpenCV modules testing. +# Config-path means OpenCV build configuration, including different +# options such as NEON, TBB, GPU enabling/disabling. 
+# +# root +# root\sheet_path +# root\sheet_path\configuration1 (column 1) +# root\sheet_path\configuration2 (column 2) + +re_image_size = re.compile(r'^ \d+ x \d+$', re.VERBOSE) +re_data_type = re.compile(r'^ (?: 8 | 16 | 32 | 64 ) [USF] C [1234] $', re.VERBOSE) + +time_style = xlwt.easyxf(num_format_str='#0.00') +no_time_style = xlwt.easyxf('pattern: pattern solid, fore_color gray25') + +speedup_style = time_style +good_speedup_style = xlwt.easyxf('font: color green', num_format_str='#0.00') +bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00') +no_speedup_style = no_time_style +error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange') +header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top') + +def collect_xml(collection, configuration, xml_fullname): + xml_fname = os.path.split(xml_fullname)[1] + module = xml_fname[:xml_fname.index('_')] + + if module not in collection: + collection[module] = {} + + for test in sorted(parseLogFile(xml_fullname)): + if test.shortName() not in collection[module]: + collection[module][test.shortName()] = {} + if test.param() not in collection[module][test.shortName()]: + collection[module][test.shortName()][test.param()] = {} + collection[module][test.shortName()][test.param()][configuration] = \ + test.get("gmean") + +def main(): + arg_parser = ArgumentParser(description='Build an XLS performance report.') + arg_parser.add_argument('sheet_dirs', nargs='+', metavar='DIR', help='directory containing perf test logs') + arg_parser.add_argument('-o', '--output', metavar='XLS', default='report.xls', help='name of output file') + arg_parser.add_argument('-c', '--config', metavar='CONF', help='global configuration file') + + args = arg_parser.parse_args() + + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) + + if args.config is not None: + with open(args.config) as global_conf_file: + global_conf = 
ast.literal_eval(global_conf_file.read()) + else: + global_conf = {} + + wb = xlwt.Workbook() + + for sheet_path in args.sheet_dirs: + try: + with open(os.path.join(sheet_path, 'sheet.conf')) as sheet_conf_file: + sheet_conf = ast.literal_eval(sheet_conf_file.read()) + except Exception: + sheet_conf = {} + logging.debug('no sheet.conf for {}'.format(sheet_path)) + + sheet_conf = dict(global_conf.items() + sheet_conf.items()) + + if 'configurations' in sheet_conf: + config_names = sheet_conf['configurations'] + else: + try: + config_names = [p for p in os.listdir(sheet_path) + if os.path.isdir(os.path.join(sheet_path, p))] + except Exception as e: + logging.warning(e) + continue + + collection = {} + + for configuration, configuration_path in \ + [(c, os.path.join(sheet_path, c)) for c in config_names]: + logging.info('processing {}'.format(configuration_path)) + for xml_fullname in glob(os.path.join(configuration_path, '*.xml')): + collect_xml(collection, configuration, xml_fullname) + + sheet = wb.add_sheet(sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path)))) + + sheet.row(0).height = 800 + sheet.panes_frozen = True + sheet.remove_splits = True + sheet.horz_split_pos = 1 + sheet.horz_split_first_visible = 1 + + sheet_comparisons = sheet_conf.get('comparisons', []) + + for i, w in enumerate([2000, 15000, 2500, 2000, 15000] + + (len(config_names) + 1 + len(sheet_comparisons)) * [3000]): + sheet.col(i).width = w + + for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters'] + + config_names + [None] + + [comp['from'] + '\nvs\n' + comp['to'] for comp in sheet_comparisons]): + sheet.row(0).write(i, caption, header_style) + + row = 1 + + module_colors = sheet_conf.get('module_colors', {}) + module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color)) + for module, color in module_colors.iteritems()} + + for module, tests in collection.iteritems(): + for test, params in 
tests.iteritems(): + for param, configs in params.iteritems(): + sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) + sheet.write(row, 1, test) + + param_list = param[1:-1].split(", ") + sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None)) + sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None)) + + sheet.row(row).write(4, param) + for i, c in enumerate(config_names): + if c in configs: + sheet.write(row, 5 + i, configs[c], time_style) + else: + sheet.write(row, 5 + i, None, no_time_style) + + for i, comp in enumerate(sheet_comparisons): + left = configs.get(comp["from"]) + right = configs.get(comp["to"]) + col = 5 + len(config_names) + 1 + i + + if left is not None and right is not None: + try: + speedup = left / right + sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else + bad_speedup_style if speedup < 0.9 else + speedup_style) + except ArithmeticError as e: + sheet.write(row, col, None, error_speedup_style) + else: + sheet.write(row, col, None, no_speedup_style) + + row += 1 + if row % 1000 == 0: sheet.flush_row_data() + + wb.save(args.output) + +if __name__ == '__main__': + main() From 4d7b1b5eded9cfbb456b0238a2f55c6f6ae491ee Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Mon, 17 Jun 2013 21:06:02 +0400 Subject: [PATCH 09/27] In the XLS report, enabled word wrapping for header cells. Otherwise, Excel ignores line breaks in them. 
--- modules/ts/misc/xls-report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index fb6cfd0960..f8288e16da 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -41,7 +41,7 @@ good_speedup_style = xlwt.easyxf('font: color green', num_format_str='#0.00') bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00') no_speedup_style = no_time_style error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange') -header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top') +header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top, wrap True') def collect_xml(collection, configuration, xml_fullname): xml_fname = os.path.split(xml_fullname)[1] From 0f1156bbb61efa0ec7d7b48e8a0cd02ec72378ba Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 18 Jun 2013 13:36:20 +0400 Subject: [PATCH 10/27] Made the order of tests in XLS reports deterministic. 
--- modules/ts/misc/xls-report.py | 81 ++++++++++++++++------------------- 1 file changed, 38 insertions(+), 43 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index f8288e16da..7e63b6737c 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -8,6 +8,7 @@ import os, os.path import re from argparse import ArgumentParser +from collections import OrderedDict from glob import glob from itertools import ifilter @@ -47,16 +48,11 @@ def collect_xml(collection, configuration, xml_fullname): xml_fname = os.path.split(xml_fullname)[1] module = xml_fname[:xml_fname.index('_')] - if module not in collection: - collection[module] = {} + module_tests = collection.setdefault(module, OrderedDict()) for test in sorted(parseLogFile(xml_fullname)): - if test.shortName() not in collection[module]: - collection[module][test.shortName()] = {} - if test.param() not in collection[module][test.shortName()]: - collection[module][test.shortName()][test.param()] = {} - collection[module][test.shortName()][test.param()][configuration] = \ - test.get("gmean") + test_results = module_tests.setdefault((test.shortName(), test.param()), {}) + test_results[configuration] = test.get("gmean") def main(): arg_parser = ArgumentParser(description='Build an XLS performance report.') @@ -129,41 +125,40 @@ def main(): module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color)) for module, color in module_colors.iteritems()} - for module, tests in collection.iteritems(): - for test, params in tests.iteritems(): - for param, configs in params.iteritems(): - sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) - sheet.write(row, 1, test) - - param_list = param[1:-1].split(", ") - sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None)) - sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None)) - - sheet.row(row).write(4, param) - for i, c in 
enumerate(config_names): - if c in configs: - sheet.write(row, 5 + i, configs[c], time_style) - else: - sheet.write(row, 5 + i, None, no_time_style) - - for i, comp in enumerate(sheet_comparisons): - left = configs.get(comp["from"]) - right = configs.get(comp["to"]) - col = 5 + len(config_names) + 1 + i - - if left is not None and right is not None: - try: - speedup = left / right - sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else - bad_speedup_style if speedup < 0.9 else - speedup_style) - except ArithmeticError as e: - sheet.write(row, col, None, error_speedup_style) - else: - sheet.write(row, col, None, no_speedup_style) - - row += 1 - if row % 1000 == 0: sheet.flush_row_data() + for module, tests in sorted(collection.iteritems()): + for ((test, param), configs) in tests.iteritems(): + sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) + sheet.write(row, 1, test) + + param_list = param[1:-1].split(", ") + sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None)) + sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None)) + + sheet.row(row).write(4, param) + for i, c in enumerate(config_names): + if c in configs: + sheet.write(row, 5 + i, configs[c], time_style) + else: + sheet.write(row, 5 + i, None, no_time_style) + + for i, comp in enumerate(sheet_comparisons): + left = configs.get(comp["from"]) + right = configs.get(comp["to"]) + col = 5 + len(config_names) + 1 + i + + if left is not None and right is not None: + try: + speedup = left / right + sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else + bad_speedup_style if speedup < 0.9 else + speedup_style) + except ArithmeticError as e: + sheet.write(row, col, None, error_speedup_style) + else: + sheet.write(row, col, None, no_speedup_style) + + row += 1 + if row % 1000 == 0: sheet.flush_row_data() wb.save(args.output) From 584f0745d0f917c993629c6e77bf898c6d243bf0 Mon Sep 17 00:00:00 2001 From: Roman Donchenko 
Date: Tue, 18 Jun 2013 12:30:05 +0400 Subject: [PATCH 11/27] Made xls-report.py ignore tests that were not successful. --- modules/ts/misc/xls-report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index 7e63b6737c..f6278bae00 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -52,7 +52,8 @@ def collect_xml(collection, configuration, xml_fullname): for test in sorted(parseLogFile(xml_fullname)): test_results = module_tests.setdefault((test.shortName(), test.param()), {}) - test_results[configuration] = test.get("gmean") + if test.status == 'run': + test_results[configuration] = test.get("gmean") def main(): arg_parser = ArgumentParser(description='Build an XLS performance report.') From 16c4aad36de4e42624e70baf677dc67d0c17fefa Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 13 Jun 2013 15:38:21 +0400 Subject: [PATCH 12/27] Java/Python bindings for computeCorrespondEpilines added. Simple Java test for computeCorrespondEpilines added. --- .../calib3d/include/opencv2/calib3d/calib3d.hpp | 6 +++--- .../src/org/opencv/test/calib3d/Calib3dTest.java | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modules/calib3d/include/opencv2/calib3d/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d/calib3d.hpp index 0d1cc46915..f213a114f4 100644 --- a/modules/calib3d/include/opencv2/calib3d/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d/calib3d.hpp @@ -639,9 +639,9 @@ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2, double param1=3., double param2=0.99); //! 
finds coordinates of epipolar lines corresponding the specified points -CV_EXPORTS void computeCorrespondEpilines( InputArray points, - int whichImage, InputArray F, - OutputArray lines ); +CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, + int whichImage, InputArray F, + OutputArray lines ); CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2, InputArray projPoints1, InputArray projPoints2, diff --git a/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java b/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java index 8bcaf58a05..db806b6fc9 100644 --- a/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java +++ b/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java @@ -585,4 +585,18 @@ public class Calib3dTest extends OpenCVTestCase { public void testValidateDisparityMatMatIntIntInt() { fail("Not yet implemented"); } + + public void testComputeCorrespondEpilines() + { + Mat fundamental = new Mat(3, 3, CvType.CV_64F); + fundamental.put(0, 0, 0, -0.577, 0.288, 0.577, 0, 0.288, -0.288, -0.288, 0); + MatOfPoint2f left = new MatOfPoint2f(); + left.alloc(1); + left.put(0, 0, 2, 3); //add(new Point(x, y)); + Mat lines = new Mat(); + Mat truth = new Mat(1, 1, CvType.CV_32FC3); + truth.put(0, 0, -0.70735186, 0.70686162, -0.70588124); + Calib3d.computeCorrespondEpilines(left, 1, fundamental, lines); + assertMatEqual(truth, lines, EPS); + } } From 1492b204727066daae2967f1bb2831acde42eb92 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 18 Jun 2013 13:17:33 +0400 Subject: [PATCH 13/27] fix gpu warnings with signed/unsigned char --- .../gpu/include/opencv2/gpu/device/limits.hpp | 231 +++++------------- modules/gpu/src/nvidia/core/NCV.hpp | 2 +- .../src/nvidia/core/NCVPixelOperations.hpp | 4 +- 3 files changed, 62 insertions(+), 175 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/device/limits.hpp 
b/modules/gpu/include/opencv2/gpu/device/limits.hpp index b040f199d6..595978006c 100644 --- a/modules/gpu/include/opencv2/gpu/device/limits.hpp +++ b/modules/gpu/include/opencv2/gpu/device/limits.hpp @@ -43,193 +43,80 @@ #ifndef __OPENCV_GPU_LIMITS_GPU_HPP__ #define __OPENCV_GPU_LIMITS_GPU_HPP__ -#include +#include +#include #include "common.hpp" namespace cv { namespace gpu { namespace device { - template struct numeric_limits - { - typedef T type; - __device__ __forceinline__ static type min() { return type(); }; - __device__ __forceinline__ static type max() { return type(); }; - __device__ __forceinline__ static type epsilon() { return type(); } - __device__ __forceinline__ static type round_error() { return type(); } - __device__ __forceinline__ static type denorm_min() { return type(); } - __device__ __forceinline__ static type infinity() { return type(); } - __device__ __forceinline__ static type quiet_NaN() { return type(); } - __device__ __forceinline__ static type signaling_NaN() { return T(); } - static const bool is_signed; - }; - template<> struct numeric_limits - { - typedef bool type; - __device__ __forceinline__ static type min() { return false; }; - __device__ __forceinline__ static type max() { return true; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template struct numeric_limits; - template<> struct numeric_limits - { - typedef char type; - __device__ __forceinline__ static type min() { return CHAR_MIN; }; - __device__ __forceinline__ static type max() { return CHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type 
denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = (char)-1 == -1; - }; - - template<> struct numeric_limits - { - typedef char type; - __device__ __forceinline__ static type min() { return SCHAR_MIN; }; - __device__ __forceinline__ static type max() { return SCHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = (signed char)-1 == -1; - }; - - template<> struct numeric_limits - { - typedef unsigned char type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return UCHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static bool min() { return false; } + __device__ __forceinline__ static bool max() { return true; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef short type; - __device__ __forceinline__ static type min() { return SHRT_MIN; }; - __device__ __forceinline__ static type max() { return SHRT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ 
static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static signed char min() { return SCHAR_MIN; } + __device__ __forceinline__ static signed char max() { return SCHAR_MAX; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef unsigned short type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return USHRT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned char min() { return 0; } + __device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef int type; - __device__ __forceinline__ static type min() { return INT_MIN; }; - __device__ __forceinline__ static type max() { return INT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static short min() { return SHRT_MIN; } + __device__ __forceinline__ static short max() { return SHRT_MAX; } + static const bool is_signed = true; +}; 
+template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned short min() { return 0; } + __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef unsigned int type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return UINT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static int min() { return INT_MIN; } + __device__ __forceinline__ static int max() { return INT_MAX; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef long type; - __device__ __forceinline__ static type min() { return LONG_MIN; }; - __device__ __forceinline__ static type max() { return LONG_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned int min() { return 0; } + __device__ __forceinline__ static unsigned int max() { return UINT_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef unsigned long type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return 
ULONG_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static float min() { return FLT_MIN; } + __device__ __forceinline__ static float max() { return FLT_MAX; } + __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef float type; - __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; }; - __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; }; - __device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; }; - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static double min() { return DBL_MIN; } + __device__ __forceinline__ static double max() { return DBL_MAX; } + __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef double type; - __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; }; - __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ 
__forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; }}} // namespace cv { namespace gpu { namespace device { #endif // __OPENCV_GPU_LIMITS_GPU_HPP__ diff --git a/modules/gpu/src/nvidia/core/NCV.hpp b/modules/gpu/src/nvidia/core/NCV.hpp index 0394dba186..80e1da7953 100644 --- a/modules/gpu/src/nvidia/core/NCV.hpp +++ b/modules/gpu/src/nvidia/core/NCV.hpp @@ -130,7 +130,7 @@ typedef int Ncv32s; typedef unsigned int Ncv32u; typedef short Ncv16s; typedef unsigned short Ncv16u; -typedef char Ncv8s; +typedef signed char Ncv8s; typedef unsigned char Ncv8u; typedef float Ncv32f; typedef double Ncv64f; diff --git a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp index ec2f16ebb7..c1e06b434e 100644 --- a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp +++ b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp @@ -51,7 +51,7 @@ template inline __host__ __device__ TBase _pixMaxVal(); template<> static inline __host__ __device__ Ncv8u _pixMaxVal() {return UCHAR_MAX;} template<> static inline __host__ __device__ Ncv16u _pixMaxVal() {return USHRT_MAX;} template<> static inline __host__ __device__ Ncv32u _pixMaxVal() {return UINT_MAX;} -template<> static inline __host__ __device__ Ncv8s _pixMaxVal() {return CHAR_MAX;} +template<> static inline __host__ __device__ Ncv8s _pixMaxVal() {return SCHAR_MAX;} template<> static inline __host__ __device__ Ncv16s _pixMaxVal() {return SHRT_MAX;} template<> static inline __host__ __device__ Ncv32s _pixMaxVal() {return INT_MAX;} template<> static inline __host__ __device__ Ncv32f _pixMaxVal() {return FLT_MAX;} @@ -61,7 +61,7 @@ template inline __host__ __device__ TBase _pixMinVal(); template<> static inline __host__ __device__ Ncv8u _pixMinVal() {return 0;} template<> static inline __host__ 
__device__ Ncv16u _pixMinVal() {return 0;} template<> static inline __host__ __device__ Ncv32u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv8s _pixMinVal() {return CHAR_MIN;} +template<> static inline __host__ __device__ Ncv8s _pixMinVal() {return SCHAR_MIN;} template<> static inline __host__ __device__ Ncv16s _pixMinVal() {return SHRT_MIN;} template<> static inline __host__ __device__ Ncv32s _pixMinVal() {return INT_MIN;} template<> static inline __host__ __device__ Ncv32f _pixMinVal() {return FLT_MIN;} From 24d84a45b19dd3d2016bacf943a3811c67e804d4 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Mon, 17 Jun 2013 21:06:15 +0400 Subject: [PATCH 14/27] Made tests record in the XML output which parallel framework was used. --- .../core/include/opencv2/core/internal.hpp | 26 ++++++++++++++++++ modules/core/src/parallel.cpp | 27 +++++-------------- modules/ts/src/precomp.hpp | 2 ++ modules/ts/src/ts_func.cpp | 8 ++++++ 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 5335fa01f8..10cd2caf93 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -50,6 +50,8 @@ #include +#include "cvconfig.h" + #if defined WIN32 || defined _WIN32 # ifndef WIN32 # define WIN32 @@ -184,6 +186,30 @@ CV_INLINE IppiSize ippiSize(int width, int height) # include "opencv2/core/eigen.hpp" #endif +#ifdef _OPENMP +# define HAVE_OPENMP +#endif + +#ifdef __APPLE__ +# define HAVE_GCD +#endif + +#if defined _MSC_VER && _MSC_VER >= 1600 +# define HAVE_CONCURRENCY +#endif + +#if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 +# define CV_PARALLEL_FRAMEWORK "tbb" +#elif defined HAVE_CSTRIPES +# define CV_PARALLEL_FRAMEWORK "cstripes" +#elif defined HAVE_OPENMP +# define CV_PARALLEL_FRAMEWORK "openmp" +#elif defined HAVE_GCD +# define CV_PARALLEL_FRAMEWORK "gcd" +#elif defined 
HAVE_CONCURRENCY +# define CV_PARALLEL_FRAMEWORK "ms-concurrency" +#endif + #ifdef __cplusplus namespace cv diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 0b2a845ac1..51b165275f 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -61,17 +61,6 @@ #endif #endif -#ifdef _OPENMP - #define HAVE_OPENMP -#endif - -#ifdef __APPLE__ - #define HAVE_GCD -#endif - -#if defined _MSC_VER && _MSC_VER >= 1600 - #define HAVE_CONCURRENCY -#endif /* IMPORTANT: always use the same order of defines 1. HAVE_TBB - 3rdparty library, should be explicitly enabled @@ -110,10 +99,6 @@ #endif #endif -#if defined HAVE_TBB || defined HAVE_CSTRIPES || defined HAVE_OPENMP || defined HAVE_GCD || defined HAVE_CONCURRENCY - #define HAVE_PARALLEL_FRAMEWORK -#endif - namespace cv { ParallelLoopBody::~ParallelLoopBody() {} @@ -121,7 +106,7 @@ namespace cv namespace { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK class ParallelLoopBodyWrapper { public: @@ -218,7 +203,7 @@ public: static SchedPtr pplScheduler; #endif -#endif // HAVE_PARALLEL_FRAMEWORK +#endif // CV_PARALLEL_FRAMEWORK } //namespace @@ -226,7 +211,7 @@ static SchedPtr pplScheduler; void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK if(numThreads != 0) { @@ -281,7 +266,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, } else -#endif // HAVE_PARALLEL_FRAMEWORK +#endif // CV_PARALLEL_FRAMEWORK { (void)nstripes; body(range); @@ -290,7 +275,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, int cv::getNumThreads(void) { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK if(numThreads == 0) return 1; @@ -333,7 +318,7 @@ int cv::getNumThreads(void) void cv::setNumThreads( int threads ) { (void)threads; -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK numThreads = 
threads; #endif diff --git a/modules/ts/src/precomp.hpp b/modules/ts/src/precomp.hpp index 10acd7ad8f..0b2adacc4d 100644 --- a/modules/ts/src/precomp.hpp +++ b/modules/ts/src/precomp.hpp @@ -1,4 +1,6 @@ +#include "opencv2/core/core.hpp" #include "opencv2/core/core_c.h" +#include "opencv2/core/internal.hpp" #include "opencv2/ts/ts.hpp" #ifdef GTEST_LINKED_AS_SHARED_LIBRARY diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index 1d636e6746..7a292d71cf 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -2958,6 +2958,14 @@ void printVersionInfo(bool useStdOut) ::testing::Test::RecordProperty("inner_version", ver); if(useStdOut) std::cout << ver << std::endl; } + +#ifdef CV_PARALLEL_FRAMEWORK + ::testing::Test::RecordProperty("cv_parallel_framework", CV_PARALLEL_FRAMEWORK); + if (useStdOut) + { + std::cout << "Parallel framework: " << CV_PARALLEL_FRAMEWORK << std::endl; + } +#endif } } //namespace cvtest From 4af7d65224f23739176c49341d8bcf795a8ab5ea Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 18 Jun 2013 18:08:38 +0400 Subject: [PATCH 15/27] Made tests record information about CPU features and Tegra optimization status. 
--- modules/ts/src/ts_func.cpp | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index 7a292d71cf..e2998149d5 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -2,6 +2,10 @@ #include #include +#ifdef HAVE_TEGRA_OPTIMIZATION +#include "tegra.hpp" +#endif + using namespace cv; namespace cvtest @@ -2966,6 +2970,44 @@ void printVersionInfo(bool useStdOut) std::cout << "Parallel framework: " << CV_PARALLEL_FRAMEWORK << std::endl; } #endif + + std::string cpu_features; + +#if CV_SSE + if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse"; +#endif +#if CV_SSE2 + if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2"; +#endif +#if CV_SSE3 + if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3"; +#endif +#if CV_SSSE3 + if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3"; +#endif +#if CV_SSE4_1 + if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1"; +#endif +#if CV_SSE4_2 + if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2"; +#endif +#if CV_AVX + if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx"; +#endif +#if CV_NEON + cpu_features += " neon"; // NEON is currently not checked at runtime +#endif + + cpu_features.erase(0, 1); // erase initial space + + ::testing::Test::RecordProperty("cv_cpu_features", cpu_features); + if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl; + +#ifdef HAVE_TEGRA_OPTIMIZATION + const char * tegra_optimization = tegra::isDeviceSupported() ? 
"enabled" : "disabled"; + ::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization); + if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl; +#endif } } //namespace cvtest From 26c246140a31556fd116bb53044575a0f9b02b84 Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 19 Jun 2013 11:20:45 +0800 Subject: [PATCH 16/27] optimize hog --- modules/ocl/src/hog.cpp | 512 +++++++++++++++-------- modules/ocl/src/opencl/objdetect_hog.cl | 520 +++++++++++++++++------- 2 files changed, 709 insertions(+), 323 deletions(-) diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index a3514586fa..3533cce69a 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Wenju He, wenju@multicorewareinc.com +// Wenju He, wenju@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -48,13 +48,107 @@ using namespace cv; using namespace cv::ocl; using namespace std; - #define CELL_WIDTH 8 #define CELL_HEIGHT 8 #define CELLS_PER_BLOCK_X 2 #define CELLS_PER_BLOCK_Y 2 #define NTHREADS 256 +static oclMat gauss_w_lut; +static bool hog_device_cpu; +/* pre-compute gaussian and interp_weight lookup tables if sigma is 4.0f */ +static const float gaussian_interp_lut[] = +{ + /* gaussian lut */ + 0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f, + 0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f, + 0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f, + 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f, + 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f, + 0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, + 0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 
0.32465246f, 0.31466395f, + 0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f, + 0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, + 0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, + 0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f, + 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f, + 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f, + 0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, + 0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f, + 0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f, + 0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, + 0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, + 0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f, + 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f, + 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f, + 0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f, + 0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f, + 0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f, + 0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, + 0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, + 0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f, + 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f, + 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f, + 0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, + 0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f, + 0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 
0.24506053f, 0.16324551f, + 0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, + 0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, + 0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f, + 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f, + 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f, + 0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, + 0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f, + 0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f, + 0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, + 0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, + 0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f, + /* interp_weight lut */ + 0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, + 0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, + 0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f, + 0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, + 0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, + 0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f, + 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f, + 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f, + 0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, + 0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, + 0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f, + 0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, + 0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, + 0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 
0.21484375f, 0.30078125f, + 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f, + 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f, + 0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, + 0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, + 0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f, + 0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, + 0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, + 0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f, + 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f, + 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f, + 0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, + 0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, + 0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f, + 0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, + 0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, + 0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f, + 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f, + 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f, + 0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, + 0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, + 0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f, + 0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, + 0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, + 0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f, + 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 
0.17578125f, 0.15234375f, + 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f, + 0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, + 0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, + 0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f +}; + namespace cv { namespace ocl @@ -78,38 +172,43 @@ namespace cv int cnblocks_win_x; int cnblocks_win_y; int cblock_hist_size; - int cblock_hist_size_2up; int cdescr_size; int cdescr_width; + int cdescr_height; void set_up_constants(int nbins, int block_stride_x, int block_stride_y, int nblocks_win_x, int nblocks_win_y); void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, - int height, int width, const cv::ocl::oclMat &grad, - const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists); + int height, int width, float sigma, const cv::ocl::oclMat &grad, + const cv::ocl::oclMat &qangle, + const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists); void normalize_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, cv::ocl::oclMat &block_hists, float threshold); + int height, int width, cv::ocl::oclMat &block_hists, + float threshold); void classify_hists(int win_height, int win_width, int block_stride_y, - int block_stride_x, int win_stride_y, int win_stride_x, int height, - int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, + int block_stride_x, int win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, + const cv::ocl::oclMat &coefs, float free_coef, float threshold, cv::ocl::oclMat &labels); - void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, + void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, + int block_stride_x, int 
win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors); - void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, + void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors); void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); + float angle_scale, cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, bool correct_gamma); void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); - - void resize( const oclMat &src, oclMat &dst, const Size sz); + float angle_scale, cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, bool correct_gamma); } } } @@ -117,8 +216,14 @@ namespace cv using namespace ::cv::ocl::device; -cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_, - int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_) +static inline int divUp(int total, int grain) +{ + return (total + grain - 1) / grain; +} + +cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, + Size cell_size_, int nbins_, double win_sigma_, + double threshold_L2hys_, bool gamma_correction_, int nlevels_) : win_size(win_size_), block_size(block_size_), block_stride(block_stride_), @@ -132,19 +237,27 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && (win_size.height - 
block_size.height) % block_stride.height == 0); - CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0); + CV_Assert(block_size.width % cell_size.width == 0 && + block_size.height % cell_size.height == 0); CV_Assert(block_stride == cell_size); CV_Assert(cell_size == Size(8, 8)); - Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + Size cells_per_block(block_size.width / cell_size.width, + block_size.height / cell_size.height); CV_Assert(cells_per_block == Size(2, 2)); cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); - hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); + hog::set_up_constants(nbins, block_stride.width, block_stride.height, + blocks_per_win.width, blocks_per_win.height); effect_size = Size(0, 0); + + if (queryDeviceInfo()) + hog_device_cpu = true; + else + hog_device_cpu = false; } size_t cv::ocl::HOGDescriptor::getDescriptorSize() const @@ -154,7 +267,8 @@ size_t cv::ocl::HOGDescriptor::getDescriptorSize() const size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const { - Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + Size cells_per_block = Size(block_size.width / cell_size.width, + block_size.height / cell_size.height); return (size_t)(nbins * cells_per_block.area()); } @@ -167,7 +281,8 @@ bool cv::ocl::HOGDescriptor::checkDetectorSize() const { size_t detector_size = detector.rows * detector.cols; size_t descriptor_size = getDescriptorSize(); - return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; + return detector_size == 0 || detector_size == descriptor_size || + detector_size == descriptor_size + 1; } void cv::ocl::HOGDescriptor::setSVMDetector(const vector &_detector) @@ -207,10 +322,16 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, 
Size win_stride) const size_t block_hist_size = getBlockHistogramSize(); const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); - block_hists.create(1, static_cast(block_hist_size * blocks_per_img.area()), CV_32F); + block_hists.create(1, + static_cast(block_hist_size * blocks_per_img.area()) + 256, CV_32F); Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); labels.create(1, wins_per_img.area(), CV_8U); + + vector v_lut = vector(gaussian_interp_lut, gaussian_interp_lut + + sizeof(gaussian_interp_lut) / sizeof(gaussian_interp_lut[0])); + Mat m_lut(v_lut); + gauss_w_lut.upload(m_lut.reshape(1,1)); } void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle) @@ -221,29 +342,34 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc switch (img.type()) { case CV_8UC1: - hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); + hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, + angleScale, grad, qangle, gamma_correction); break; case CV_8UC4: - hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); + hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, + angleScale, grad, qangle, gamma_correction); break; } } + void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img) { - computeGradient(img, grad, qangle); + computeGradient(img, this->grad, this->qangle); - hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, - grad, qangle, (float)getWinSigma(), block_hists); + hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, + effect_size.width, (float)getWinSigma(), grad, qangle, gauss_w_lut, block_hists); - hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, 
effect_size.width, - block_hists, (float)threshold_L2hys); + hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, + effect_size.width, block_hists, (float)threshold_L2hys); } -void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format) +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, + oclMat &descriptors, int descr_format) { - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); @@ -253,17 +379,20 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride); - descriptors.create(wins_per_img.area(), static_cast(blocks_per_win.area() * block_hist_size), CV_32F); + descriptors.create(wins_per_img.area(), + static_cast(blocks_per_win.area() * block_hist_size), CV_32F); switch (descr_format) { case DESCR_FORMAT_ROW_BY_ROW: - hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); + hog::extract_descrs_by_rows(win_size.height, win_size.width, + block_stride.height, block_stride.width, win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, descriptors); break; case DESCR_FORMAT_COL_BY_COL: - hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); + hog::extract_descrs_by_cols(win_size.height, win_size.width, + block_stride.height, block_stride.width, 
win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, descriptors); break; default: CV_Error(CV_StsBadArg, "Unknown descriptor format"); @@ -271,7 +400,8 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, } -void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, double hit_threshold, Size win_stride, Size padding) +void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, + double hit_threshold, Size win_stride, Size padding) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(padding == Size(0, 0)); @@ -283,14 +413,16 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, doub if (win_stride == Size()) win_stride = block_stride; else - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); computeBlockHistograms(img); - hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, - detector, (float)free_coef, (float)hit_threshold, labels); + hog::classify_hists(win_size.height, win_size.width, block_stride.height, + block_stride.width, win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, detector, + (float)free_coef, (float)hit_threshold, labels); labels.download(labels_host); unsigned char *vec = labels_host.ptr(); @@ -306,8 +438,9 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, doub -void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &found_locations, double hit_threshold, - Size win_stride, Size padding, double scale0, int group_threshold) +void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &found_locations, + double 
hit_threshold, Size win_stride, Size padding, + double scale0, int group_threshold) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(scale0 > 1); @@ -333,7 +466,8 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &f if (win_stride == Size()) win_stride = block_stride; else - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); image_scale.create(img.size(), img.type()); @@ -347,16 +481,18 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &f } else { - hog::resize( img, image_scale, effect_size); + resize(img, image_scale, effect_size); detect(image_scale, locations, hit_threshold, win_stride, padding); } - Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); + Size scaled_win_size(cvRound(win_size.width * scale), + cvRound(win_size.height * scale)); for (size_t j = 0; j < locations.size(); j++) - all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size)); + all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, + scaled_win_size)); } found_locations.assign(all_candidates.begin(), all_candidates.end()); - groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); + groupRectangles(found_locations, group_threshold, 0.2); } int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) @@ -364,9 +500,11 @@ int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) return (size - part_size + stride) / stride; } -cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride) +cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, + cv::Size stride) { - return Size(numPartsWithin(size.width, 
part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height)); + return Size(numPartsWithin(size.width, part_size.width, stride.width), + numPartsWithin(size.height, part_size.height, stride.height)); } std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() @@ -1547,8 +1685,9 @@ static int power_2up(unsigned int n) return -1; // Input is too big } -void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y, - int nblocks_win_x, int nblocks_win_y) +void cv::ocl::device::hog::set_up_constants(int nbins, + int block_stride_x, int block_stride_y, + int nblocks_win_x, int nblocks_win_y) { cnbins = nbins; cblock_stride_x = block_stride_x; @@ -1559,53 +1698,32 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; cblock_hist_size = block_hist_size; - int block_hist_size_2up = power_2up(block_hist_size); - cblock_hist_size_2up = block_hist_size_2up; - int descr_width = nblocks_win_x * block_hist_size; cdescr_width = descr_width; + cdescr_height = nblocks_win_y; int descr_size = descr_width * nblocks_win_y; cdescr_size = descr_size; } -static inline int divUp(int total, int grain) -{ - return (total + grain - 1) / grain; -} - -static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args) -{ - cl_kernel kernel = openCLGetKernelFromSource(clCxt, source, kernelName); - size_t wave_size = queryDeviceInfo(kernel); - openCLSafeCall(clReleaseKernel(kernel)); - if (wave_size <= 16) - { - char build_options[64]; - sprintf(build_options, (wave_size == 16) ? 
"-D WAVE_SIZE_16" : "-D WAVE_SIZE_1"); - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, build_options); - } - else - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1); -} - -void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, const cv::ocl::oclMat &grad, - const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists) +void cv::ocl::device::hog::compute_hists(int nbins, + int block_stride_x, int block_stride_y, + int height, int width, float sigma, + const cv::ocl::oclMat &grad, + const cv::ocl::oclMat &qangle, + const cv::ocl::oclMat &gauss_w_lut, + cv::ocl::oclMat &block_hists) { Context *clCxt = Context::getContext(); - string kernelName = "compute_hists_kernel"; vector< pair > args; + string kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" : + "compute_hists_kernel"; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; - + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) + / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) + / block_stride_y; int blocks_total = img_block_width * img_block_height; - int blocks_in_group = 4; - size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; - size_t globalThreads[3] = { divUp(blocks_total, blocks_in_group) * localThreads[0], 2, 1 }; int grad_quadstep = grad.step >> 2; int qangle_step = qangle.step; @@ -1613,6 +1731,11 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc // Precompute gaussian spatial window parameter float scale = 1.f / (2.f * sigma * sigma); + int blocks_in_group = 4; + size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; + size_t globalThreads[3] = { + divUp(img_block_width 
* img_block_height, blocks_in_group) * localThreads[0], 2, 1 }; + int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float); int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float); int smem = (hists_size + final_hists_size) * blocks_in_group; @@ -1628,62 +1751,120 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); - args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); + if (kernelName.compare("compute_hists_lut_kernel") == 0) + args.push_back( make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data)); + else + args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( smem, (void *)NULL)); - openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, cv::ocl::oclMat &block_hists, float threshold) +void cv::ocl::device::hog::normalize_hists(int nbins, + int block_stride_x, int block_stride_y, + int height, int width, + cv::ocl::oclMat &block_hists, + float threshold) { Context *clCxt = Context::getContext(); - string kernelName = "normalize_hists_kernel"; vector< pair > args; + string kernelName; int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; - int nthreads = power_2up(block_hist_size); - - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; - 
size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 }; - size_t localThreads[3] = { nthreads, 1, 1 }; - - if ((nthreads < 32) || (nthreads > 512) ) - cv::ocl::error("normalize_hists: histogram's size is too small or too big", __FILE__, __LINE__, "normalize_hists"); + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) + / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) + / block_stride_y; + int nthreads; + size_t globalThreads[3] = { 1, 1, 1 }; + size_t localThreads[3] = { 1, 1, 1 }; + + if ( nbins == 9 ) + { + /* optimized for the case of 9 bins */ + kernelName = "normalize_hists_36_kernel"; + int blocks_in_group = NTHREADS / block_hist_size; + nthreads = blocks_in_group * block_hist_size; + int num_groups = divUp( img_block_width * img_block_height, blocks_in_group); + globalThreads[0] = nthreads * num_groups; + localThreads[0] = nthreads; + } + else + { + kernelName = "normalize_hists_kernel"; + nthreads = power_2up(block_hist_size); + globalThreads[0] = img_block_width * nthreads; + globalThreads[1] = img_block_height; + localThreads[0] = nthreads; + + if ((nthreads < 32) || (nthreads > 512) ) + cv::ocl::error("normalize_hists: histogram's size is too small or too big", + __FILE__, __LINE__, "normalize_hists"); + + args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + } - args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL)); - 
openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); + if(hog_device_cpu) + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1, "-D CPU"); + else + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y, - int block_stride_x, int win_stride_y, int win_stride_x, int height, - int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, - float threshold, cv::ocl::oclMat &labels) +void cv::ocl::device::hog::classify_hists(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + const cv::ocl::oclMat &coefs, + float free_coef, float threshold, + cv::ocl::oclMat &labels) { Context *clCxt = Context::getContext(); - string kernelName = "classify_hists_kernel"; vector< pair > args; + int nthreads; + string kernelName; + switch (cdescr_width) + { + case 180: + nthreads = 180; + kernelName = "classify_hists_180_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height)); + break; + case 252: + nthreads = 256; + kernelName = "classify_hists_252_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height)); + break; + default: + nthreads = 256; + kernelName = "classify_hists_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + } + int win_block_stride_x = win_stride_x / block_stride_x; int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_height = 
(height - win_height + win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - - size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; - size_t localThreads[3] = { NTHREADS, 1, 1 }; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; + size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 }; + size_t localThreads[3] = { nthreads, 1, 1 }; args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&img_win_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); @@ -1694,12 +1875,20 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data)); - openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); + if(hog_device_cpu) + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1, "-D CPU"); + else + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, - const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) +void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + 
cv::ocl::oclMat &descriptors) { Context *clCxt = Context::getContext(); string kernelName = "extract_descrs_by_rows_kernel"; @@ -1709,7 +1898,8 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_height = (height - win_height + win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; int descriptors_quadstep = descriptors.step >> 2; size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; @@ -1725,12 +1915,16 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, - const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) +void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + cv::ocl::oclMat &descriptors) { Context *clCxt = Context::getContext(); string kernelName = "extract_descrs_by_cols_kernel"; @@ -1740,7 +1934,8 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width 
- win_width + win_stride_x) / win_stride_x; int img_win_height = (height - win_height + win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; int descriptors_quadstep = descriptors.step >> 2; size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; @@ -1757,11 +1952,16 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) +void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, + const cv::ocl::oclMat &img, + float angle_scale, + cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, + bool correct_gamma) { Context *clCxt = Context::getContext(); string kernelName = "compute_gradients_8UC1_kernel"; @@ -1786,11 +1986,16 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, 
bool correct_gamma) +void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, + const cv::ocl::oclMat &img, + float angle_scale, + cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, + bool correct_gamma) { Context *clCxt = Context::getContext(); string kernelName = "compute_gradients_8UC4_kernel"; @@ -1816,39 +2021,6 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); -} - -void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz) -{ - CV_Assert( (src.channels() == dst.channels()) ); - Context *clCxt = Context::getContext(); - - string kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel"; - size_t blkSizeX = 16, blkSizeY = 16; - size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX; - size_t glbSizeY = sz.height % blkSizeY == 0 ? 
sz.height : (sz.height / blkSizeY + 1) * blkSizeY; - size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; - size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; - - float ifx = (float)src.cols / sz.width; - float ify = (float)src.rows / sz.height; - int src_step = static_cast(src.step); - int dst_step = static_cast(dst.step); - - vector< pair > args; - args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src_step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width)); - args.push_back( make_pair(sizeof(cl_int), (void *)&sz.height)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); - - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); -} + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); +} \ No newline at end of file diff --git a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl index 8852facae8..05d538330f 100644 --- a/modules/ocl/src/opencl/objdetect_hog.cl +++ b/modules/ocl/src/opencl/objdetect_hog.cl @@ -43,7 +43,6 @@ // //M*/ - #define CELL_WIDTH 8 #define CELL_HEIGHT 8 #define CELLS_PER_BLOCK_X 2 @@ -51,6 +50,100 @@ #define NTHREADS 256 #define CV_PI_F 3.1415926535897932384626433832795f +//---------------------------------------------------------------------------- +// Histogram computation +// 12 threads for a cell, 12x4 threads per block +// Use pre-computed gaussian and interp_weight lookup tables 
if sigma is 4.0f +__kernel void compute_hists_lut_kernel( + const int cblock_stride_x, const int cblock_stride_y, + const int cnbins, const int cblock_hist_size, const int img_block_width, + const int blocks_in_group, const int blocks_total, + const int grad_quadstep, const int qangle_step, + __global const float* grad, __global const uchar* qangle, + __global const float* gauss_w_lut, + __global float* block_hists, __local float* smem) +{ + const int lx = get_local_id(0); + const int lp = lx / 24; /* local group id */ + const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */ + const int gidY = gid / img_block_width; + const int gidX = gid - gidY * img_block_width; + + const int lidX = lx - lp * 24; + const int lidY = get_local_id(1); + + const int cell_x = lidX / 12; + const int cell_y = lidY; + const int cell_thread_x = lidX - cell_x * 12; + + __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X * + CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y); + __local float* final_hist = hists + cnbins * + (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12); + + const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x; + const int offset_y = gidY * cblock_stride_y + (cell_y << 2); + + __global const float* grad_ptr = (gid < blocks_total) ? + grad + offset_y * grad_quadstep + (offset_x << 1) : grad; + __global const uchar* qangle_ptr = (gid < blocks_total) ? 
+ qangle + offset_y * qangle_step + (offset_x << 1) : qangle; + + __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + + cell_thread_x; + for (int bin_id = 0; bin_id < cnbins; ++bin_id) + hist[bin_id * 48] = 0.f; + + const int dist_x = -4 + cell_thread_x - 4 * cell_x; + const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x); + + const int dist_y_begin = -4 - 4 * lidY; + for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y) + { + float2 vote = (float2) (grad_ptr[0], grad_ptr[1]); + uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]); + + grad_ptr += grad_quadstep; + qangle_ptr += qangle_step; + + int dist_center_y = dist_y - 4 * (1 - 2 * cell_y); + + int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8); + float gaussian = gauss_w_lut[idx]; + idx = (dist_y + 8) * 16 + (dist_x + 8); + float interp_weight = gauss_w_lut[256+idx]; + + hist[bin.x * 48] += gaussian * interp_weight * vote.x; + hist[bin.y * 48] += gaussian * interp_weight * vote.y; + } + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* hist_ = hist; + for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48) + { + if (cell_thread_x < 6) + hist_[0] += hist_[6]; + barrier(CLK_LOCAL_MEM_FENCE); + if (cell_thread_x < 3) + hist_[0] += hist_[3]; +#ifdef CPU + barrier(CLK_LOCAL_MEM_FENCE); +#endif + if (cell_thread_x == 0) + final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = + hist_[0] + hist_[1] + hist_[2]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; + if ((tid < cblock_hist_size) && (gid < blocks_total)) + { + __global float* block_hist = block_hists + + (gidY * img_block_width + gidX) * cblock_hist_size; + block_hist[tid] = final_hist[tid]; + } +} + //---------------------------------------------------------------------------- // Histogram computation // 12 threads for a cell, 12x4 threads per block @@ -125,16 +218,14 @@ __kernel void compute_hists_kernel( barrier(CLK_LOCAL_MEM_FENCE); if 
(cell_thread_x < 3) hist_[0] += hist_[3]; -#ifdef WAVE_SIZE_1 +#ifdef CPU barrier(CLK_LOCAL_MEM_FENCE); #endif if (cell_thread_x == 0) final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = hist_[0] + hist_[1] + hist_[2]; } -#ifdef WAVE_SIZE_1 barrier(CLK_LOCAL_MEM_FENCE); -#endif int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; if ((tid < cblock_hist_size) && (gid < blocks_total)) @@ -145,6 +236,57 @@ __kernel void compute_hists_kernel( } } +//------------------------------------------------------------- +// Normalization of histograms via L2Hys_norm +// optimized for the case of 9 bins +__kernel void normalize_hists_36_kernel(__global float* block_hists, + const float threshold, __local float *squares) +{ + const int tid = get_local_id(0); + const int gid = get_global_id(0); + const int bid = tid / 36; /* block-hist id, (0 - 6) */ + const int boffset = bid * 36; /* block-hist offset in the work-group */ + const int hid = tid - boffset; /* histogram bin id, (0 - 35) */ + + float elem = block_hists[gid]; + squares[tid] = elem * elem; + barrier(CLK_LOCAL_MEM_FENCE); + + __local float* smem = squares + boffset; + float sum = smem[hid]; + if (hid < 18) + smem[hid] = sum = sum + smem[hid + 18]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 9) + smem[hid] = sum = sum + smem[hid + 9]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 4) + smem[hid] = sum + smem[hid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8]; + + elem = elem / (sqrt(sum) + 3.6f); + elem = min(elem, threshold); + + barrier(CLK_LOCAL_MEM_FENCE); + squares[tid] = elem * elem; + barrier(CLK_LOCAL_MEM_FENCE); + + sum = smem[hid]; + if (hid < 18) + smem[hid] = sum = sum + smem[hid + 18]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 9) + smem[hid] = sum = sum + smem[hid + 9]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 4) + smem[hid] = sum + smem[hid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8]; + + 
block_hists[gid] = elem / (sqrt(sum) + 1e-3f); +} + //------------------------------------------------------------- // Normalization of histograms via L2Hys_norm // @@ -153,76 +295,50 @@ float reduce_smem(volatile __local float* smem, int size) unsigned int tid = get_local_id(0); float sum = smem[tid]; - if (size >= 512) - { - if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; - barrier(CLK_LOCAL_MEM_FENCE); - } - if (size >= 256) - { - if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; - barrier(CLK_LOCAL_MEM_FENCE); - } - if (size >= 128) - { - if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; - barrier(CLK_LOCAL_MEM_FENCE); - } - + if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); } +#ifdef CPU + if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1]; + barrier(CLK_LOCAL_MEM_FENCE); } +#else if (tid < 32) { if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; -#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1) - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 16) - { -#endif if (size >= 32) smem[tid] = sum = sum + smem[tid + 16]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 8) - { -#endif if (size >= 16) smem[tid] = sum 
= sum + smem[tid + 8]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 4) - { -#endif if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 2) - { -#endif if (size >= 4) smem[tid] = sum = sum + smem[tid + 2]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 1) - { -#endif if (size >= 2) smem[tid] = sum = sum + smem[tid + 1]; } - - barrier(CLK_LOCAL_MEM_FENCE); - sum = smem[0]; +#endif return sum; } -__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width, - __global float* block_hists, const float threshold, __local float *squares) +__kernel void normalize_hists_kernel( + const int nthreads, const int block_hist_size, const int img_block_width, + __global float* block_hists, const float threshold, __local float *squares) { const int tid = get_local_id(0); const int gidX = get_group_id(0); const int gidY = get_group_id(1); - __global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid; + __global float* hist = block_hists + (gidY * img_block_width + gidX) * + block_hist_size + tid; float elem = 0.f; if (tid < block_hist_size) @@ -249,100 +365,226 @@ __kernel void normalize_hists_kernel(const int nthreads, const int block_hist_si //--------------------------------------------------------------------- // Linear SVM based classification -// -__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width, - const int img_win_width, const int img_block_width, - const int win_block_stride_x, const int win_block_stride_y, - __global const float * block_hists, __global const float* coefs, - float free_coef, float threshold, __global uchar* labels) +// 48x96 window, 9 bins and default parameters +// 180 threads, each thread corresponds to a bin in a row +__kernel void classify_hists_180_kernel( + const int cdescr_width, const int 
cdescr_height, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) { const int tid = get_local_id(0); const int gidX = get_group_id(0); const int gidY = get_group_id(1); - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; float product = 0.f; - for (int i = tid; i < cdescr_size; i += NTHREADS) + + for (int i = 0; i < cdescr_height; i++) { - int offset_y = i / cdescr_width; - int offset_x = i - offset_y * cdescr_width; - product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x]; + product += coefs[i * cdescr_width + tid] * + hist[i * img_block_width * cblock_hist_size + tid]; } - __local float products[NTHREADS]; + __local float products[180]; products[tid] = product; barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 128) products[tid] = product = product + products[tid + 128]; + if (tid < 90) products[tid] = product = product + products[tid + 90]; barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 64) products[tid] = product = product + products[tid + 64]; + if (tid < 45) products[tid] = product = product + products[tid + 45]; barrier(CLK_LOCAL_MEM_FENCE); volatile __local float* smem = products; - if (tid < 32) +#ifdef CPU + if (tid < 13) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = 
product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); +#else + if (tid < 13) { smem[tid] = product = product + smem[tid + 32]; -#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1) } - barrier(CLK_LOCAL_MEM_FENCE); if (tid < 16) { -#endif smem[tid] = product = product + smem[tid + 16]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 8) - { -#endif smem[tid] = product = product + smem[tid + 8]; -#ifdef WAVE_SIZE_1 + smem[tid] = product = product + smem[tid + 4]; + smem[tid] = product = product + smem[tid + 2]; } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 4) - { #endif - smem[tid] = product = product + smem[tid + 4]; -#ifdef WAVE_SIZE_1 + + if (tid == 0){ + product = product + smem[tid + 1]; + labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } +} + +//--------------------------------------------------------------------- +// Linear SVM based classification +// 64x128 window, 9 bins and default parameters +// 256 threads, 252 of them are used +__kernel void classify_hists_252_kernel( + const int cdescr_width, const int cdescr_height, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + float product = 0.f; + if (tid < cdescr_width) + { + for (int i = 0; i < cdescr_height; i++) + product += coefs[i * cdescr_width + tid] * + hist[i * img_block_width * cblock_hist_size + tid]; } + + __local float products[NTHREADS]; + + products[tid] = product; + barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 2) - { -#endif + + if (tid < 128) products[tid] = product = 
product + products[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) products[tid] = product = product + products[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* smem = products; +#ifdef CPU + if(tid<32) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); +#else + if (tid < 32) + { + smem[tid] = product = product + smem[tid + 32]; + smem[tid] = product = product + smem[tid + 16]; + smem[tid] = product = product + smem[tid + 8]; + smem[tid] = product = product + smem[tid + 4]; smem[tid] = product = product + smem[tid + 2]; -#ifdef WAVE_SIZE_1 } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 1) - { #endif - smem[tid] = product = product + smem[tid + 1]; + if (tid == 0){ + product = product + smem[tid + 1]; + labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } +} + +//--------------------------------------------------------------------- +// Linear SVM based classification +// 256 threads +__kernel void classify_hists_kernel( + const int cdescr_size, const int cdescr_width, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + float product = 0.f; + for (int i 
= tid; i < cdescr_size; i += NTHREADS) + { + int offset_y = i / cdescr_width; + int offset_x = i - offset_y * cdescr_width; + product += coefs[i] * + hist[offset_y * img_block_width * cblock_hist_size + offset_x]; } - if (tid == 0) + __local float products[NTHREADS]; + + products[tid] = product; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) products[tid] = product = product + products[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) products[tid] = product = product + products[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* smem = products; +#ifdef CPU + if(tid<32) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); +#else + if (tid < 32) + { + smem[tid] = product = product + smem[tid + 32]; + smem[tid] = product = product + smem[tid + 16]; + smem[tid] = product = product + smem[tid + 8]; + smem[tid] = product = product + smem[tid + 4]; + smem[tid] = product = product + smem[tid + 2]; + } +#endif + if (tid == 0){ + smem[tid] = product = product + smem[tid + 1]; labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } } //---------------------------------------------------------------------------- // Extract descriptors -__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width, - const int img_block_width, const int win_block_stride_x, const int win_block_stride_y, - __global const float* block_hists, __global float* descriptors) +__kernel void extract_descrs_by_rows_kernel( + const int cblock_hist_size, const int 
descriptors_quadstep, + const int cdescr_size, const int cdescr_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float* block_hists, __global float* descriptors) { int tid = get_local_id(0); int gidX = get_group_id(0); int gidY = get_group_id(1); // Get left top corner of the window in src - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; // Get left top corner of the window in dst - __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + __global float* descriptor = descriptors + + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; // Copy elements from src to dst for (int i = tid; i < cdescr_size; i += NTHREADS) @@ -353,19 +595,23 @@ __kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const in } } -__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, - const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x, - const int win_block_stride_y, __global const float* block_hists, __global float* descriptors) +__kernel void extract_descrs_by_cols_kernel( + const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, + const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float* block_hists, __global float* descriptors) { int tid = get_local_id(0); int gidX = get_group_id(0); int gidY = get_group_id(1); // Get left top corner of the window in src - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) 
* cblock_hist_size; + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; // Get left top corner of the window in dst - __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + __global float* descriptor = descriptors + + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; // Copy elements from src to dst for (int i = tid; i < cdescr_size; i += NTHREADS) @@ -376,16 +622,19 @@ __kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const in int y = block_idx / cnblocks_win_x; int x = block_idx - y * cnblocks_win_x; - descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; + descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = + hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; } } //---------------------------------------------------------------------------- // Gradients computation -__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, - const __global uchar4 * img, __global float * grad, __global uchar * qangle, - const float angle_scale, const char correct_gamma, const int cnbins) +__kernel void compute_gradients_8UC4_kernel( + const int height, const int width, + const int img_step, const int grad_quadstep, const int qangle_step, + const __global uchar4 * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) { const int x = get_global_id(0); const int tid = get_local_id(0); @@ -426,8 +675,10 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c barrier(CLK_LOCAL_MEM_FENCE); if (x < width) { - float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]); - float3 b = 
(float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], sh_row[tid + 2 + 2 * (NTHREADS + 2)]); + float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], + sh_row[tid + 2 * (NTHREADS + 2)]); + float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], + sh_row[tid + 2 + 2 * (NTHREADS + 2)]); float3 dx; if (correct_gamma == 1) @@ -482,9 +733,11 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c } } -__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, - __global const uchar * img, __global float * grad, __global uchar * qangle, - const float angle_scale, const char correct_gamma, const int cnbins) +__kernel void compute_gradients_8UC1_kernel( + const int height, const int width, + const int img_step, const int grad_quadstep, const int qangle_step, + __global const uchar * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) { const int x = get_global_id(0); const int tid = get_local_id(0); @@ -539,43 +792,4 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c grad[ (gidY * grad_quadstep + x) << 1 ] = mag * (1.f - ang); grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang; } -} - -//---------------------------------------------------------------------------- -// Resize - -__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src, - int dst_offset, int src_offset, int dst_step, int src_step, - int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) -{ - int dx = get_global_id(0); - int dy = get_global_id(1); - - int sx = (int)floor(dx*ifx+0.5f); - int sy = (int)floor(dy*ify+0.5f); - sx = min(sx, src_cols-1); - sy = min(sy, src_rows-1); - int dpos = (dst_offset>>2) + dy * (dst_step>>2) + dx; - int spos = (src_offset>>2) + sy * (src_step>>2) + sx; - - if(dx Date: Wed, 
19 Jun 2013 11:31:42 +0800 Subject: [PATCH 17/27] Fix cmake path finding for amd libs. There is no WIN64 defined in the environment. --- cmake/OpenCVDetectOpenCL.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake index a1e8bbac70..2c96274a8c 100644 --- a/cmake/OpenCVDetectOpenCL.cmake +++ b/cmake/OpenCVDetectOpenCL.cmake @@ -44,7 +44,7 @@ if(OPENCL_FOUND) set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR}) set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) - if(WIN64) + if(WIN32 AND X86_64) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import) elseif(WIN32) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import) From 2c198f6cd6802ebfc8d7216f2b06b7c7fb42f6b9 Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 19 Jun 2013 13:03:35 +0800 Subject: [PATCH 18/27] revise accuracy and perf tests --- modules/ocl/perf/main.cpp | 2 + .../perf_calib3d.cpp} | 91 ++++---- modules/ocl/perf/perf_filters.cpp | 16 +- modules/ocl/perf/perf_hog.cpp | 76 +------ modules/ocl/perf/perf_imgproc.cpp | 46 +++- .../{perf_columnsum.cpp => perf_moments.cpp} | 62 ++--- modules/ocl/perf/precomp.cpp | 14 -- modules/ocl/test/test_haar.cpp | 180 --------------- modules/ocl/test/test_imgproc.cpp | 46 +++- .../test/{test_hog.cpp => test_objdetect.cpp} | 215 ++++++++++-------- .../{test_pyrdown.cpp => test_pyramids.cpp} | 44 +++- modules/ocl/test/test_pyrup.cpp | 91 -------- modules/ocl/test/utility.cpp | 102 ++++----- modules/ocl/test/utility.hpp | 11 +- 14 files changed, 387 insertions(+), 609 deletions(-) rename modules/ocl/{test/test_columnsum.cpp => perf/perf_calib3d.cpp} (65%) rename modules/ocl/perf/{perf_columnsum.cpp => perf_moments.cpp} (68%) delete mode 100644 modules/ocl/test/test_haar.cpp rename modules/ocl/test/{test_hog.cpp => test_objdetect.cpp} (51%) rename modules/ocl/test/{test_pyrdown.cpp => test_pyramids.cpp} (75%) delete mode 100644 modules/ocl/test/test_pyrup.cpp diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index 
2da17755eb..dfcac20bc0 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -52,6 +52,8 @@ int main(int argc, const char *argv[]) cerr << "no device found\n"; return -1; } + // set this to overwrite binary cache every time the test starts + ocl::setBinaryDiskCache(ocl::CACHE_UPDATE); int devidx = 0; diff --git a/modules/ocl/test/test_columnsum.cpp b/modules/ocl/perf/perf_calib3d.cpp similarity index 65% rename from modules/ocl/test/test_columnsum.cpp rename to modules/ocl/perf/perf_calib3d.cpp index 231f0657b0..f998ddf0f3 100644 --- a/modules/ocl/test/test_columnsum.cpp +++ b/modules/ocl/perf/perf_calib3d.cpp @@ -15,8 +15,8 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Chunpeng Zhang chunpeng@multicorewareinc.com -// +// Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -31,7 +31,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, @@ -45,50 +45,57 @@ //M*/ #include "precomp.hpp" -#include +///////////// StereoMatchBM //////////////////////// +PERFTEST(StereoMatchBM) +{ + Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE); + Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE); + Mat disp,dst; + ocl::oclMat d_left, d_right,d_disp; + int n_disp= 128; + int winSize =19; -#ifdef HAVE_OPENCL + SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg "; -PARAM_TEST_CASE(ColumnSum, cv::Size) -{ - cv::Size size; - cv::Mat src; + StereoBM bm(0, n_disp, winSize); + bm(left_image, right_image, dst); - virtual void SetUp() - { - size = GET_PARAM(0); - } -}; + CPU_ON; + bm(left_image, right_image, dst); + CPU_OFF; -TEST_P(ColumnSum, Accuracy) -{ - cv::Mat src = randomMat(size, CV_32FC1); - cv::ocl::oclMat d_dst; - cv::ocl::oclMat d_src(src); - - cv::ocl::columnSum(d_src, d_dst); - - cv::Mat dst(d_dst); - - for (int j = 0; j < src.cols; ++j) - { - float gold = src.at(0, j); - float res = dst.at(0, j); - ASSERT_NEAR(res, gold, 1e-5); - } - - for (int i = 1; i < src.rows; ++i) - { - for (int j = 0; j < src.cols; ++j) - { - float gold = src.at(i, j) += src.at(i - 1, j); - float res = dst.at(i, j); - ASSERT_NEAR(res, gold, 1e-5); - } - } + d_left.upload(left_image); + d_right.upload(right_image); + + ocl::StereoBM_OCL d_bm(0, n_disp, winSize); + + WARMUP_ON; + d_bm(d_left, d_right, d_disp); + WARMUP_OFF; + + cv::Mat ocl_mat; + d_disp.download(ocl_mat); + ocl_mat.convertTo(ocl_mat, dst.type()); + + GPU_ON; + d_bm(d_left, d_right, d_disp); + GPU_OFF; + + GPU_FULL_ON; + d_left.upload(left_image); + d_right.upload(right_image); + d_bm(d_left, d_right, d_disp); + d_disp.download(disp); + GPU_FULL_OFF; + + TestSystem::instance().setAccurate(-1, 0.); } -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES); 
-#endif + + + + + + \ No newline at end of file diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index a05301b34c..e988ce09d6 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -284,6 +284,7 @@ PERFTEST(GaussianBlur) Mat src, dst, ocl_dst; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; + const int ksize = 7; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { @@ -291,29 +292,28 @@ PERFTEST(GaussianBlur) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 5, 16); + gen(src, size, size, all_type[j], 0, 256); - GaussianBlur(src, dst, Size(9, 9), 0); + GaussianBlur(src, dst, Size(ksize, ksize), 0); CPU_ON; - GaussianBlur(src, dst, Size(9, 9), 0); + GaussianBlur(src, dst, Size(ksize, ksize), 0); CPU_OFF; ocl::oclMat d_src(src); - ocl::oclMat d_dst(src.size(), src.type()); - ocl::oclMat d_buf; + ocl::oclMat d_dst; WARMUP_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); WARMUP_OFF; GPU_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); GPU_OFF; GPU_FULL_ON; d_src.upload(src); - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); d_dst.download(ocl_dst); GPU_FULL_OFF; diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index 05093811fe..7daa61396c 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -46,11 +46,6 @@ #include "precomp.hpp" ///////////// HOG//////////////////////// -bool match_rect(cv::Rect r1, cv::Rect r2, int threshold) -{ - return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) && - (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold)); -} PERFTEST(HOG) { @@ -61,13 +56,12 @@ 
PERFTEST(HOG) throw runtime_error("can't open road.png"); } - cv::HOGDescriptor hog; hog.setSVMDetector(hog.getDefaultPeopleDetector()); std::vector found_locations; std::vector d_found_locations; - SUBTEST << 768 << 'x' << 576 << "; road.png"; + SUBTEST << src.cols << 'x' << src.rows << "; road.png"; hog.detectMultiScale(src, found_locations); @@ -84,70 +78,10 @@ PERFTEST(HOG) ocl_hog.detectMultiScale(d_src, d_found_locations); WARMUP_OFF; - // Ground-truth rectangular people window - cv::Rect win1_64x128(231, 190, 72, 144); - cv::Rect win2_64x128(621, 156, 97, 194); - cv::Rect win1_48x96(238, 198, 63, 126); - cv::Rect win2_48x96(619, 161, 92, 185); - cv::Rect win3_48x96(488, 136, 56, 112); - - // Compare whether ground-truth windows are detected and compare the number of windows detected. - std::vector d_comp(4); - std::vector comp(4); - for(int i = 0; i < (int)d_comp.size(); i++) - { - d_comp[i] = 0; - comp[i] = 0; - } - - int threshold = 10; - int val = 32; - d_comp[0] = (int)d_found_locations.size(); - comp[0] = (int)found_locations.size(); - - cv::Size winSize = hog.winSize; - - if (winSize == cv::Size(48, 96)) - { - for(int i = 0; i < (int)d_found_locations.size(); i++) - { - if (match_rect(d_found_locations[i], win1_48x96, threshold)) - d_comp[1] = val; - if (match_rect(d_found_locations[i], win2_48x96, threshold)) - d_comp[2] = val; - if (match_rect(d_found_locations[i], win3_48x96, threshold)) - d_comp[3] = val; - } - for(int i = 0; i < (int)found_locations.size(); i++) - { - if (match_rect(found_locations[i], win1_48x96, threshold)) - comp[1] = val; - if (match_rect(found_locations[i], win2_48x96, threshold)) - comp[2] = val; - if (match_rect(found_locations[i], win3_48x96, threshold)) - comp[3] = val; - } - } - else if (winSize == cv::Size(64, 128)) - { - for(int i = 0; i < (int)d_found_locations.size(); i++) - { - if (match_rect(d_found_locations[i], win1_64x128, threshold)) - d_comp[1] = val; - if (match_rect(d_found_locations[i], win2_64x128, 
threshold)) - d_comp[2] = val; - } - for(int i = 0; i < (int)found_locations.size(); i++) - { - if (match_rect(found_locations[i], win1_64x128, threshold)) - comp[1] = val; - if (match_rect(found_locations[i], win2_64x128, threshold)) - comp[2] = val; - } - } - - cv::Mat gpu_rst(d_comp), cpu_rst(comp); - TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3); + if(d_found_locations.size() == found_locations.size()) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, abs((int)found_locations.size() - (int)d_found_locations.size())); GPU_ON; ocl_hog.detectMultiScale(d_src, found_locations); diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index e87e8213de..b330c5ffae 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -743,12 +743,12 @@ PERFTEST(meanShiftFiltering) WARMUP_OFF; GPU_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); GPU_OFF; GPU_FULL_ON; d_src.upload(src); - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); d_dst.download(ocl_dst); GPU_FULL_OFF; @@ -969,3 +969,45 @@ PERFTEST(CLAHE) } } } + +///////////// columnSum//////////////////////// +PERFTEST(columnSum) +{ + Mat src, dst, ocl_dst; + ocl::oclMat d_src, d_dst; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; CV_32FC1"; + + gen(src, size, size, CV_32FC1, 0, 256); + + CPU_ON; + dst.create(src.size(), src.type()); + for (int j = 0; j < src.cols; j++) + dst.at(0, j) = src.at(0, j); + + for (int i = 1; i < src.rows; ++i) + for (int j = 0; j < src.cols; ++j) + dst.at(i, j) = dst.at(i - 1 , j) + src.at(i , j); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::columnSum(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::columnSum(d_src, d_dst); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::columnSum(d_src, d_dst); 
+ d_dst.download(ocl_dst); + GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1); + } +} diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_moments.cpp similarity index 68% rename from modules/ocl/perf/perf_columnsum.cpp rename to modules/ocl/perf/perf_moments.cpp index ff7ebcd1de..7fa3948dec 100644 --- a/modules/ocl/perf/perf_columnsum.cpp +++ b/modules/ocl/perf/perf_moments.cpp @@ -44,45 +44,49 @@ // //M*/ #include "precomp.hpp" - -///////////// columnSum//////////////////////// -PERFTEST(columnSum) +///////////// Moments //////////////////////// +PERFTEST(Moments) { - Mat src, dst, ocl_dst; - ocl::oclMat d_src, d_dst; + Mat src; + bool binaryImage = 0; + + int all_type[] = {CV_8UC1, CV_16SC1, CV_32FC1, CV_64FC1}; + std::string type_name[] = {"CV_8UC1", "CV_16SC1", "CV_32FC1", "CV_64FC1"}; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - SUBTEST << size << 'x' << size << "; CV_32FC1"; + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j]; + + gen(src, size, size, all_type[j], 0, 256); + + cv::Moments CvMom = moments(src, binaryImage); - gen(src, size, size, CV_32FC1, 0, 256); + CPU_ON; + moments(src, binaryImage); + CPU_OFF; - CPU_ON; - dst.create(src.size(), src.type()); - for (int j = 0; j < src.cols; j++) - dst.at(0, j) = src.at(0, j); + cv::Moments oclMom; + WARMUP_ON; + oclMom = ocl::ocl_moments(src, binaryImage); + WARMUP_OFF; - for (int i = 1; i < src.rows; ++i) - for (int j = 0; j < src.cols; ++j) - dst.at(i, j) = dst.at(i - 1 , j) + src.at(i , j); - CPU_OFF; + Mat gpu_dst, cpu_dst; + HuMoments(CvMom, cpu_dst); + HuMoments(oclMom, gpu_dst); - d_src.upload(src); + GPU_ON; + ocl::ocl_moments(src, binaryImage); + GPU_OFF; - WARMUP_ON; - ocl::columnSum(d_src, d_dst); - WARMUP_OFF; + GPU_FULL_ON; + ocl::ocl_moments(src, binaryImage); + GPU_FULL_OFF; - GPU_ON; - ocl::columnSum(d_src, d_dst); - GPU_OFF; + 
TestSystem::instance().ExpectedMatNear(gpu_dst, cpu_dst, .5); - GPU_FULL_ON; - d_src.upload(src); - ocl::columnSum(d_src, d_dst); - d_dst.download(ocl_dst); - GPU_FULL_OFF; + } - TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1); } -} \ No newline at end of file +} diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp index 71a13a1ee2..9fc634290e 100644 --- a/modules/ocl/perf/precomp.cpp +++ b/modules/ocl/perf/precomp.cpp @@ -331,20 +331,6 @@ void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time, cout << setiosflags(ios_base::left); stringstream stream; -#if 0 - if(is_accurate == 1) - stream << "Pass"; - else if(is_accurate_ == 0) - stream << "Fail"; - else if(is_accurate == -1) - stream << " "; - else - { - std::cout<<"is_accurate errer: "< faces, oclfaces; - - Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - cv::ocl::oclMat image; - CvSeq *_objects; - image.upload(smallImg); - _objects = cascade.oclHaarDetectObjects( image, storage, 1.1, - 3, flags, Size(30, 30), Size(0, 0) ); - vector vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); - oclfaces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); - - cpucascade.detectMultiScale( smallImg, faces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - EXPECT_EQ(faces.size(), oclfaces.size()); -} - -TEST_P(Haar, FaceDetectUseBuf) -{ - string imgName = workdir + "lena.jpg"; - Mat img = imread( imgName, 1 ); - - if(img.empty()) - { - std::cout << "Couldn't read " << imgName << std::endl; - return ; - } - - vector faces, oclfaces; - - Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, 
smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - cv::ocl::oclMat image; - image.upload(smallImg); - - cv::ocl::OclCascadeClassifierBuf cascadebuf; - if( !cascadebuf.load( cascadeName ) ) - { - cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl; - return; - } - cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - - cpucascade.detectMultiScale( smallImg, faces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - EXPECT_EQ(faces.size(), oclfaces.size()); - - // intentionally run ocl facedetect again and check if it still works after the first run - cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3, - flags, - Size(30, 30)); - cascadebuf.release(); - EXPECT_EQ(faces.size(), oclfaces.size()); -} - -INSTANTIATE_TEST_CASE_P(FaceDetect, Haar, - Combine(Values(1.0), - Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2))); - -#endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index b9f4740b17..3a98671d51 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -1573,6 +1573,47 @@ TEST_P(Convolve, Mat) } } +//////////////////////////////// ColumnSum ////////////////////////////////////// +PARAM_TEST_CASE(ColumnSum, cv::Size) +{ + cv::Size size; + cv::Mat src; + + virtual void SetUp() + { + size = GET_PARAM(0); + } +}; + +TEST_P(ColumnSum, Accuracy) +{ + cv::Mat src = randomMat(size, CV_32FC1); + cv::ocl::oclMat d_dst; + cv::ocl::oclMat d_src(src); + + cv::ocl::columnSum(d_src, d_dst); + + cv::Mat dst(d_dst); + + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(0, j); + float res = dst.at(0, j); + ASSERT_NEAR(res, gold, 1e-5); + } + + for (int i = 1; i < src.rows; ++i) + { + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(i, j) += src.at(i - 1, j); + float res = dst.at(i, j); + ASSERT_NEAR(res, gold, 1e-5); + } + } +} 
+///////////////////////////////////////////////////////////////////////////////////// + INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( ONE_TYPE(CV_8UC1), NULL_TYPE, @@ -1688,7 +1729,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine( Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)), Values(0.0, 40.0))); -//INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine( -// Values(CV_32FC1, CV_32FC1), -// Values(false))); // Values(false) is the reserved parameter +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES); + #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_hog.cpp b/modules/ocl/test/test_objdetect.cpp similarity index 51% rename from modules/ocl/test/test_hog.cpp rename to modules/ocl/test/test_objdetect.cpp index cfc4e3963f..86590f7981 100644 --- a/modules/ocl/test/test_hog.cpp +++ b/modules/ocl/test/test_objdetect.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Wenju He, wenju@multicorewareinc.com +// Yao Wang, bitwangyaoyao@gmail.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,51 +45,58 @@ #include "precomp.hpp" #include "opencv2/core/core.hpp" -using namespace std; +#include "opencv2/objdetect/objdetect.hpp" + +using namespace cv; +using namespace testing; #ifdef HAVE_OPENCL extern string workdir; -PARAM_TEST_CASE(HOG, cv::Size, int) + +///////////////////// HOG ///////////////////////////// +PARAM_TEST_CASE(HOG, Size, int) { - cv::Size winSize; + Size winSize; int type; + Mat img_rgb; virtual void SetUp() { winSize = GET_PARAM(0); type = GET_PARAM(1); + img_rgb = readImage(workdir + "../gpu/road.png"); + if(img_rgb.empty()) + { + std::cout << "Couldn't read road.png" << std::endl; + } } }; TEST_P(HOG, GetDescriptors) { - // Load image - cv::Mat img_rgb = readImage(workdir + "lena.jpg"); - ASSERT_FALSE(img_rgb.empty()); - 
// Convert image - cv::Mat img; + Mat img; switch (type) { case CV_8UC1: - cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + cvtColor(img_rgb, img, CV_BGR2GRAY); break; case CV_8UC4: default: - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + cvtColor(img_rgb, img, CV_BGR2BGRA); break; } - cv::ocl::oclMat d_img(img); + ocl::oclMat d_img(img); // HOGs - cv::ocl::HOGDescriptor ocl_hog; + ocl::HOGDescriptor ocl_hog; ocl_hog.gamma_correction = true; - cv::HOGDescriptor hog; + HOGDescriptor hog; hog.gammaCorrection = true; // Compute descriptor - cv::ocl::oclMat d_descriptors; + ocl::oclMat d_descriptors; ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL); - cv::Mat down_descriptors; + Mat down_descriptors; d_descriptors.download(down_descriptors); down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows); @@ -105,45 +112,34 @@ TEST_P(HOG, GetDescriptors) hog.compute(img_rgb, descriptors, ocl_hog.win_size); break; } - cv::Mat cpu_descriptors(descriptors); + Mat cpu_descriptors(descriptors); EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2); } - -bool match_rect(cv::Rect r1, cv::Rect r2, int threshold) -{ - return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) && - (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold)); -} - TEST_P(HOG, Detect) { - // Load image - cv::Mat img_rgb = readImage(workdir + "lena.jpg"); - ASSERT_FALSE(img_rgb.empty()); - // Convert image - cv::Mat img; + Mat img; switch (type) { case CV_8UC1: - cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + cvtColor(img_rgb, img, CV_BGR2GRAY); break; case CV_8UC4: default: - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + cvtColor(img_rgb, img, CV_BGR2BGRA); break; } - cv::ocl::oclMat d_img(img); + ocl::oclMat d_img(img); // HOGs - if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128))) - winSize = cv::Size(64, 128); - cv::ocl::HOGDescriptor ocl_hog(winSize); + if ((winSize != 
Size(48, 96)) && (winSize != Size(64, 128))) + winSize = Size(64, 128); + ocl::HOGDescriptor ocl_hog(winSize); ocl_hog.gamma_correction = true; - cv::HOGDescriptor hog; + HOGDescriptor hog; hog.winSize = winSize; hog.gammaCorrection = true; @@ -165,88 +161,117 @@ TEST_P(HOG, Detect) } // OpenCL detection - std::vector d_found; - ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + std::vector d_found; + ocl_hog.detectMultiScale(d_img, d_found, 0, Size(8, 8), Size(0, 0), 1.05, 6); // CPU detection - std::vector found; + std::vector found; switch (type) { case CV_8UC1: - hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + hog.detectMultiScale(img, found, 0, Size(8, 8), Size(0, 0), 1.05, 6); break; case CV_8UC4: default: - hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + hog.detectMultiScale(img_rgb, found, 0, Size(8, 8), Size(0, 0), 1.05, 6); break; } - // Ground-truth rectangular people window - cv::Rect win1_64x128(231, 190, 72, 144); - cv::Rect win2_64x128(621, 156, 97, 194); - cv::Rect win1_48x96(238, 198, 63, 126); - cv::Rect win2_48x96(619, 161, 92, 185); - cv::Rect win3_48x96(488, 136, 56, 112); - - // Compare whether ground-truth windows are detected and compare the number of windows detected. 
- std::vector d_comp(4); - std::vector comp(4); - for(int i = 0; i < (int)d_comp.size(); i++) - { - d_comp[i] = 0; - comp[i] = 0; - } + EXPECT_LT(checkRectSimilarity(img.size(), found, d_found), 1.0); +} - int threshold = 10; - int val = 32; - d_comp[0] = (int)d_found.size(); - comp[0] = (int)found.size(); - if (winSize == cv::Size(48, 96)) + +INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine( + testing::Values(Size(64, 128), Size(48, 96)), + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); + +///////////////////////////// Haar ////////////////////////////// +IMPLEMENT_PARAM_CLASS(CascadeName, std::string); +CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml")); +CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml")); +struct getRect +{ + Rect operator ()(const CvAvgComp &e) const { - for(int i = 0; i < (int)d_found.size(); i++) - { - if (match_rect(d_found[i], win1_48x96, threshold)) - d_comp[1] = val; - if (match_rect(d_found[i], win2_48x96, threshold)) - d_comp[2] = val; - if (match_rect(d_found[i], win3_48x96, threshold)) - d_comp[3] = val; - } - for(int i = 0; i < (int)found.size(); i++) - { - if (match_rect(found[i], win1_48x96, threshold)) - comp[1] = val; - if (match_rect(found[i], win2_48x96, threshold)) - comp[2] = val; - if (match_rect(found[i], win3_48x96, threshold)) - comp[3] = val; - } + return e.rect; } - else if (winSize == cv::Size(64, 128)) +}; + +PARAM_TEST_CASE(Haar, int, CascadeName) +{ + ocl::OclCascadeClassifier cascade, nestedCascade; + CascadeClassifier cpucascade, cpunestedCascade; + + int flags; + std::string cascadeName; + vector faces, oclfaces; + Mat img; + ocl::oclMat d_img; + + virtual void SetUp() { - for(int i = 0; i < (int)d_found.size(); i++) + flags = GET_PARAM(0); + cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(1)); + if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) ) { - if (match_rect(d_found[i], 
win1_64x128, threshold)) - d_comp[1] = val; - if (match_rect(d_found[i], win2_64x128, threshold)) - d_comp[2] = val; + std::cout << "ERROR: Could not load classifier cascade" << std::endl; + return; } - for(int i = 0; i < (int)found.size(); i++) + img = readImage(workdir + "lena.jpg", IMREAD_GRAYSCALE); + if(img.empty()) { - if (match_rect(found[i], win1_64x128, threshold)) - comp[1] = val; - if (match_rect(found[i], win2_64x128, threshold)) - comp[2] = val; + std::cout << "Couldn't read lena.jpg" << std::endl; + return ; } + equalizeHist(img, img); + d_img.upload(img); } +}; - EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3); +TEST_P(Haar, FaceDetect) +{ + MemStorage storage(cvCreateMemStorage(0)); + CvSeq *_objects; + _objects = cascade.oclHaarDetectObjects(d_img, storage, 1.1, 3, + flags, Size(30, 30), Size(0, 0)); + vector vecAvgComp; + Seq(_objects).copyTo(vecAvgComp); + oclfaces.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); + + cpucascade.detectMultiScale(img, faces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); + + EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0); } +TEST_P(Haar, FaceDetectUseBuf) +{ + ocl::OclCascadeClassifierBuf cascadebuf; + if(!cascadebuf.load(cascadeName)) + { + std::cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" 
<< std::endl; + return; + } + cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); + cpucascade.detectMultiScale(img, faces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); -INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine( - testing::Values(cv::Size(64, 128), cv::Size(48, 96)), - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); + // intentionally run ocl facedetect again and check if it still works after the first run + cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3, + flags, + Size(30, 30)); + cascadebuf.release(); + + EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0); +} +INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar, + Combine(Values(CV_HAAR_SCALE_IMAGE, 0), + Values(cascade_frontalface_alt/*, cascade_frontalface_alt2*/))); -#endif //HAVE_OPENCL +#endif //HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/test/test_pyrdown.cpp b/modules/ocl/test/test_pyramids.cpp similarity index 75% rename from modules/ocl/test/test_pyrdown.cpp rename to modules/ocl/test/test_pyramids.cpp index 6d00fb5e45..1bd188dea6 100644 --- a/modules/ocl/test/test_pyrdown.cpp +++ b/modules/ocl/test/test_pyramids.cpp @@ -15,7 +15,6 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Dachuan Zhao, dachuan@multicorewareinc.com // Yao Wang yao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, @@ -56,11 +55,12 @@ using namespace cvtest; using namespace testing; using namespace std; -PARAM_TEST_CASE(PyrDown, MatType, int) +PARAM_TEST_CASE(PyrBase, MatType, int) { int type; int channels; - + Mat dst_cpu; + oclMat gdst; virtual void SetUp() { type = GET_PARAM(0); @@ -69,19 +69,19 @@ PARAM_TEST_CASE(PyrDown, MatType, int) }; +/////////////////////// PyrDown ////////////////////////// +struct PyrDown : PyrBase {}; TEST_P(PyrDown, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { - cv::Size size(MWIDTH, MHEIGHT); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false); - - cv::ocl::oclMat gsrc(src), gdst; - cv::Mat dst_cpu; - cv::pyrDown(src, dst_cpu); - cv::ocl::pyrDown(gsrc, gdst); + Size size(MWIDTH, MHEIGHT); + Mat src = randomMat(size, CV_MAKETYPE(type, channels)); + oclMat gsrc(src); + + pyrDown(src, dst_cpu); + pyrDown(gsrc, gdst); EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f); } @@ -90,5 +90,27 @@ TEST_P(PyrDown, Mat) INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine( Values(CV_8U, CV_32F), Values(1, 3, 4))); +/////////////////////// PyrUp ////////////////////////// + +struct PyrUp : PyrBase {}; + +TEST_P(PyrUp, Accuracy) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + Size size(MWIDTH, MHEIGHT); + Mat src = randomMat(size, CV_MAKETYPE(type, channels)); + oclMat gsrc(src); + + pyrUp(src, dst_cpu); + pyrUp(gsrc, gdst); + + EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (type == CV_32F ? 
1e-4f : 1.0)); + } +} + + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine( + Values(CV_8U, CV_32F), Values(1, 3, 4))); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_pyrup.cpp b/modules/ocl/test/test_pyrup.cpp deleted file mode 100644 index afd3e8b1b8..0000000000 --- a/modules/ocl/test/test_pyrup.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Zhang Chunpeng chunpeng@multicorewareinc.com -// Yao Wang yao@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "opencv2/core/core.hpp" - -#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(PyrUp, MatType, int) -{ - int type; - int channels; - - virtual void SetUp() - { - type = GET_PARAM(0); - channels = GET_PARAM(1); - } -}; - -TEST_P(PyrUp, Accuracy) -{ - for(int j = 0; j < LOOP_TIMES; j++) - { - Size size(MWIDTH, MHEIGHT); - Mat src = randomMat(size, CV_MAKETYPE(type, channels)); - Mat dst_gold; - pyrUp(src, dst_gold); - ocl::oclMat dst; - ocl::oclMat srcMat(src); - ocl::pyrUp(srcMat, dst); - - EXPECT_MAT_NEAR(dst_gold, Mat(dst), (type == CV_32F ? 
1e-4f : 1.0)); - } - -} - - -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine( - Values(CV_8U, CV_32F), Values(1, 3, 4))); - - -#endif // HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp index 4b21081a8b..27f9cec079 100644 --- a/modules/ocl/test/utility.cpp +++ b/modules/ocl/test/utility.cpp @@ -100,12 +100,6 @@ Mat randomMat(Size size, int type, double minVal, double maxVal) return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); } - - - - - - /* void showDiff(InputArray gold_, InputArray actual_, double eps) { @@ -137,58 +131,7 @@ void showDiff(InputArray gold_, InputArray actual_, double eps) } */ -/* -bool supportFeature(const DeviceInfo& info, FeatureSet feature) -{ - return TargetArchs::builtWith(feature) && info.supports(feature); -} - -const vector& devices() -{ - static vector devs; - static bool first = true; - - if (first) - { - int deviceCount = getCudaEnabledDeviceCount(); - - devs.reserve(deviceCount); - - for (int i = 0; i < deviceCount; ++i) - { - DeviceInfo info(i); - if (info.isCompatible()) - devs.push_back(info); - } - - first = false; - } - - return devs; -} -vector devices(FeatureSet feature) -{ - const vector& d = devices(); - - vector devs_filtered; - - if (TargetArchs::builtWith(feature)) - { - devs_filtered.reserve(d.size()); - - for (size_t i = 0, size = d.size(); i < size; ++i) - { - const DeviceInfo& info = d[i]; - - if (info.supports(feature)) - devs_filtered.push_back(info); - } - } - - return devs_filtered; -} -*/ vector types(int depth_start, int depth_end, int cn_start, int cn_end) { @@ -264,3 +207,48 @@ void PrintTo(const Inverse &inverse, std::ostream *os) (*os) << "direct"; } +double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& ob2) +{ + double final_test_result = 0.0; + size_t sz1 = ob1.size(); + size_t sz2 = ob2.size(); + + if(sz1 != sz2) + { + return sz1 > sz2 ? 
(double)(sz1 - sz2) : (double)(sz2 - sz1); + } + else + { + if(sz1==0 && sz2==0) + return 0; + cv::Mat cpu_result(sz, CV_8UC1); + cpu_result.setTo(0); + + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) + { + cv::Mat cpu_result_roi(cpu_result, *r); + cpu_result_roi.setTo(1); + cpu_result.copyTo(cpu_result); + } + int cpu_area = cv::countNonZero(cpu_result > 0); + + cv::Mat gpu_result(sz, CV_8UC1); + gpu_result.setTo(0); + for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) + { + cv::Mat gpu_result_roi(gpu_result, *r2); + gpu_result_roi.setTo(1); + gpu_result.copyTo(gpu_result); + } + + cv::Mat result_; + multiply(cpu_result, gpu_result, result_); + int result = cv::countNonZero(result_ > 0); + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; + } + return final_test_result; +} + diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp index 42fa69384d..0b101ec50b 100644 --- a/modules/ocl/test/utility.hpp +++ b/modules/ocl/test/utility.hpp @@ -55,13 +55,12 @@ cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); -//! return true if device supports specified feature and gpu module was built with support the feature. -//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); +// This function test if gpu_rst matches cpu_rst. +// If the two vectors are not equal, it will return the difference in vector size +// Else it will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) +// The smaller, the better matched +double checkRectSimilarity(cv::Size sz, std::vector& ob1, std::vector& ob2); -//! return all devices compatible with current gpu module build. -//const std::vector& devices(); -//! 
return all devices compatible with current gpu module build which support specified feature. -//std::vector devices(cv::gpu::FeatureSet feature); //! read image from testdata folder. cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR); From f1c549fabf2d916df306a889137de49f3ef338d5 Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 19 Jun 2013 16:36:23 +0800 Subject: [PATCH 19/27] revise ocl samples, add tvl1 sample --- samples/ocl/facedetect.cpp | 159 ++++++++------ samples/ocl/hog.cpp | 333 +++++++++++------------------ samples/ocl/pyrlk_optical_flow.cpp | 59 +++-- samples/ocl/squares.cpp | 232 ++++++++++++++++---- samples/ocl/stereo_match.cpp | 306 ++++++++++++-------------- samples/ocl/surf_matcher.cpp | 205 +++++++----------- samples/ocl/tvl1_optical_flow.cpp | 265 +++++++++++++++++++++++ 7 files changed, 919 insertions(+), 640 deletions(-) create mode 100644 samples/ocl/tvl1_optical_flow.cpp diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index 684c2d923b..a49610aeb7 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -7,55 +7,67 @@ using namespace std; using namespace cv; -#define LOOP_NUM 10 +#define LOOP_NUM 10 const static Scalar colors[] = { CV_RGB(0,0,255), - CV_RGB(0,128,255), - CV_RGB(0,255,255), - CV_RGB(0,255,0), - CV_RGB(255,128,0), - CV_RGB(255,255,0), - CV_RGB(255,0,0), - CV_RGB(255,0,255)} ; + CV_RGB(0,128,255), + CV_RGB(0,255,255), + CV_RGB(0,255,0), + CV_RGB(255,128,0), + CV_RGB(255,255,0), + CV_RGB(255,0,0), + CV_RGB(255,0,255) + } ; + int64 work_begin = 0; int64 work_end = 0; +string outputName; -static void workBegin() -{ +static void workBegin() +{ work_begin = getTickCount(); } static void workEnd() { work_end += (getTickCount() - work_begin); } -static double getTime(){ +static double getTime() +{ return work_end /((double)cvGetTickFrequency() * 1000.); } -void detect( Mat& img, vector& faces, - cv::ocl::OclCascadeClassifierBuf& cascade, - double scale, bool calTime); -void 
detectCPU( Mat& img, vector& faces, - CascadeClassifier& cascade, - double scale, bool calTime); +void detect( Mat& img, vector& faces, + ocl::OclCascadeClassifierBuf& cascade, + double scale, bool calTime); + + +void detectCPU( Mat& img, vector& faces, + CascadeClassifier& cascade, + double scale, bool calTime); + void Draw(Mat& img, vector& faces, double scale); + // This function test if gpu_rst matches cpu_rst. // If the two vectors are not equal, it will return the difference in vector size // Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) -double checkRectSimilarity(Size sz, std::vector& cpu_rst, std::vector& gpu_rst); +double checkRectSimilarity(Size sz, vector& cpu_rst, vector& gpu_rst); + int main( int argc, const char** argv ) { const char* keys = "{ h | help | false | print help message }" "{ i | input | | specify input image }" - "{ t | template | ../../../data/haarcascades/haarcascade_frontalface_alt.xml | specify template file }" + "{ t | template | haarcascade_frontalface_alt.xml |" + " specify template file path }" "{ c | scale | 1.0 | scale image }" - "{ s | use_cpu | false | use cpu or gpu to process the image }"; + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ o | output | facedetect_output.jpg |" + " specify output image save path(only works when input is images) }"; CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) @@ -69,9 +81,10 @@ int main( int argc, const char** argv ) bool useCPU = cmd.get("s"); string inputName = cmd.get("i"); + outputName = cmd.get("o"); string cascadeName = cmd.get("t"); double scale = cmd.get("c"); - cv::ocl::OclCascadeClassifierBuf cascade; + ocl::OclCascadeClassifierBuf cascade; CascadeClassifier cpu_cascade; if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) ) @@ -83,7 +96,7 @@ int main( int argc, const char** argv ) if( inputName.empty() ) { capture = cvCaptureFromCAM(0); - if(!capture) + if(!capture) cout 
<< "Capture from CAM 0 didn't work" << endl; } else if( inputName.size() ) @@ -92,7 +105,7 @@ int main( int argc, const char** argv ) if( image.empty() ) { capture = cvCaptureFromAVI( inputName.c_str() ); - if(!capture) + if(!capture) cout << "Capture from AVI didn't work" << endl; return -1; } @@ -100,14 +113,15 @@ int main( int argc, const char** argv ) else { image = imread( "lena.jpg", 1 ); - if(image.empty()) + if(image.empty()) cout << "Couldn't read lena.jpg" << endl; return -1; } + cvNamedWindow( "result", 1 ); - std::vector oclinfo; - int devnums = cv::ocl::getDevice(oclinfo); + vector oclinfo; + int devnums = ocl::getDevice(oclinfo); if( devnums < 1 ) { std::cout << "no device found\n"; @@ -130,19 +144,23 @@ int main( int argc, const char** argv ) frame.copyTo( frameCopy ); else flip( frame, frameCopy, 0 ); - if(useCPU){ + if(useCPU) + { detectCPU(frameCopy, faces, cpu_cascade, scale, false); } - else{ - detect(frameCopy, faces, cascade, scale, false); + else + { + detect(frameCopy, faces, cascade, scale, false); } Draw(frameCopy, faces, scale); if( waitKey( 10 ) >= 0 ) goto _cleanup_; } + waitKey(0); + _cleanup_: cvReleaseCapture( &capture ); } @@ -152,18 +170,21 @@ _cleanup_: vector faces; vector ref_rst; double accuracy = 0.; - for(int i = 0; i <= LOOP_NUM;i ++) + for(int i = 0; i <= LOOP_NUM; i ++) { cout << "loop" << i << endl; - if(useCPU){ - detectCPU(image, faces, cpu_cascade, scale, i==0?false:true); + if(useCPU) + { + detectCPU(image, faces, cpu_cascade, scale, i==0?false:true); } - else{ + else + { detect(image, faces, cascade, scale, i==0?false:true); - if(i == 0){ + if(i == 0) + { detectCPU(image, ref_rst, cpu_cascade, scale, false); accuracy = checkRectSimilarity(image.size(), ref_rst, faces); - } + } } if (i == LOOP_NUM) { @@ -180,31 +201,31 @@ _cleanup_: } cvDestroyWindow("result"); - return 0; } -void detect( Mat& img, vector& faces, - cv::ocl::OclCascadeClassifierBuf& cascade, - double scale, bool calTime) +void detect( Mat& img, vector& 
faces, + ocl::OclCascadeClassifierBuf& cascade, + double scale, bool calTime) { - cv::ocl::oclMat image(img); - cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); + ocl::oclMat image(img); + ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); if(calTime) workBegin(); - cv::ocl::cvtColor( image, gray, CV_BGR2GRAY ); - cv::ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - cv::ocl::equalizeHist( smallImg, smallImg ); + ocl::cvtColor( image, gray, CV_BGR2GRAY ); + ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); + ocl::equalizeHist( smallImg, smallImg ); cascade.detectMultiScale( smallImg, faces, 1.1, - 3, 0 - |CV_HAAR_SCALE_IMAGE - , Size(30,30), Size(0, 0) ); + 3, 0 + |CV_HAAR_SCALE_IMAGE + , Size(30,30), Size(0, 0) ); if(calTime) workEnd(); } -void detectCPU( Mat& img, vector& faces, - CascadeClassifier& cascade, - double scale, bool calTime) + +void detectCPU( Mat& img, vector& faces, + CascadeClassifier& cascade, + double scale, bool calTime) { if(calTime) workBegin(); Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); @@ -212,11 +233,12 @@ void detectCPU( Mat& img, vector& faces, resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR); equalizeHist(cpu_smallImg, cpu_smallImg); cascade.detectMultiScale(cpu_smallImg, faces, 1.1, - 3, 0 | CV_HAAR_SCALE_IMAGE, - Size(30, 30), Size(0, 0)); - if(calTime) workEnd(); + 3, 0 | CV_HAAR_SCALE_IMAGE, + Size(30, 30), Size(0, 0)); + if(calTime) workEnd(); } + void Draw(Mat& img, vector& faces, double scale) { int i = 0; @@ -230,31 +252,38 @@ void Draw(Mat& img, vector& faces, double scale) radius = cvRound((r->width + r->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); } - cv::imshow( "result", img ); + imshow( "result", img ); + imwrite( outputName, img ); } -double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& ob2) + 
+double checkRectSimilarity(Size sz, vector& ob1, vector& ob2) { double final_test_result = 0.0; size_t sz1 = ob1.size(); size_t sz2 = ob2.size(); if(sz1 != sz2) + { return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1); + } else { - cv::Mat cpu_result(sz, CV_8UC1); + if(sz1==0 && sz2==0) + return 0; + Mat cpu_result(sz, CV_8UC1); cpu_result.setTo(0); for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) - { - cv::Mat cpu_result_roi(cpu_result, *r); + { + Mat cpu_result_roi(cpu_result, *r); cpu_result_roi.setTo(1); cpu_result.copyTo(cpu_result); } - int cpu_area = cv::countNonZero(cpu_result > 0); + int cpu_area = countNonZero(cpu_result > 0); + - cv::Mat gpu_result(sz, CV_8UC1); + Mat gpu_result(sz, CV_8UC1); gpu_result.setTo(0); for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) { @@ -263,11 +292,13 @@ double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& o gpu_result.copyTo(gpu_result); } - cv::Mat result_; + Mat result_; multiply(cpu_result, gpu_result, result_); - int result = cv::countNonZero(result_ > 0); - - final_test_result = 1.0 - (double)result/(double)cpu_area; + int result = countNonZero(result_ > 0); + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; } return final_test_result; } diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp index 28be6fa9af..ff53e010cf 100644 --- a/samples/ocl/hog.cpp +++ b/samples/ocl/hog.cpp @@ -10,75 +10,39 @@ using namespace std; using namespace cv; -bool help_showed = false; - -class Args -{ -public: - Args(); - static Args read(int argc, char** argv); - - string src; - bool src_is_video; - bool src_is_camera; - int camera_id; - - bool write_video; - string dst_video; - double dst_video_fps; - - bool make_gray; - - bool resize_src; - int width, height; - - double scale; - int nlevels; - int gr_threshold; - - double hit_threshold; - bool hit_threshold_auto; - - int 
win_width; - int win_stride_width, win_stride_height; - - bool gamma_corr; -}; - class App { public: - App(const Args& s); + App(CommandLineParser& cmd); void run(); - void handleKey(char key); - void hogWorkBegin(); void hogWorkEnd(); string hogWorkFps() const; - void workBegin(); void workEnd(); string workFps() const; - string message() const; + // This function test if gpu_rst matches cpu_rst. // If the two vectors are not equal, it will return the difference in vector size -// Else if will return +// Else if will return // (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) - double checkRectSimilarity(Size sz, - std::vector& cpu_rst, + double checkRectSimilarity(Size sz, + std::vector& cpu_rst, std::vector& gpu_rst); private: App operator=(App&); - Args args; + //Args args; bool running; - bool use_gpu; bool make_gray; double scale; + double resize_scale; + int win_width; + int win_stride_width, win_stride_height; int gr_threshold; int nlevels; double hit_threshold; @@ -86,119 +50,49 @@ private: int64 hog_work_begin; double hog_work_fps; - int64 work_begin; double work_fps; -}; -static void printHelp() -{ - cout << "Histogram of Oriented Gradients descriptor and detector sample.\n" - << "\nUsage: hog_gpu\n" - << " (|--video |--camera ) # frames source\n" - << " [--make_gray ] # convert image to gray one or not\n" - << " [--resize_src ] # do resize of the source image or not\n" - << " [--width ] # resized image width\n" - << " [--height ] # resized image height\n" - << " [--hit_threshold ] # classifying plane distance threshold (0.0 usually)\n" - << " [--scale ] # HOG window scale factor\n" - << " [--nlevels ] # max number of HOG window scales\n" - << " [--win_width ] # width of the window (48 or 64)\n" - << " [--win_stride_width ] # distance by OX axis between neighbour wins\n" - << " [--win_stride_height ] # distance by OY axis between neighbour wins\n" - << " [--gr_threshold ] # merging similar rects constant\n" - << " 
[--gamma_correct ] # do gamma correction or not\n" - << " [--write_video ] # write video or not\n" - << " [--dst_video ] # output video path\n" - << " [--dst_video_fps ] # output video fps\n"; - help_showed = true; -} + string img_source; + string vdo_source; + string output; + int camera_id; +}; int main(int argc, char** argv) { + const char* keys = + "{ h | help | false | print help message }" + "{ i | input | | specify input image}" + "{ c | camera | -1 | enable camera capturing }" + "{ v | video | | use video as input }" + "{ g | gray | false | convert image to gray one or not}" + "{ s | scale | 1.0 | resize the image before detect}" + "{ l |larger_win| false | use 64x128 window}" + "{ o | output | | specify output path when input is images}"; + CommandLineParser cmd(argc, argv, keys); + App app(cmd); try { - if (argc < 2) - printHelp(); - Args args = Args::read(argc, argv); - if (help_showed) - return -1; - App app(args); app.run(); } - catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; } - catch (const exception& e) { return cout << "error: " << e.what() << endl, 1; } - catch(...) 
{ return cout << "unknown exception" << endl, 1; } - return 0; -} - - -Args::Args() -{ - src_is_video = false; - src_is_camera = false; - camera_id = 0; - - write_video = false; - dst_video_fps = 24.; - - make_gray = false; - - resize_src = false; - width = 640; - height = 480; - - scale = 1.05; - nlevels = 13; - gr_threshold = 8; - hit_threshold = 1.4; - hit_threshold_auto = true; - - win_width = 48; - win_stride_width = 8; - win_stride_height = 8; - - gamma_corr = true; -} - - -Args Args::read(int argc, char** argv) -{ - Args args; - for (int i = 1; i < argc; i++) + catch (const Exception& e) { - if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]); - else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]); - else if (string(argv[i]) == "--hit_threshold") - { - args.hit_threshold = atof(argv[++i]); - args.hit_threshold_auto = false; - } - else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]); - else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]); - else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]); - else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]); - else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]); - else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]); - else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i]; - else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]); - else if (string(argv[i]) == "--help") printHelp(); - else if (string(argv[i]) == 
"--video") { args.src = argv[++i]; args.src_is_video = true; } - else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; } - else if (args.src.empty()) args.src = argv[i]; - else throw runtime_error((string("unknown key: ") + argv[i])); + return cout << "error: " << e.what() << endl, 1; + } + catch (const exception& e) + { + return cout << "error: " << e.what() << endl, 1; } - return args; + catch(...) + { + return cout << "unknown exception" << endl, 1; + } + return 0; } - -App::App(const Args& s) +App::App(CommandLineParser& cmd) { - args = s; cout << "\nControls:\n" << "\tESC - exit\n" << "\tm - change mode GPU <-> CPU\n" @@ -209,56 +103,56 @@ App::App(const Args& s) << "\t4/r - increase/decrease hit threshold\n" << endl; - use_gpu = true; - make_gray = args.make_gray; - scale = args.scale; - gr_threshold = args.gr_threshold; - nlevels = args.nlevels; - - if (args.hit_threshold_auto) - args.hit_threshold = args.win_width == 48 ? 1.4 : 0.; - hit_threshold = args.hit_threshold; - gamma_corr = args.gamma_corr; + use_gpu = true; + make_gray = cmd.get("g"); + resize_scale = cmd.get("s"); + win_width = cmd.get("l") == true ? 64 : 48; + vdo_source = cmd.get("v"); + img_source = cmd.get("i"); + output = cmd.get("o"); + camera_id = cmd.get("c"); - if (args.win_width != 64 && args.win_width != 48) - args.win_width = 64; + win_stride_width = 8; + win_stride_height = 8; + gr_threshold = 8; + nlevels = 13; + hit_threshold = win_width == 48 ? 
1.4 : 0.; + scale = 1.05; + gamma_corr = true; - cout << "Scale: " << scale << endl; - if (args.resize_src) - cout << "Resized source: (" << args.width << ", " << args.height << ")\n"; cout << "Group threshold: " << gr_threshold << endl; cout << "Levels number: " << nlevels << endl; - cout << "Win width: " << args.win_width << endl; - cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n"; + cout << "Win width: " << win_width << endl; + cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n"; cout << "Hit threshold: " << hit_threshold << endl; cout << "Gamma correction: " << gamma_corr << endl; cout << endl; } - void App::run() { - std::vector oclinfo; + vector oclinfo; ocl::getDevice(oclinfo); running = true; - cv::VideoWriter video_writer; + VideoWriter video_writer; - Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96) - Size win_stride(args.win_stride_width, args.win_stride_height); + Size win_size(win_width, win_width * 2); + Size win_stride(win_stride_width, win_stride_height); // Create HOG descriptors and detectors here vector detector; if (win_size == Size(64, 128)) - detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); + detector = ocl::HOGDescriptor::getPeopleDetector64x128(); else - detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); + detector = ocl::HOGDescriptor::getPeopleDetector48x96(); + - cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, - cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, - cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); - cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, - HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); + ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, + ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, + ocl::HOGDescriptor::DEFAULT_NLEVELS); + HOGDescriptor cpu_hog(win_size, 
Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, + HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); gpu_hog.setSVMDetector(detector); cpu_hog.setSVMDetector(detector); @@ -267,29 +161,29 @@ void App::run() VideoCapture vc; Mat frame; - if (args.src_is_video) + if (vdo_source!="") { - vc.open(args.src.c_str()); + vc.open(vdo_source.c_str()); if (!vc.isOpened()) - throw runtime_error(string("can't open video file: " + args.src)); + throw runtime_error(string("can't open video file: " + vdo_source)); vc >> frame; } - else if (args.src_is_camera) + else if (camera_id != -1) { - vc.open(args.camera_id); + vc.open(camera_id); if (!vc.isOpened()) { stringstream msg; - msg << "can't open camera: " << args.camera_id; + msg << "can't open camera: " << camera_id; throw runtime_error(msg.str()); } vc >> frame; } else { - frame = imread(args.src); + frame = imread(img_source); if (frame.empty()) - throw runtime_error(string("can't open image file: " + args.src)); + throw runtime_error(string("can't open image file: " + img_source)); } Mat img_aux, img, img_to_show; @@ -307,13 +201,15 @@ void App::run() else frame.copyTo(img_aux); // Resize image - if (args.resize_src) resize(img_aux, img, Size(args.width, args.height)); + if (abs(scale-1.0)>0.001) + { + Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale)); + resize(img_aux, img, sz); + } else img = img_aux; img_to_show = img; - gpu_hog.nlevels = nlevels; cpu_hog.nlevels = nlevels; - vector found; // Perform HOG classification @@ -330,15 +226,16 @@ void App::run() vector ref_rst; cvtColor(img, img, CV_BGRA2BGR); cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride, - Size(0, 0), scale, gr_threshold-2); + Size(0, 0), scale, gr_threshold-2); double accuracy = checkRectSimilarity(img.size(), ref_rst, found); - cout << "\naccuracy value: " << accuracy << endl; - } - } + cout << "\naccuracy value: " << accuracy << endl; + } + } else 
cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, - Size(0, 0), scale, gr_threshold); + Size(0, 0), scale, gr_threshold); hogWorkEnd(); + // Draw positive classified windows for (size_t i = 0; i < found.size(); i++) { @@ -353,25 +250,31 @@ void App::run() putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); imshow("opencv_gpu_hog", img_to_show); - - if (args.src_is_video || args.src_is_camera) vc >> frame; + if (vdo_source!="" || camera_id!=-1) vc >> frame; workEnd(); - if (args.write_video) + if (output!="") { - if (!video_writer.isOpened()) + if (img_source!="") // wirte image { - video_writer.open(args.dst_video, CV_FOURCC('x','v','i','d'), args.dst_video_fps, - img_to_show.size(), true); - if (!video_writer.isOpened()) - throw std::runtime_error("can't create video writer"); + imwrite(output, img_to_show); } + else //write video + { + if (!video_writer.isOpened()) + { + video_writer.open(output, CV_FOURCC('x','v','i','d'), 24, + img_to_show.size(), true); + if (!video_writer.isOpened()) + throw std::runtime_error("can't create video writer"); + } - if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR); - else cvtColor(img_to_show, img, CV_BGRA2BGR); + if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR); + else cvtColor(img_to_show, img, CV_BGRA2BGR); - video_writer << img; + video_writer << img; + } } handleKey((char)waitKey(3)); @@ -379,7 +282,6 @@ void App::run() } } - void App::handleKey(char key) { switch (key) @@ -442,7 +344,10 @@ void App::handleKey(char key) } -inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); } +inline void App::hogWorkBegin() +{ + hog_work_begin = getTickCount(); +} inline void App::hogWorkEnd() { @@ -458,8 +363,10 @@ inline string App::hogWorkFps() const return ss.str(); } - -inline void App::workBegin() { work_begin = 
getTickCount(); } +inline void App::workBegin() +{ + work_begin = getTickCount(); +} inline void App::workEnd() { @@ -475,8 +382,9 @@ inline string App::workFps() const return ss.str(); } -double App::checkRectSimilarity(Size sz, - std::vector& ob1, + +double App::checkRectSimilarity(Size sz, + std::vector& ob1, std::vector& ob2) { double final_test_result = 0.0; @@ -484,20 +392,26 @@ double App::checkRectSimilarity(Size sz, size_t sz2 = ob2.size(); if(sz1 != sz2) + { return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1); + } else { + if(sz1==0 && sz2==0) + return 0; cv::Mat cpu_result(sz, CV_8UC1); cpu_result.setTo(0); + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) - { + { cv::Mat cpu_result_roi(cpu_result, *r); cpu_result_roi.setTo(1); cpu_result.copyTo(cpu_result); } int cpu_area = cv::countNonZero(cpu_result > 0); + cv::Mat gpu_result(sz, CV_8UC1); gpu_result.setTo(0); for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) @@ -510,10 +424,11 @@ double App::checkRectSimilarity(Size sz, cv::Mat result_; multiply(cpu_result, gpu_result, result_); int result = cv::countNonZero(result_ > 0); - - final_test_result = 1.0 - (double)result/(double)cpu_area; + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; } return final_test_result; - } diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp index cc8d886f79..cefa928670 100644 --- a/samples/ocl/pyrlk_optical_flow.cpp +++ b/samples/ocl/pyrlk_optical_flow.cpp @@ -11,19 +11,20 @@ using namespace cv; using namespace cv::ocl; typedef unsigned char uchar; -#define LOOP_NUM 10 +#define LOOP_NUM 10 int64 work_begin = 0; int64 work_end = 0; -static void workBegin() -{ +static void workBegin() +{ work_begin = getTickCount(); } static void workEnd() { work_end += (getTickCount() - work_begin); } -static double getTime(){ +static double getTime() +{ return 
work_end * 1000. / getTickFrequency(); } @@ -93,14 +94,15 @@ int main(int argc, const char* argv[]) //set this to save kernel compile time from second time you run ocl::setBinpath("./"); const char* keys = - "{ h | help | false | print help message }" - "{ l | left | | specify left image }" - "{ r | right | | specify right image }" - "{ c | camera | 0 | enable camera capturing }" - "{ s | use_cpu | false | use cpu or gpu to process the image }" - "{ v | video | | use video as input }" - "{ points | points | 1000 | specify points count [GoodFeatureToTrack] }" - "{ min_dist | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"; + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ c | camera | 0 | specify camera id }" + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ v | video | | use video as input }" + "{ o | output | pyrlk_output.jpg| specify output save path when input is images }" + "{ p | points | 1000 | specify points count [GoodFeatureToTrack] }" + "{ m | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"; CommandLineParser cmd(argc, argv, keys); @@ -113,13 +115,13 @@ int main(int argc, const char* argv[]) } bool defaultPicturesFail = false; - string fname0 = cmd.get("left"); - string fname1 = cmd.get("right"); - string vdofile = cmd.get("video"); - int points = cmd.get("points"); - double minDist = cmd.get("min_dist"); + string fname0 = cmd.get("l"); + string fname1 = cmd.get("r"); + string vdofile = cmd.get("v"); + string outfile = cmd.get("o"); + int points = cmd.get("p"); + double minDist = cmd.get("m"); bool useCPU = cmd.get("s"); - bool useCamera = cmd.get("c"); int inputName = cmd.get("c"); oclMat d_nextPts, d_status; @@ -132,22 +134,9 @@ int main(int argc, const char* argv[]) vector status(points); vector err; - if (frame0.empty() || frame1.empty()) - { - useCamera = true; - defaultPicturesFail = 
true; - CvCapture* capture = 0; - capture = cvCaptureFromCAM( inputName ); - if (!capture) - { - cout << "Can't load input images" << endl; - return -1; - } - } - cout << "Points count : " << points << endl << endl; - if (useCamera) + if (frame0.empty() || frame1.empty()) { CvCapture* capture = 0; Mat frame, frameCopy; @@ -241,10 +230,10 @@ _cleanup_: else { nocamera: - for(int i = 0; i <= LOOP_NUM;i ++) + for(int i = 0; i <= LOOP_NUM; i ++) { cout << "loop" << i << endl; - if (i > 0) workBegin(); + if (i > 0) workBegin(); if (useCPU) { @@ -274,8 +263,8 @@ nocamera: cout << getTime() / LOOP_NUM << " ms" << endl; drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0)); - imshow("PyrLK [Sparse]", frame0); + imwrite(outfile, frame0); } } } diff --git a/samples/ocl/squares.cpp b/samples/ocl/squares.cpp index 6b184161f7..48964ffb2e 100644 --- a/samples/ocl/squares.cpp +++ b/samples/ocl/squares.cpp @@ -6,7 +6,6 @@ #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/ocl/ocl.hpp" - #include #include #include @@ -14,23 +13,50 @@ using namespace cv; using namespace std; -static void help() +#define ACCURACY_CHECK 1 + +#if ACCURACY_CHECK +// check if two vectors of vector of points are near or not +// prior assumption is that they are in correct order +static bool checkPoints( + vector< vector > set1, + vector< vector > set2, + int maxDiff = 5) { - cout << - "\nA program using OCL module pyramid scaling, Canny, dilate functions, threshold, split; cpu contours, contour simpification and\n" - "memory storage (it's got it all folks) to find\n" - "squares in a list of images pic1-6.png\n" - "Returns sequence of squares detected on the image.\n" - "the sequence is stored in the specified memory storage\n" - "Call:\n" - "./squares\n" - "Using OpenCV version %s\n" << CV_VERSION << "\n" << endl; -} + if(set1.size() != set2.size()) + { + return false; + } + + for(vector< vector >::iterator it1 = set1.begin(), it2 = set2.begin(); + it1 < 
set1.end() && it2 < set2.end(); it1 ++, it2 ++) + { + vector pts1 = *it1; + vector pts2 = *it2; + if(pts1.size() != pts2.size()) + { + return false; + } + for(size_t i = 0; i < pts1.size(); i ++) + { + Point pt1 = pts1[i], pt2 = pts2[i]; + if(std::abs(pt1.x - pt2.x) > maxDiff || + std::abs(pt1.y - pt2.y) > maxDiff) + { + return false; + } + } + } + return true; +} +#endif + int thresh = 50, N = 11; const char* wndname = "OpenCL Square Detection Demo"; + // helper function: // finds a cosine of angle between vectors // from pt0->pt1 and from pt0->pt2 @@ -43,9 +69,92 @@ static double angle( Point pt1, Point pt2, Point pt0 ) return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10); } + // returns sequence of squares detected on the image. // the sequence is stored in the specified memory storage static void findSquares( const Mat& image, vector >& squares ) +{ + squares.clear(); + Mat pyr, timg, gray0(image.size(), CV_8U), gray; + + // down-scale and upscale the image to filter out the noise + pyrDown(image, pyr, Size(image.cols/2, image.rows/2)); + pyrUp(pyr, timg, image.size()); + vector > contours; + + // find squares in every color plane of the image + for( int c = 0; c < 3; c++ ) + { + int ch[] = {c, 0}; + mixChannels(&timg, 1, &gray0, 1, ch, 1); + + // try several threshold levels + for( int l = 0; l < N; l++ ) + { + // hack: use Canny instead of zero threshold level. + // Canny helps to catch squares with gradient shading + if( l == 0 ) + { + // apply Canny. Take the upper threshold from slider + // and set the lower to 0 (which forces edges merging) + Canny(gray0, gray, 0, thresh, 5); + // dilate canny output to remove potential + // holes between edge segments + dilate(gray, gray, Mat(), Point(-1,-1)); + } + else + { + // apply threshold if l!=0: + // tgray(x,y) = gray(x,y) < (l+1)*255/N ? 
255 : 0 + cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY); + } + + // find contours and store them all as a list + findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE); + + vector approx; + + // test each contour + for( size_t i = 0; i < contours.size(); i++ ) + { + // approximate contour with accuracy proportional + // to the contour perimeter + approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true); + + // square contours should have 4 vertices after approximation + // relatively large area (to filter out noisy contours) + // and be convex. + // Note: absolute value of an area is used because + // area may be positive or negative - in accordance with the + // contour orientation + if( approx.size() == 4 && + fabs(contourArea(Mat(approx))) > 1000 && + isContourConvex(Mat(approx)) ) + { + double maxCosine = 0; + + for( int j = 2; j < 5; j++ ) + { + // find the maximum cosine of the angle between joint edges + double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1])); + maxCosine = MAX(maxCosine, cosine); + } + + // if cosines of all angles are small + // (all angles are ~90 degree) then write quandrange + // vertices to resultant sequence + if( maxCosine < 0.3 ) + squares.push_back(approx); + } + } + } + } +} + + +// returns sequence of squares detected on the image. 
+// the sequence is stored in the specified memory storage +static void findSquares_ocl( const Mat& image, vector >& squares ) { squares.clear(); @@ -91,7 +200,6 @@ static void findSquares( const Mat& image, vector >& squares ) findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE); vector approx; - // test each contour for( size_t i = 0; i < contours.size(); i++ ) { @@ -106,11 +214,10 @@ static void findSquares( const Mat& image, vector >& squares ) // area may be positive or negative - in accordance with the // contour orientation if( approx.size() == 4 && - fabs(contourArea(Mat(approx))) > 1000 && - isContourConvex(Mat(approx)) ) + fabs(contourArea(Mat(approx))) > 1000 && + isContourConvex(Mat(approx)) ) { double maxCosine = 0; - for( int j = 2; j < 5; j++ ) { // find the maximum cosine of the angle between joint edges @@ -139,40 +246,93 @@ static void drawSquares( Mat& image, const vector >& squares ) int n = (int)squares[i].size(); polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, CV_AA); } +} + - imshow(wndname, image); +// draw both pure-C++ and ocl square results onto a single image +static Mat drawSquaresBoth( const Mat& image, + const vector >& sqsCPP, + const vector >& sqsOCL +) +{ + Mat imgToShow(Size(image.cols * 2, image.rows), image.type()); + Mat lImg = imgToShow(Rect(Point(0, 0), image.size())); + Mat rImg = imgToShow(Rect(Point(image.cols, 0), image.size())); + image.copyTo(lImg); + image.copyTo(rImg); + drawSquares(lImg, sqsCPP); + drawSquares(rImg, sqsOCL); + float fontScale = 0.8f; + Scalar white = Scalar::all(255), black = Scalar::all(0); + + putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2); + putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2); + putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1); + putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1); + + return imgToShow; } -int main(int 
/*argc*/, char** /*argv*/) +int main(int argc, char** argv) { + const char* keys = + "{ i | input | | specify input image }" + "{ o | output | squares_output.jpg | specify output save path}"; + CommandLineParser cmd(argc, argv, keys); + string inputName = cmd.get("i"); + string outfile = cmd.get("o"); + if(inputName.empty()) + { + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } - //ocl::setBinpath("F:/kernel_bin"); vector info; CV_Assert(ocl::getDevice(info)); - - static const char* names[] = { "pic1.png", "pic2.png", "pic3.png", - "pic4.png", "pic5.png", "pic6.png", 0 }; - help(); + int iterations = 10; namedWindow( wndname, 1 ); - vector > squares; + vector > squares_cpu, squares_ocl; - for( int i = 0; names[i] != 0; i++ ) + Mat image = imread(inputName, 1); + if( image.empty() ) { - Mat image = imread(names[i], 1); - if( image.empty() ) - { - cout << "Couldn't load " << names[i] << endl; - continue; - } + cout << "Couldn't load " << inputName << endl; + return -1; + } + int j = iterations; + int64 t_ocl = 0, t_cpp = 0; + //warm-ups + cout << "warming up ..." << endl; + findSquares(image, squares_cpu); + findSquares_ocl(image, squares_ocl); + + +#if ACCURACY_CHECK + cout << "Checking ocl accuracy ... " << endl; + cout << (checkPoints(squares_cpu, squares_ocl) ? 
"Pass" : "Failed") << endl; +#endif + do + { + int64 t_start = cv::getTickCount(); + findSquares(image, squares_cpu); + t_cpp += cv::getTickCount() - t_start; - findSquares(image, squares); - drawSquares(image, squares); - int c = waitKey(); - if( (char)c == 27 ) - break; + t_start = cv::getTickCount(); + findSquares_ocl(image, squares_ocl); + t_ocl += cv::getTickCount() - t_start; + cout << "run loop: " << j << endl; } + while(--j); + cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl; + cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl; + + Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl); + imshow(wndname, result); + imwrite(outfile, result); + cvWaitKey(0); return 0; } diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp index 7ac2c9a6f3..565744baa6 100644 --- a/samples/ocl/stereo_match.cpp +++ b/samples/ocl/stereo_match.cpp @@ -10,56 +10,45 @@ using namespace cv; using namespace std; using namespace ocl; -bool help_showed = false; - -struct Params -{ - Params(); - static Params read(int argc, char** argv); - - string left; - string right; - - string method_str() const - { - switch (method) - { - case BM: return "BM"; - case BP: return "BP"; - case CSBP: return "CSBP"; - } - return ""; - } - enum {BM, BP, CSBP} method; - int ndisp; // Max disparity + 1 - enum {GPU, CPU} type; -}; - struct App { - App(const Params& p); + App(CommandLineParser& cmd); void run(); void handleKey(char key); void printParams() const; - void workBegin() { work_begin = getTickCount(); } + void workBegin() + { + work_begin = getTickCount(); + } void workEnd() { int64 d = getTickCount() - work_begin; double f = getTickFrequency(); work_fps = f / d; } - + string method_str() const + { + switch (method) + { + case BM: + return "BM"; + case BP: + return "BP"; + case CSBP: + return "CSBP"; + } + return ""; + } string text() const { stringstream ss; - 
ss << "(" << p.method_str() << ") FPS: " << setiosflags(ios::left) - << setprecision(4) << work_fps; + ss << "(" << method_str() << ") FPS: " << setiosflags(ios::left) + << setprecision(4) << work_fps; return ss.str(); } private: - Params p; bool running; Mat left_src, right_src; @@ -72,42 +61,45 @@ private: int64 work_begin; double work_fps; -}; -static void printHelp() -{ - cout << "Usage: stereo_match_gpu\n" - << "\t--left --right # must be rectified\n" - << "\t--method # BM | BP | CSBP\n" - << "\t--ndisp # number of disparity levels\n" - << "\t--type # cpu | CPU | gpu | GPU\n"; - help_showed = true; -} + string l_img, r_img; + string out_img; + enum {BM, BP, CSBP} method; + int ndisp; // Max disparity + 1 + enum {GPU, CPU} type; +}; int main(int argc, char** argv) { + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ m | method | BM | specify match method(BM/BP/CSBP) }" + "{ n | ndisp | 64 | specify number of disparity levels }" + "{ s | cpu_ocl | false | use cpu or gpu as ocl device to process the image }" + "{ o | output | stereo_match_output.jpg | specify output path when input is images}"; + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } try { - if (argc < 2) - { - printHelp(); - return 1; - } - - Params args = Params::read(argc, argv); - if (help_showed) - return -1; + App app(cmd); + int flag = CVCL_DEVICE_TYPE_GPU; + if(cmd.get("s") == true) + flag = CVCL_DEVICE_TYPE_CPU; - int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU }; vector info; - - if(getDevice(info, flags[args.type]) == 0) + if(getDevice(info, flag) == 0) { throw runtime_error("Error: Did not find a valid OpenCL device!"); } cout << "Device name:" << info[0].DeviceName[0] << endl; - App app(args); app.run(); } catch (const exception& e) @@ -117,77 +109,39 @@ int main(int argc, char** 
argv) return 0; } - -Params::Params() -{ - method = BM; - ndisp = 64; - type = GPU; -} - - -Params Params::read(int argc, char** argv) -{ - Params p; - - for (int i = 1; i < argc; i++) - { - if (string(argv[i]) == "--left") p.left = argv[++i]; - else if (string(argv[i]) == "--right") p.right = argv[++i]; - else if (string(argv[i]) == "--method") - { - if (string(argv[i + 1]) == "BM") p.method = BM; - else if (string(argv[i + 1]) == "BP") p.method = BP; - else if (string(argv[i + 1]) == "CSBP") p.method = CSBP; - else throw runtime_error("unknown stereo match method: " + string(argv[i + 1])); - i++; - } - else if (string(argv[i]) == "--ndisp") p.ndisp = atoi(argv[++i]); - else if (string(argv[i]) == "--type") - { - string t(argv[++i]); - if (t == "cpu" || t == "CPU") - { - p.type = CPU; - } - else if (t == "gpu" || t == "GPU") - { - p.type = GPU; - } - else throw runtime_error("unknown device type: " + t); - } - else if (string(argv[i]) == "--help") printHelp(); - else throw runtime_error("unknown key: " + string(argv[i])); - } - - return p; -} - - -App::App(const Params& params) - : p(params), running(false) +App::App(CommandLineParser& cmd) + : running(false),method(BM) { cout << "stereo_match_ocl sample\n"; cout << "\nControls:\n" - << "\tesc - exit\n" - << "\tp - print current parameters\n" - << "\tg - convert source images into gray\n" - << "\tm - change stereo match method\n" - << "\ts - change Sobel prefiltering flag (for BM only)\n" - << "\t1/q - increase/decrease maximum disparity\n" - << "\t2/w - increase/decrease window size (for BM only)\n" - << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n" - << "\t4/r - increase/decrease level count (for BP and CSBP only)\n"; + << "\tesc - exit\n" + << "\tp - print current parameters\n" + << "\tg - convert source images into gray\n" + << "\tm - change stereo match method\n" + << "\ts - change Sobel prefiltering flag (for BM only)\n" + << "\t1/q - increase/decrease maximum disparity\n" + << "\t2/w 
- increase/decrease window size (for BM only)\n" + << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n" + << "\t4/r - increase/decrease level count (for BP and CSBP only)\n"; + l_img = cmd.get("l"); + r_img = cmd.get("r"); + string mstr = cmd.get("m"); + if(mstr == "BM") method = BM; + else if(mstr == "BP") method = BP; + else if(mstr == "CSBP") method = CSBP; + else cout << "unknown method!\n"; + ndisp = cmd.get("n"); + out_img = cmd.get("o"); } void App::run() { // Load images - left_src = imread(p.left); - right_src = imread(p.right); - if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\""); - if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\""); + left_src = imread(l_img); + right_src = imread(r_img); + if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\""); + if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\""); cvtColor(left_src, left, CV_BGR2GRAY); cvtColor(right_src, right, CV_BGR2GRAY); @@ -199,14 +153,15 @@ void App::run() imshow("right", right); // Set common parameters - bm.ndisp = p.ndisp; - bp.ndisp = p.ndisp; - csbp.ndisp = p.ndisp; + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; cout << endl; printParams(); running = true; + bool written = false; while (running) { @@ -214,9 +169,9 @@ void App::run() Mat disp; oclMat d_disp; workBegin(); - switch (p.method) + switch (method) { - case Params::BM: + case BM: if (d_left.channels() > 1 || d_right.channels() > 1) { cout << "BM doesn't support color images\n"; @@ -230,25 +185,28 @@ void App::run() } bm(d_left, d_right, d_disp); break; - case Params::BP: + case BP: bp(d_left, d_right, d_disp); break; - case Params::CSBP: + case CSBP: csbp(d_left, d_right, d_disp); break; } - ocl::finish(); workEnd(); // Show results d_disp.download(disp); - if (p.method != Params::BM) + if (method != BM) { disp.convertTo(disp, 0); } putText(disp, text(), Point(5, 25), 
FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255)); imshow("disparity", disp); - + if(!written) + { + imwrite(out_img, disp); + written = true; + } handleKey((char)waitKey(3)); } } @@ -259,19 +217,19 @@ void App::printParams() const cout << "--- Parameters ---\n"; cout << "image_size: (" << left.cols << ", " << left.rows << ")\n"; cout << "image_channels: " << left.channels() << endl; - cout << "method: " << p.method_str() << endl - << "ndisp: " << p.ndisp << endl; - switch (p.method) + cout << "method: " << method_str() << endl + << "ndisp: " << ndisp << endl; + switch (method) { - case Params::BM: + case BM: cout << "win_size: " << bm.winSize << endl; cout << "prefilter_sobel: " << bm.preset << endl; break; - case Params::BP: + case BP: cout << "iter_count: " << bp.iters << endl; cout << "level_count: " << bp.levels << endl; break; - case Params::CSBP: + case CSBP: cout << "iter_count: " << csbp.iters << endl; cout << "level_count: " << csbp.levels << endl; break; @@ -287,11 +245,13 @@ void App::handleKey(char key) case 27: running = false; break; - case 'p': case 'P': + case 'p': + case 'P': printParams(); break; - case 'g': case 'G': - if (left.channels() == 1 && p.method != Params::BM) + case 'g': + case 'G': + if (left.channels() == 1 && method != BM) { left = left_src; right = right_src; @@ -307,23 +267,25 @@ void App::handleKey(char key) imshow("left", left); imshow("right", right); break; - case 'm': case 'M': - switch (p.method) + case 'm': + case 'M': + switch (method) { - case Params::BM: - p.method = Params::BP; + case BM: + method = BP; break; - case Params::BP: - p.method = Params::CSBP; + case BP: + method = CSBP; break; - case Params::CSBP: - p.method = Params::BM; + case CSBP: + method = BM; break; } - cout << "method: " << p.method_str() << endl; + cout << "method: " << method_str() << endl; break; - case 's': case 'S': - if (p.method == Params::BM) + case 's': + case 'S': + if (method == BM) { switch (bm.preset) { @@ -338,76 +300,80 @@ void 
App::handleKey(char key) } break; case '1': - p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8; - cout << "ndisp: " << p.ndisp << endl; - bm.ndisp = p.ndisp; - bp.ndisp = p.ndisp; - csbp.ndisp = p.ndisp; + ndisp == 1 ? ndisp = 8 : ndisp += 8; + cout << "ndisp: " << ndisp << endl; + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; break; - case 'q': case 'Q': - p.ndisp = max(p.ndisp - 8, 1); - cout << "ndisp: " << p.ndisp << endl; - bm.ndisp = p.ndisp; - bp.ndisp = p.ndisp; - csbp.ndisp = p.ndisp; + case 'q': + case 'Q': + ndisp = max(ndisp - 8, 1); + cout << "ndisp: " << ndisp << endl; + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; break; case '2': - if (p.method == Params::BM) + if (method == BM) { bm.winSize = min(bm.winSize + 1, 51); cout << "win_size: " << bm.winSize << endl; } break; - case 'w': case 'W': - if (p.method == Params::BM) + case 'w': + case 'W': + if (method == BM) { bm.winSize = max(bm.winSize - 1, 2); cout << "win_size: " << bm.winSize << endl; } break; case '3': - if (p.method == Params::BP) + if (method == BP) { bp.iters += 1; cout << "iter_count: " << bp.iters << endl; } - else if (p.method == Params::CSBP) + else if (method == CSBP) { csbp.iters += 1; cout << "iter_count: " << csbp.iters << endl; } break; - case 'e': case 'E': - if (p.method == Params::BP) + case 'e': + case 'E': + if (method == BP) { bp.iters = max(bp.iters - 1, 1); cout << "iter_count: " << bp.iters << endl; } - else if (p.method == Params::CSBP) + else if (method == CSBP) { csbp.iters = max(csbp.iters - 1, 1); cout << "iter_count: " << csbp.iters << endl; } break; case '4': - if (p.method == Params::BP) + if (method == BP) { bp.levels += 1; cout << "level_count: " << bp.levels << endl; } - else if (p.method == Params::CSBP) + else if (method == CSBP) { csbp.levels += 1; cout << "level_count: " << csbp.levels << endl; } break; - case 'r': case 'R': - if (p.method == Params::BP) + case 'r': + case 'R': + if (method == BP) { bp.levels = max(bp.levels - 1, 
1); cout << "level_count: " << bp.levels << endl; } - else if (p.method == Params::CSBP) + else if (method == CSBP) { csbp.levels = max(csbp.levels - 1, 1); cout << "level_count: " << csbp.levels << endl; diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp index 038a8dc5cd..bee517fbca 100644 --- a/samples/ocl/surf_matcher.cpp +++ b/samples/ocl/surf_matcher.cpp @@ -1,48 +1,3 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Peng Xiao, pengxiao@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - #include #include #include "opencv2/core/core.hpp" @@ -61,27 +16,20 @@ const float GOOD_PORTION = 0.15f; namespace { -void help(); - -void help() -{ - std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl; - std::cout << "\nUsage:\n\tsurf_matcher --left --right [-c]" << std::endl; - std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl; -} int64 work_begin = 0; int64 work_end = 0; -void workBegin() -{ +void workBegin() +{ work_begin = getTickCount(); } void workEnd() { work_end = getTickCount() - work_begin; } -double getTime(){ +double getTime() +{ return work_end /((double)cvGetTickFrequency() * 1000.); } @@ -114,17 +62,17 @@ struct SURFMatcher Mat drawGoodMatches( const Mat& cpu_img1, const Mat& cpu_img2, - const vector& keypoints1, - const vector& keypoints2, + const vector& keypoints1, + const vector& keypoints2, vector& matches, vector& scene_corners_ - ) +) { - //-- Sort matches and preserve top 10% matches + //-- Sort matches and preserve top 10% matches std::sort(matches.begin(), matches.end()); std::vector< DMatch > good_matches; double minDist = 
matches.front().distance, - maxDist = matches.back().distance; + maxDist = matches.back().distance; const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION)); for( int i = 0; i < ptsPairs; i++ ) @@ -139,8 +87,8 @@ Mat drawGoodMatches( // drawing the results Mat img_matches; drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2, - good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), - vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); + good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), + vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); //-- Localize the object std::vector obj; @@ -154,28 +102,30 @@ Mat drawGoodMatches( } //-- Get the corners from the image_1 ( the object to be "detected" ) std::vector obj_corners(4); - obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( cpu_img1.cols, 0 ); - obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = cvPoint( 0, cpu_img1.rows ); + obj_corners[0] = cvPoint(0,0); + obj_corners[1] = cvPoint( cpu_img1.cols, 0 ); + obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); + obj_corners[3] = cvPoint( 0, cpu_img1.rows ); std::vector scene_corners(4); - + Mat H = findHomography( obj, scene, CV_RANSAC ); perspectiveTransform( obj_corners, scene_corners, H); scene_corners_ = scene_corners; - + //-- Draw lines between the corners (the mapped object in the scene - image_2 ) - line( img_matches, - scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), - Scalar( 0, 255, 0), 2, CV_AA ); - line( img_matches, - scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), - Scalar( 0, 255, 0), 2, CV_AA ); - line( img_matches, - scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), - Scalar( 0, 255, 0), 2, CV_AA ); - line( img_matches, - scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + 
Point2f( (float)cpu_img1.cols, 0), - Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); return img_matches; } @@ -185,6 +135,21 @@ Mat drawGoodMatches( // use cpu findHomography interface to calculate the transformation matrix int main(int argc, char* argv[]) { + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ o | output | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }" + "{ c | use_cpu | false | use CPU algorithms }" + "{ a | use_all | false | use both CPU and GPU algorithms}"; + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + std::cout << "Avaible options:" << std::endl; + cmd.printParams(); + return 0; + } + vector info; if(cv::ocl::getDevice(info) == 0) { @@ -195,54 +160,38 @@ int main(int argc, char* argv[]) Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; oclMat img1, img2; - bool useCPU = false; + bool useCPU = cmd.get("c"); bool useGPU = false; - bool useALL = false; + bool useALL = cmd.get("a"); + + string outpath = cmd.get("o"); - for (int i = 1; i < argc; ++i) + cpu_img1 = imread(cmd.get("l")); + CV_Assert(!cpu_img1.empty()); + cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); + img1 = cpu_img1_grey; + + cpu_img2 = imread(cmd.get("r")); + 
CV_Assert(!cpu_img2.empty()); + cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); + img2 = cpu_img2_grey; + + if(useALL) { - if (string(argv[i]) == "--left") - { - cpu_img1 = imread(argv[++i]); - CV_Assert(!cpu_img1.empty()); - cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); - img1 = cpu_img1_grey; - } - else if (string(argv[i]) == "--right") - { - cpu_img2 = imread(argv[++i]); - CV_Assert(!cpu_img2.empty()); - cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); - img2 = cpu_img2_grey; - } - else if (string(argv[i]) == "-c") - { - useCPU = true; - useGPU = false; - useALL = false; - }else if(string(argv[i]) == "-g") - { - useGPU = true; - useCPU = false; - useALL = false; - }else if(string(argv[i]) == "-a") - { - useALL = true; - useCPU = false; - useGPU = false; - } - else if (string(argv[i]) == "--help") - { - help(); - return -1; - } + useCPU = false; + useGPU = false; } + else if(useCPU==false && useALL==false) + { + useGPU = true; + } + if(!useCPU) { std::cout - << "Device name:" - << info[0].DeviceName[0] - << std::endl; + << "Device name:" + << info[0].DeviceName[0] + << std::endl; } double surf_time = 0.; @@ -262,12 +211,12 @@ int main(int argc, char* argv[]) //instantiate detectors/matchers SURFDetector cpp_surf; SURFDetector ocl_surf; - + SURFMatcher cpp_matcher; SURFMatcher ocl_matcher; //-- start of timing section - if (useCPU) + if (useCPU) { for (int i = 0; i <= LOOP_NUM; i++) { @@ -298,7 +247,8 @@ int main(int argc, char* argv[]) surf_time = getTime(); std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; - }else + } + else { //cpu runs for (int i = 0; i <= LOOP_NUM; i++) @@ -353,14 +303,14 @@ int main(int argc, char* argv[]) for(size_t i = 0; i < cpu_corner.size(); i++) { if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10) - ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10)) + ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10)) { std::cout<<"Failed\n"; result = false; break; } result = true; - } + } if(result) 
std::cout<<"Passed\n"; } @@ -371,12 +321,15 @@ int main(int argc, char* argv[]) { namedWindow("cpu surf matches", 0); imshow("cpu surf matches", img_matches); + imwrite(outpath, img_matches); } else if(useGPU) { namedWindow("ocl surf matches", 0); imshow("ocl surf matches", img_matches); - }else + imwrite(outpath, img_matches); + } + else { namedWindow("cpu surf matches", 0); imshow("cpu surf matches", img_matches); diff --git a/samples/ocl/tvl1_optical_flow.cpp b/samples/ocl/tvl1_optical_flow.cpp new file mode 100644 index 0000000000..cff9692ed6 --- /dev/null +++ b/samples/ocl/tvl1_optical_flow.cpp @@ -0,0 +1,265 @@ +#include +#include +#include + +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/video/video.hpp" + +using namespace std; +using namespace cv; +using namespace cv::ocl; + +typedef unsigned char uchar; +#define LOOP_NUM 10 +int64 work_begin = 0; +int64 work_end = 0; + +static void workBegin() +{ + work_begin = getTickCount(); +} +static void workEnd() +{ + work_end += (getTickCount() - work_begin); +} +static double getTime() +{ + return work_end * 1000. / getTickFrequency(); +} + +template inline T clamp (T x, T a, T b) +{ + return ((x) > (a) ? ((x) < (b) ? 
(x) : (b)) : (a)); +} + +template inline T mapValue(T x, T a, T b, T c, T d) +{ + x = clamp(x, a, b); + return c + (d - c) * (x - a) / (b - a); +} + +static void getFlowField(const Mat& u, const Mat& v, Mat& flowField) +{ + float maxDisplacement = 1.0f; + + for (int i = 0; i < u.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + for (int j = 0; j < u.cols; ++j) + { + float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j])); + + if (d > maxDisplacement) + maxDisplacement = d; + } + } + + flowField.create(u.size(), CV_8UC4); + + for (int i = 0; i < flowField.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + + Vec4b* row = flowField.ptr(i); + + for (int j = 0; j < flowField.cols; ++j) + { + row[j][0] = 0; + row[j][1] = static_cast (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][2] = static_cast (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][3] = 255; + } + } +} + + +int main(int argc, const char* argv[]) +{ + static std::vector ocl_info; + ocl::getDevice(ocl_info); + //if you want to use undefault device, set it here + setDevice(ocl_info[0]); + + //set this to save kernel compile time from second time you run + ocl::setBinpath("./"); + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ o | output | tvl1_output.jpg | specify output save path }" + "{ c | camera | 0 | enable camera capturing }" + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ v | video | | use video as input }"; + + CommandLineParser cmd(argc, argv, keys); + + if (cmd.get("help")) + { + cout << "Usage: pyrlk_optical_flow [options]" << endl; + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } + + bool defaultPicturesFail = false; + string fname0 = cmd.get("l"); + string fname1 = cmd.get("r"); + string vdofile = cmd.get("v"); + string 
outpath = cmd.get("o"); + bool useCPU = cmd.get("s"); + bool useCamera = cmd.get("c"); + int inputName = cmd.get("c"); + + Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE); + Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE); + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; + + + Mat flow, show_flow; + Mat flow_vec[2]; + if (frame0.empty() || frame1.empty()) + { + useCamera = true; + defaultPicturesFail = true; + CvCapture* capture = 0; + capture = cvCaptureFromCAM( inputName ); + if (!capture) + { + cout << "Can't load input images" << endl; + return -1; + } + } + + + if (useCamera) + { + CvCapture* capture = 0; + Mat frame, frameCopy; + Mat frame0Gray, frame1Gray; + Mat ptr0, ptr1; + + if(vdofile == "") + capture = cvCaptureFromCAM( inputName ); + else + capture = cvCreateFileCapture(vdofile.c_str()); + + int c = inputName ; + if(!capture) + { + if(vdofile == "") + cout << "Capture from CAM " << c << " didn't work" << endl; + else + cout << "Capture from file " << vdofile << " failed" <calc(ptr0, ptr1, flow); + split(flow, flow_vec); + } + else + { + oclMat d_flowx, d_flowy; + d_alg(oclMat(ptr0), oclMat(ptr1), d_flowx, d_flowy); + d_flowx.download(flow_vec[0]); + d_flowy.download(flow_vec[1]); + } + if (i%2 == 1) + frame1.copyTo(frameCopy); + else + frame0.copyTo(frameCopy); + getFlowField(flow_vec[0], flow_vec[1], show_flow); + imshow("PyrLK [Sparse]", show_flow); + } + + if( waitKey( 10 ) >= 0 ) + goto _cleanup_; + } + + waitKey(0); + +_cleanup_: + cvReleaseCapture( &capture ); + } + else + { +nocamera: + oclMat d_flowx, d_flowy; + for(int i = 0; i <= LOOP_NUM; i ++) + { + cout << "loop" << i << endl; + + if (i > 0) workBegin(); + if (useCPU) + { + alg->calc(frame0, frame1, flow); + split(flow, flow_vec); + } + else + { + d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy); + d_flowx.download(flow_vec[0]); + d_flowy.download(flow_vec[1]); + } + if (i > 0 && i <= LOOP_NUM) + workEnd(); + + if (i == LOOP_NUM) + { + if 
(useCPU) + cout << "average CPU time (noCamera) : "; + else + cout << "average GPU time (noCamera) : "; + cout << getTime() / LOOP_NUM << " ms" << endl; + + getFlowField(flow_vec[0], flow_vec[1], show_flow); + imshow("PyrLK [Sparse]", show_flow); + imwrite(outpath, show_flow); + } + } + } + + waitKey(); + + return 0; +} \ No newline at end of file From d58421c08eb578fe449e6b90cbeb7731fdb1a44b Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 19 Jun 2013 14:45:03 +0400 Subject: [PATCH 20/27] Make version-related test properties more useful. Namely, normalize their names to a common convention and remove useless text from their values. --- modules/ts/src/ts_func.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index e2998149d5..9b6b535816 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -2940,27 +2940,29 @@ MatComparator::operator()(const char* expr1, const char* expr2, void printVersionInfo(bool useStdOut) { - ::testing::Test::RecordProperty("CV_VERSION", CV_VERSION); + ::testing::Test::RecordProperty("cv_version", CV_VERSION); if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl; std::string buildInfo( cv::getBuildInformation() ); size_t pos1 = buildInfo.find("Version control"); - size_t pos2 = buildInfo.find("\n", pos1);\ + size_t pos2 = buildInfo.find('\n', pos1); if(pos1 != std::string::npos && pos2 != std::string::npos) { - std::string ver( buildInfo.substr(pos1, pos2-pos1) ); - ::testing::Test::RecordProperty("Version_control", ver); - if(useStdOut) std::cout << ver << std::endl; + size_t value_start = buildInfo.rfind(' ', pos2) + 1; + std::string ver( buildInfo.substr(value_start, pos2 - value_start) ); + ::testing::Test::RecordProperty("cv_vcs_version", ver); + if (useStdOut) std::cout << "OpenCV VCS version: " << ver << std::endl; } pos1 = buildInfo.find("inner version"); - pos2 = buildInfo.find("\n", 
pos1);\ + pos2 = buildInfo.find('\n', pos1); if(pos1 != std::string::npos && pos2 != std::string::npos) { - std::string ver( buildInfo.substr(pos1, pos2-pos1) ); - ::testing::Test::RecordProperty("inner_version", ver); - if(useStdOut) std::cout << ver << std::endl; + size_t value_start = buildInfo.rfind(' ', pos2) + 1; + std::string ver( buildInfo.substr(value_start, pos2 - value_start) ); + ::testing::Test::RecordProperty("cv_inner_vcs_version", ver); + if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl; } #ifdef CV_PARALLEL_FRAMEWORK From 1ed5fb937d34348becbf9fa3c837d1bdfe9c6f95 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 19 Jun 2013 15:39:11 +0400 Subject: [PATCH 21/27] Give cv::ocl::CLAHE a virtual destructor, for the usual reasons. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index d6dd4b983c..3324b7932e 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -520,6 +520,8 @@ namespace cv virtual Size getTilesGridSize() const = 0; virtual void collectGarbage() = 0; + + virtual ~CLAHE() {} }; CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); From c1f4fe1637aa1279d7eef7ef95f26ea92c9de967 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Thu, 20 Jun 2013 11:26:22 +0800 Subject: [PATCH 22/27] Fix a bug of convertTo. The bug was found that all 3-channel oclMat's were converted to 4-channel oclMat's after using convertTo function. 
--- modules/ocl/src/matrix_operations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 268a1fe9b5..172dfa5a89 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -394,7 +394,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be if( rtype < 0 ) rtype = type(); else - rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), oclchannels()); + rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); //int scn = channels(); int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); From 3e2c4563134e2b88408ad7b1a280a312eb46d4a4 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 20 Jun 2013 14:27:51 +0400 Subject: [PATCH 23/27] A few minor improvements to the XLS report generator. * In comparison column headers, switched the order of labels, so that it's "to" vs "from". * When a test was present, but not run successfully, put its status in the corresponding cell instead of coloring it gray. 
--- modules/ts/misc/xls-report.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index f6278bae00..c13842cdca 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -4,6 +4,7 @@ from __future__ import division import ast import logging +import numbers import os, os.path import re @@ -52,8 +53,7 @@ def collect_xml(collection, configuration, xml_fullname): for test in sorted(parseLogFile(xml_fullname)): test_results = module_tests.setdefault((test.shortName(), test.param()), {}) - if test.status == 'run': - test_results[configuration] = test.get("gmean") + test_results[configuration] = test.get("gmean") if test.status == 'run' else test.status def main(): arg_parser = ArgumentParser(description='Build an XLS performance report.') @@ -117,7 +117,7 @@ def main(): for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters'] + config_names + [None] - + [comp['from'] + '\nvs\n' + comp['to'] for comp in sheet_comparisons]): + + [comp['to'] + '\nvs\n' + comp['from'] for comp in sheet_comparisons]): sheet.row(0).write(i, caption, header_style) row = 1 @@ -143,13 +143,13 @@ def main(): sheet.write(row, 5 + i, None, no_time_style) for i, comp in enumerate(sheet_comparisons): - left = configs.get(comp["from"]) - right = configs.get(comp["to"]) + cmp_from = configs.get(comp["from"]) + cmp_to = configs.get(comp["to"]) col = 5 + len(config_names) + 1 + i - if left is not None and right is not None: + if isinstance(cmp_from, numbers.Number) and isinstance(cmp_to, numbers.Number): try: - speedup = left / right + speedup = cmp_from / cmp_to sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else bad_speedup_style if speedup < 0.9 else speedup_style) From 57317c3196fb9d5fbe9e00b16453dea7d534ac11 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 20 Jun 2013 19:39:02 +0400 Subject: [PATCH 24/27] Use log formatting 
as intended. --- modules/ts/misc/xls-report.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index c13842cdca..e79bb123dd 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -79,7 +79,7 @@ def main(): sheet_conf = ast.literal_eval(sheet_conf_file.read()) except Exception: sheet_conf = {} - logging.debug('no sheet.conf for {}'.format(sheet_path)) + logging.debug('no sheet.conf for %s', sheet_path) sheet_conf = dict(global_conf.items() + sheet_conf.items()) @@ -90,14 +90,14 @@ def main(): config_names = [p for p in os.listdir(sheet_path) if os.path.isdir(os.path.join(sheet_path, p))] except Exception as e: - logging.warning(e) + logging.warning('error while determining configuration names for %s: %s', sheet_path, e) continue collection = {} for configuration, configuration_path in \ [(c, os.path.join(sheet_path, c)) for c in config_names]: - logging.info('processing {}'.format(configuration_path)) + logging.info('processing %s', configuration_path) for xml_fullname in glob(os.path.join(configuration_path, '*.xml')): collect_xml(collection, configuration, xml_fullname) From e12963826337daa5ff67198e25b17f0dfdbf2edf Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 21 Jun 2013 14:05:29 +0800 Subject: [PATCH 25/27] Add a workaround to interpolate between oclMat and Input/OutputArray. 
--- modules/core/include/opencv2/core/core.hpp | 3 ++- modules/core/src/matrix.cpp | 30 ++++++++++++++++++++++ modules/ocl/include/opencv2/ocl/ocl.hpp | 8 ++++++ modules/ocl/src/matrix_operations.cpp | 29 +++++++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp index 2b7791958f..5ff31fe3a8 100644 --- a/modules/core/include/opencv2/core/core.hpp +++ b/modules/core/include/opencv2/core/core.hpp @@ -1322,7 +1322,8 @@ public: EXPR = 6 << KIND_SHIFT, OPENGL_BUFFER = 7 << KIND_SHIFT, OPENGL_TEXTURE = 8 << KIND_SHIFT, - GPU_MAT = 9 << KIND_SHIFT + GPU_MAT = 9 << KIND_SHIFT, + OCL_MAT =10 << KIND_SHIFT }; _InputArray(); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 7acb0e0dbd..c4c0041dd9 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -980,6 +980,11 @@ Mat _InputArray::getMat(int i) const return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat(); } + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { @@ -1062,6 +1067,11 @@ void _InputArray::getMatVector(vector& mv) const return; } + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { @@ -1189,6 +1199,11 @@ Size _InputArray::size(int i) const return tex->size(); } + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == GPU_MAT ); //if( k == GPU_MAT ) { @@ -1303,6 +1318,11 @@ bool _InputArray::empty() const if( k == OPENGL_TEXTURE ) return ((const ogl::Texture2D*)obj)->empty(); + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == GPU_MAT ); //if( k == GPU_MAT ) return ((const gpu::GpuMat*)obj)->empty(); @@ -1523,6 +1543,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all return; } + if( k == OCL_MAT ) + { + 
CV_Error(-1, "Not implemented"); + } + if( k == NONE ) { CV_Error(CV_StsNullPtr, "create() called for the missing output array" ); @@ -1634,6 +1659,11 @@ void _OutputArray::release() const return; } + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index d6dd4b983c..9fdd8f3e99 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -248,6 +248,11 @@ namespace cv operator Mat() const; void download(cv::Mat &m) const; + //! convert to _InputArray + operator _InputArray(); + + //! convert to _OutputArray + operator _OutputArray(); //! returns a new oclMatrix header for the specified row oclMat row(int y) const; @@ -387,6 +392,9 @@ namespace cv int wholecols; }; + // convert InputArray/OutputArray to oclMat + CV_EXPORTS oclMat& getOclMat(InputArray src); + CV_EXPORTS oclMat& getOclMat(OutputArray src); ///////////////////// mat split and merge ///////////////////////////////// //! 
Compose a multi-channel array from several single-channel arrays diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 268a1fe9b5..dc7deebe38 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -74,6 +74,7 @@ namespace cv } } + //////////////////////////////////////////////////////////////////////// // convert_C3C4 static void convert_C3C4(const cl_mem &src, oclMat &dst) @@ -227,6 +228,34 @@ void cv::ocl::oclMat::upload(const Mat &m) //download_channels = m.channels(); } +cv::ocl::oclMat::operator cv::_InputArray() +{ + _InputArray newInputArray; + newInputArray.flags = cv::_InputArray::OCL_MAT; + newInputArray.obj = reinterpret_cast(this); + return newInputArray; +} + +cv::ocl::oclMat::operator cv::_OutputArray() +{ + _OutputArray newOutputArray; + newOutputArray.flags = cv::_InputArray::OCL_MAT; + newOutputArray.obj = reinterpret_cast(this); + return newOutputArray; +} + +cv::ocl::oclMat& cv::ocl::getOclMat(InputArray src) +{ + CV_Assert(src.flags & cv::_InputArray::OCL_MAT); + return *reinterpret_cast(src.obj); +} + +cv::ocl::oclMat& cv::ocl::getOclMat(OutputArray src) +{ + CV_Assert(src.flags & cv::_InputArray::OCL_MAT); + return *reinterpret_cast(src.obj); +} + void cv::ocl::oclMat::download(cv::Mat &m) const { CV_DbgAssert(!this->empty()); From 6326739b443c0e87a251446893ee18225eeaf428 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 21 Jun 2013 14:50:08 +0800 Subject: [PATCH 26/27] a bug fix in stereo_match sample --- samples/ocl/stereo_match.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp index 565744baa6..abe75c70e1 100644 --- a/samples/ocl/stereo_match.cpp +++ b/samples/ocl/stereo_match.cpp @@ -192,10 +192,9 @@ void App::run() csbp(d_left, d_right, d_disp); break; } - workEnd(); - // Show results d_disp.download(disp); + workEnd(); if (method != BM) { disp.convertTo(disp, 0); From 
290c8db0a85ff6e4a9d84243624852a21190598f Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 21 Jun 2013 14:51:23 +0800 Subject: [PATCH 27/27] Revise naming for getOclMat function. --- modules/core/src/matrix.cpp | 12 ++++++------ modules/ocl/include/opencv2/ocl/ocl.hpp | 6 +++--- modules/ocl/src/matrix_operations.cpp | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index c4c0041dd9..5a3600b9b3 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -982,7 +982,7 @@ Mat _InputArray::getMat(int i) const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == STD_VECTOR_MAT ); @@ -1069,7 +1069,7 @@ void _InputArray::getMatVector(vector& mv) const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == STD_VECTOR_MAT ); @@ -1201,7 +1201,7 @@ Size _InputArray::size(int i) const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == GPU_MAT ); @@ -1320,7 +1320,7 @@ bool _InputArray::empty() const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == GPU_MAT ); @@ -1545,7 +1545,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } if( k == NONE ) @@ -1661,7 +1661,7 @@ void _OutputArray::release() const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == STD_VECTOR_MAT ); diff --git 
a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 9fdd8f3e99..ed887e61a6 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -392,9 +392,9 @@ namespace cv int wholecols; }; - // convert InputArray/OutputArray to oclMat - CV_EXPORTS oclMat& getOclMat(InputArray src); - CV_EXPORTS oclMat& getOclMat(OutputArray src); + // convert InputArray/OutputArray to oclMat references + CV_EXPORTS oclMat& getOclMatRef(InputArray src); + CV_EXPORTS oclMat& getOclMatRef(OutputArray src); ///////////////////// mat split and merge ///////////////////////////////// //! Compose a multi-channel array from several single-channel arrays diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index dc7deebe38..dcaf0418ac 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -244,13 +244,13 @@ cv::ocl::oclMat::operator cv::_OutputArray() return newOutputArray; } -cv::ocl::oclMat& cv::ocl::getOclMat(InputArray src) +cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src) { CV_Assert(src.flags & cv::_InputArray::OCL_MAT); return *reinterpret_cast(src.obj); } -cv::ocl::oclMat& cv::ocl::getOclMat(OutputArray src) +cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src) { CV_Assert(src.flags & cv::_InputArray::OCL_MAT); return *reinterpret_cast(src.obj);