Commit e496345d authored by Marina Kolpakova

Added LBP cascade test; fixed race condition problems

parent 248f39e1
...@@ -290,7 +290,7 @@ namespace cv { namespace gpu { namespace device ...@@ -290,7 +290,7 @@ namespace cv { namespace gpu { namespace device
DevMem2D_<int4> objects, DevMem2D_<int4> objects,
unsigned int* classified); unsigned int* classified);
int connectedConmonents(DevMem2D_<int4> candidates, int groupThreshold, float grouping_eps, unsigned int* nclasses); int connectedConmonents(DevMem2D_<int4> candidates, DevMem2D_<int4> objects,int groupThreshold, float grouping_eps, unsigned int* nclasses);
} }
}}} }}}
...@@ -308,6 +308,7 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp ...@@ -308,6 +308,7 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
else else
objects.create(1 , defaultObjSearchNum, CV_32SC4); objects.create(1 , defaultObjSearchNum, CV_32SC4);
GpuMat candidates(1 , defaultObjSearchNum, CV_32SC4);
if (maxObjectSize == cv::Size()) if (maxObjectSize == cv::Size())
maxObjectSize = image.size(); maxObjectSize = image.size();
...@@ -317,6 +318,7 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp ...@@ -317,6 +318,7 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
unsigned int* dclassified; unsigned int* dclassified;
cudaMalloc(&dclassified, sizeof(int)); cudaMalloc(&dclassified, sizeof(int));
cudaMemcpy(dclassified, classified, sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(dclassified, classified, sizeof(int), cudaMemcpyHostToDevice);
int step;
for( double factor = 1; ; factor *= scaleFactor ) for( double factor = 1; ; factor *= scaleFactor )
{ {
...@@ -334,25 +336,22 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp ...@@ -334,25 +336,22 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
// continue; // continue;
cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, CV_INTER_LINEAR); cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, CV_INTER_LINEAR);
integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32SC1);
cv::gpu::integral(scaledImageBuffer, integral); cv::gpu::integral(scaledImageBuffer, integral);
int step = (factor <= 2.) + 1; step = (factor <= 2.) + 1;
cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat, cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,
integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects, dclassified); integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, factor, step, subsetSize, candidates, dclassified);
} }
cudaMemcpy(classified, dclassified, sizeof(int), cudaMemcpyDeviceToHost);
GpuMat candidates(1, *classified, objects.type(), objects.ptr());
// std::cout << *classified << " Results: " << cv::Mat(candidates) << std::endl;
if (groupThreshold <= 0 || objects.empty()) if (groupThreshold <= 0 || objects.empty())
return 0; return 0;
cv::gpu::device::lbp::connectedConmonents(candidates, groupThreshold, grouping_eps, dclassified); cv::gpu::device::lbp::connectedConmonents(candidates, objects, groupThreshold, grouping_eps, dclassified);
cudaMemcpy(classified, dclassified, sizeof(int), cudaMemcpyDeviceToHost);
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
return *classified; step = *classified;
delete[] classified;
cudaFree(dclassified);
return step;
} }
// ============ old fashioned haar cascade ==============================================// // ============ old fashioned haar cascade ==============================================//
......
...@@ -51,8 +51,8 @@ namespace cv { namespace gpu { namespace device ...@@ -51,8 +51,8 @@ namespace cv { namespace gpu { namespace device
__global__ void lbp_classify_stump(Stage* stages, int nstages, ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features, __global__ void lbp_classify_stump(Stage* stages, int nstages, ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features,
const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, unsigned int* n) const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, unsigned int* n)
{ {
int y = threadIdx.x * scale; int x = threadIdx.x * step;
int x = blockIdx.x * scale; int y = blockIdx.x * step;
int current_node = 0; int current_node = 0;
int current_leave = 0; int current_leave = 0;
...@@ -92,7 +92,7 @@ namespace cv { namespace gpu { namespace device ...@@ -92,7 +92,7 @@ namespace cv { namespace gpu { namespace device
} }
template<typename Pr> template<typename Pr>
__global__ void disjoin(int4* candidates, unsigned int n, int groupThreshold, float grouping_eps, unsigned int* nclasses) __global__ void disjoin(int4* candidates, int4* objects, unsigned int n, int groupThreshold, float grouping_eps, unsigned int* nclasses)
{ {
using cv::gpu::device::VecTraits; using cv::gpu::device::VecTraits;
unsigned int tid = threadIdx.x; unsigned int tid = threadIdx.x;
...@@ -119,7 +119,7 @@ namespace cv { namespace gpu { namespace device ...@@ -119,7 +119,7 @@ namespace cv { namespace gpu { namespace device
__syncthreads(); __syncthreads();
atomicInc((unsigned int*)labels + cls, n); atomicInc((unsigned int*)labels + cls, n);
labels[n - 1] = 0; *nclasses = 0;
int active = labels[tid]; int active = labels[tid];
if (active) if (active)
...@@ -152,11 +152,9 @@ namespace cv { namespace gpu { namespace device ...@@ -152,11 +152,9 @@ namespace cv { namespace gpu { namespace device
(n2 > max(3, n1) || n1 < 3) ) (n2 > max(3, n1) || n1 < 3) )
break; break;
} }
if( j == n) if( j == n)
{ {
// printf("founded gpu %d %d %d %d \n", r1[0], r1[1], r1[2], r1[3]); objects[atomicInc(nclasses, n)] = VecTraits<int4>::make(r1[0], r1[1], r1[2], r1[3]);
candidates[atomicInc((unsigned int*)labels + n -1, n)] = VecTraits<int4>::make(r1[0], r1[1], r1[2], r1[3]);
} }
} }
} }
...@@ -179,11 +177,11 @@ namespace cv { namespace gpu { namespace device ...@@ -179,11 +177,11 @@ namespace cv { namespace gpu { namespace device
workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects, classified); workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects, classified);
} }
int connectedConmonents(DevMem2D_<int4> candidates, int groupThreshold, float grouping_eps, unsigned int* nclasses) int connectedConmonents(DevMem2D_<int4> candidates, DevMem2D_<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)
{ {
int threads = candidates.cols; int threads = candidates.cols;
int smem_amount = threads * sizeof(int) + threads * sizeof(int4); int smem_amount = threads * sizeof(int) + threads * sizeof(int4);
disjoin<InSameComponint><<<1, threads, smem_amount>>>((int4*)candidates.ptr(), candidates.cols, groupThreshold, grouping_eps, nclasses); disjoin<InSameComponint><<<1, threads, smem_amount>>>((int4*)candidates.ptr(), (int4*)objects.ptr(), candidates.cols, groupThreshold, grouping_eps, nclasses);
return 0; return 0;
} }
} }
......
...@@ -65,12 +65,12 @@ namespace lbp{ ...@@ -65,12 +65,12 @@ namespace lbp{
struct InSameComponint struct InSameComponint
{ {
public: public:
__device__ __forceinline__ InSameComponint(float _eps) : eps(_eps * 0.5) {} __device__ __forceinline__ InSameComponint(float _eps) : eps(_eps) {}
__device__ __forceinline__ InSameComponint(const InSameComponint& other) : eps(other.eps) {} __device__ __forceinline__ InSameComponint(const InSameComponint& other) : eps(other.eps) {}
__device__ __forceinline__ bool operator()(const int4& r1, const int4& r2) const __device__ __forceinline__ bool operator()(const int4& r1, const int4& r2) const
{ {
double delta = eps * (min(r1.z, r2.z) + min(r1.w, r2.w)); float delta = eps * (min(r1.z, r2.z) + min(r1.w, r2.w)) * 0.5;
return abs(r1.x - r2.x) <= delta && abs(r1.y - r2.y) <= delta return abs(r1.x - r2.x) <= delta && abs(r1.y - r2.y) <= delta
&& abs(r1.x + r1.z - r2.x - r2.z) <= delta && abs(r1.y + r1.w - r2.y - r2.w) <= delta; && abs(r1.x + r1.z - r2.x - r2.z) <= delta && abs(r1.y + r1.w - r2.y - r2.w) <= delta;
......
...@@ -308,4 +308,57 @@ INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier, testing::Combine( ...@@ -308,4 +308,57 @@ INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_Read_classifier, testing::Combine(
testing::Values<int>(0) testing::Values<int>(0)
)); ));
// Fixture for the GPU LBP cascade classification test, parameterized by a
// cv::gpu::DeviceInfo and an int.
// NOTE(review): PARAM_TEST_CASE is a project macro defined outside this view;
// presumably it declares a parameterized test fixture class - confirm.
PARAM_TEST_CASE(LBP_classify, cv::gpu::DeviceInfo, int)
{
// Device descriptor taken from test parameter 0 in SetUp().
cv::gpu::DeviceInfo devInfo;
// Stores parameter 0 and passes its device id to cv::gpu::setDevice before
// the test body runs. The int parameter (index 1) is not read here.
virtual void SetUp()
{
devInfo = GET_PARAM(0);
cv::gpu::setDevice(devInfo.deviceID());
}
};
// Runs the CPU cascade classifier and the GPU LBP cascade classifier on the
// left half of the same test image and draws both sets of detections onto a
// copy of that image (CPU detections with Scalar(255, 0, 0, 255), GPU
// detections with Scalar(0, 0, 255, 255)).
// NOTE(review): despite the name, the CPU and GPU results are never compared
// and the marked image is not inspected; the test only checks that loading
// succeeds and that both detectors run to completion.
TEST_P(LBP_classify, Accuracy)
{
    const std::string dataPath = std::string(cvtest::TS::ptr()->get_data_path());
    const std::string classifierXmlPath = dataPath + "lbpcascade/lbpcascade_frontalface.xml";
    const std::string imagePath = dataPath + "lbpcascade/er.png";

    cv::CascadeClassifier cpuClassifier(classifierXmlPath);
    ASSERT_FALSE(cpuClassifier.empty());

    cv::Mat image = cv::imread(imagePath);
    // Verify the load before the image is used: cropping and colour
    // conversion of an empty matrix would fail inside OpenCV instead of
    // producing a readable test failure.
    ASSERT_FALSE(image.empty());

    // Only the left half of the picture is processed.
    image = image.colRange(0, image.cols / 2);
    ASSERT_FALSE(image.empty());

    cv::Mat grey;
    cvtColor(image, grey, CV_BGR2GRAY);

    // Reference run on the CPU.
    std::vector<cv::Rect> rects;
    cpuClassifier.detectMultiScale(grey, rects);

    cv::Mat markedImage = image.clone();
    for (std::vector<cv::Rect>::const_iterator it = rects.begin(); it != rects.end(); ++it)
        cv::rectangle(markedImage, *it, cv::Scalar(255, 0, 0, 255));

    // Same image through the GPU classifier.
    cv::gpu::CascadeClassifier_GPU_LBP gpuClassifier;
    ASSERT_TRUE(gpuClassifier.load(classifierXmlPath));

    cv::gpu::GpuMat gpu_rects, buffer;
    cv::gpu::GpuMat tested(grey);
    int count = gpuClassifier.detectMultiScale(tested, buffer, gpu_rects);

    // Download the detections; each one is read below as four consecutive
    // ints passed to the cv::Rect constructor.
    cv::Mat gpu_f(gpu_rects);

    // Guard the raw pointer walk below: the reported count must fit into the
    // downloaded buffer, otherwise the loop would read past its end.
    ASSERT_LE(count, gpu_f.rows * gpu_f.cols);

    const int* gpu_faces = gpu_f.ptr<int>();
    for (int i = 0; i < count; i++)
    {
        cv::Rect r(gpu_faces[i * 4], gpu_faces[i * 4 + 1], gpu_faces[i * 4 + 2], gpu_faces[i * 4 + 3]);
        cv::rectangle(markedImage, r, cv::Scalar(0, 0, 255, 255));
    }
}
// Instantiates LBP_classify under the GPU_ObjDetect prefix, combining every
// entry of ALL_DEVICES with the single int value 0.
// NOTE(review): ALL_DEVICES is defined outside this view; the int parameter
// is not read by the visible fixture or test body.
INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, LBP_classify, testing::Combine(
ALL_DEVICES,
testing::Values<int>(0)
));
} // namespace } // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment