Commit 587fb494 authored by yao

Some accuracy fixes for HOG

parent d76468c2
...@@ -1816,8 +1816,14 @@ void cv::ocl::device::hog::normalize_hists(int nbins, ...@@ -1816,8 +1816,14 @@ void cv::ocl::device::hog::normalize_hists(int nbins,
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, "-D CPU"); localThreads, args, -1, -1, "-D CPU");
else else
{
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
char opt[32] = {0};
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1); localThreads, args, -1, -1, opt);
}
} }
void cv::ocl::device::hog::classify_hists(int win_height, int win_width, void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
...@@ -1879,8 +1885,14 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, ...@@ -1879,8 +1885,14 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1, "-D CPU"); localThreads, args, -1, -1, "-D CPU");
else else
{
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
char opt[32] = {0};
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
localThreads, args, -1, -1); localThreads, args, -1, -1, opt);
}
} }
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
......
...@@ -318,6 +318,10 @@ float reduce_smem(volatile __local float* smem, int size) ...@@ -318,6 +318,10 @@ float reduce_smem(volatile __local float* smem, int size)
if (tid < 32) if (tid < 32)
{ {
if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
#if WAVE_SIZE < 32
} barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) {
#endif
if (size >= 32) smem[tid] = sum = sum + smem[tid + 16]; if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
if (size >= 16) smem[tid] = sum = sum + smem[tid + 8]; if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
...@@ -418,6 +422,9 @@ __kernel void classify_hists_180_kernel( ...@@ -418,6 +422,9 @@ __kernel void classify_hists_180_kernel(
{ {
smem[tid] = product = product + smem[tid + 32]; smem[tid] = product = product + smem[tid + 32];
} }
#if WAVE_SIZE < 32
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if (tid < 16) if (tid < 16)
{ {
smem[tid] = product = product + smem[tid + 16]; smem[tid] = product = product + smem[tid + 16];
...@@ -487,6 +494,10 @@ __kernel void classify_hists_252_kernel( ...@@ -487,6 +494,10 @@ __kernel void classify_hists_252_kernel(
if (tid < 32) if (tid < 32)
{ {
smem[tid] = product = product + smem[tid + 32]; smem[tid] = product = product + smem[tid + 32];
#if WAVE_SIZE < 32
} barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) {
#endif
smem[tid] = product = product + smem[tid + 16]; smem[tid] = product = product + smem[tid + 16];
smem[tid] = product = product + smem[tid + 8]; smem[tid] = product = product + smem[tid + 8];
smem[tid] = product = product + smem[tid + 4]; smem[tid] = product = product + smem[tid + 4];
...@@ -553,6 +564,10 @@ __kernel void classify_hists_kernel( ...@@ -553,6 +564,10 @@ __kernel void classify_hists_kernel(
if (tid < 32) if (tid < 32)
{ {
smem[tid] = product = product + smem[tid + 32]; smem[tid] = product = product + smem[tid + 32];
#if WAVE_SIZE < 32
} barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16) {
#endif
smem[tid] = product = product + smem[tid + 16]; smem[tid] = product = product + smem[tid + 16];
smem[tid] = product = product + smem[tid + 8]; smem[tid] = product = product + smem[tid + 8];
smem[tid] = product = product + smem[tid + 4]; smem[tid] = product = product + smem[tid + 4];
......
...@@ -146,17 +146,17 @@ TEST_P(HOG, Detect) ...@@ -146,17 +146,17 @@ TEST_P(HOG, Detect)
if (winSize.width == 48 && winSize.height == 96) if (winSize.width == 48 && winSize.height == 96)
{ {
// daimler's base // daimler's base
ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96()); ocl_hog.setSVMDetector(hog.getDaimlerPeopleDetector());
hog.setSVMDetector(hog.getDaimlerPeopleDetector()); hog.setSVMDetector(hog.getDaimlerPeopleDetector());
} }
else if (winSize.width == 64 && winSize.height == 128) else if (winSize.width == 64 && winSize.height == 128)
{ {
ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128()); ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector());
hog.setSVMDetector(hog.getDefaultPeopleDetector()); hog.setSVMDetector(hog.getDefaultPeopleDetector());
} }
else else
{ {
ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector());
hog.setSVMDetector(hog.getDefaultPeopleDetector()); hog.setSVMDetector(hog.getDefaultPeopleDetector());
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment