Commit 672cf1f4 authored by marina.kolpakova

implement different behaviour for up- and down-scaling

parent 56c7ef06
...@@ -86,8 +86,11 @@ namespace icf { ...@@ -86,8 +86,11 @@ namespace icf {
} }
texture<int, cudaTextureType2D, cudaReadModeElementType> thogluv; texture<int, cudaTextureType2D, cudaReadModeElementType> thogluv;
__device__ __forceinline__ float rescale(const Level& level, uchar4& scaledRect, const Node& node)
template<bool isUp>
__device__ __forceinline__ float rescale(const Level& level, Node& node)
{ {
uchar4& scaledRect = node.rect;
float relScale = level.relScale; float relScale = level.relScale;
float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y); float farea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
...@@ -119,7 +122,44 @@ namespace icf { ...@@ -119,7 +122,44 @@ namespace icf {
return rootThreshold; return rootThreshold;
} }
__device__ __forceinline__ int get(const int x, int y, uchar4 area) template<>
// Specialization of rescale for isUp == true.
//
// Scales node.rect in place by level.relScale and returns the node threshold
// corrected for the rounding error the scaling introduced.
//
//   level - supplies relScale and the two-entry scaling table.
//   node  - rect is overwritten with the scaled box; threshold is only read.
//           The bits above bit 27 of threshold (printed as "feature") choose
//           the scaling entry, the low 28 bits are the raw threshold value.
//
// In this specialization the box area is taken as rect.z * rect.w, i.e. z/w
// are used as extents rather than as far-corner coordinates.
__device__ __forceinline__ float rescale<true>(const Level& level, Node& node)
{
    uchar4& box = node.rect;
    const float factor = level.relScale;

    // Area of the box before it is scaled (z and w multiplied directly).
    const float areaBefore = box.z * box.w;

    // Index into level.scaling: 1 when the feature id is above 6, otherwise 0.
    const int scalingIdx = (node.threshold >> 28) > 6;

    dprintf("%d: feature %d box %d %d %d %d\n",threadIdx.x, (node.threshold >> 28), box.x, box.y,
        box.z, box.w);
    dprintf("%d: rescale: %f [%f %f] selected %f\n",threadIdx.x, level.relScale, level.scaling[0], level.scaling[1],
        level.scaling[scalingIdx]);

    // Scale every component, rounding to the nearest integer.
    box.x = __float2int_rn(factor * box.x);
    box.y = __float2int_rn(factor * box.y);
    box.z = __float2int_rn(factor * box.z);
    box.w = __float2int_rn(factor * box.w);

    // Ratio between the area actually obtained after rounding and the ideal
    // (unrounded) scaled area; used to compensate the threshold.
    const float areaAfter = box.z * box.w;
    const float expectedArea = areaBefore * factor * factor;
    const float correction = __fdividef(areaAfter, expectedArea);

    dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28),
        box.x, box.y, box.z, box.w, expectedArea, areaAfter);

    // Raw threshold (low 28 bits), corrected for rounding, then multiplied by
    // the selected per-level scaling factor.
    const float scaledThreshold = (node.threshold & 0x0FFFFFFFU) * correction * level.scaling[scalingIdx];

    dprintf("%d: approximation %f %d -> %f %f\n",threadIdx.x, correction, (node.threshold & 0x0FFFFFFFU), scaledThreshold,
        level.scaling[scalingIdx]);

    return scaledThreshold;
}
template<bool isUp>
__device__ __forceinline__ int get(int x, int y, uchar4 area)
{ {
dprintf("%d: feature box %d %d %d %d\n",threadIdx.x, area.x, area.y, area.z, area.w); dprintf("%d: feature box %d %d %d %d\n",threadIdx.x, area.x, area.y, area.z, area.w);
...@@ -138,7 +178,30 @@ namespace icf { ...@@ -138,7 +178,30 @@ namespace icf {
return (a - b + c - d); return (a - b + c - d);
} }
// Specialization of get for isUp == true.
//
// Reads four values from the thogluv texture at the corners of a box and
// returns their alternating sum (a - b + c - d).
//
//   x, y - base position; area.x / area.y are added to it before sampling.
//   area - box description. Here area.z and area.w are added to the shifted
//          origin, i.e. they are used as horizontal / vertical extents.
//
// Returns the combined value of the four texture reads.
template<>
__device__ __forceinline__ int get<true>(int x, int y, uchar4 area)
{
    dprintf("%d: feature box %d %d %d %d\n",threadIdx.x, area.x, area.y, area.z, area.w);
    // NOTE(review): this trace prints x + area.z / y + area.w, while the reads
    // below use x + area.x + area.z / y + area.y + area.w -- confirm which
    // coordinates the message is meant to show.
    dprintf("%d: extract feature for: [%d %d] [%d %d] [%d %d] [%d %d]\n",threadIdx.x,
        x + area.x, y + area.y, x + area.z, y + area.y, x + area.z,y + area.w,
        x + area.x, y + area.w);
    // The format string has four %d specifiers; the leading thread index was
    // missing from the argument list, leaving the last specifier unmatched.
    dprintf("%d: at point %d %d with offset %d\n",threadIdx.x, x, y, 0);

    // Move to the box origin.
    x += area.x;
    y += area.y;

    // Sample the four corners: origin, +z horizontally, +z/+w, +w vertically.
    int a = tex2D(thogluv, x, y);
    int b = tex2D(thogluv, x + area.z, y);
    int c = tex2D(thogluv, x + area.z, y + area.w);
    int d = tex2D(thogluv, x, y + area.w);

    dprintf("%d retruved integral values: %d %d %d %d\n",threadIdx.x, a, b, c, d);

    return (a - b + c - d);
}
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
template<bool isUp>
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages, __global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr, const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
const int downscales) const int downscales)
...@@ -163,15 +226,15 @@ namespace icf { ...@@ -163,15 +226,15 @@ namespace icf {
dprintf("\n\n%d: stage: %d %d\n",threadIdx.x, st, nId); dprintf("\n\n%d: stage: %d %d\n",threadIdx.x, st, nId);
Node node = nodes[nId]; Node node = nodes[nId];
float threshold = rescale(level, node.rect, node); float threshold = rescale<isUp>(level, node);
int sum = get(x, y + (node.threshold >> 28) * 121, node.rect); int sum = get<isUp>(x, y + (node.threshold >> 28) * 121, node.rect);
int next = 1 + (int)(sum >= threshold); int next = 1 + (int)(sum >= threshold);
dprintf("%d: go: %d (%d >= %f)\n\n" ,threadIdx.x, next, sum, threshold); dprintf("%d: go: %d (%d >= %f)\n\n" ,threadIdx.x, next, sum, threshold);
node = nodes[nId + next]; node = nodes[nId + next];
threshold = rescale(level, node.rect, node); threshold = rescale<isUp>(level, node);
sum = get(x, y + (node.threshold >> 28) * 121, node.rect); sum = get<isUp>(x, y + (node.threshold >> 28) * 121, node.rect);
const int lShift = (next - 1) * 2 + (int)(sum >= threshold); const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
float impact = leaves[(st + threadIdx.x) * 4 + lShift]; float impact = leaves[(st + threadIdx.x) * 4 + lShift];
...@@ -192,7 +255,7 @@ namespace icf { ...@@ -192,7 +255,7 @@ namespace icf {
dprintf("%d: impact scaned %f\n" ,threadIdx.x, impact); dprintf("%d: impact scaned %f\n" ,threadIdx.x, impact);
confidence += impact; confidence += impact;
if(__any((confidence <= stages[(st + threadIdx.x)]))) break; if(__any((confidence <= stages[(st + threadIdx.x)]))) st += stEnd;
} }
if(st == stEnd && !threadIdx.x) if(st == stEnd && !threadIdx.x)
...@@ -204,6 +267,7 @@ namespace icf { ...@@ -204,6 +267,7 @@ namespace icf {
} }
} }
#else #else
template<bool isUp>
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages, __global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr, const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
const int downscales) const int downscales)
...@@ -231,8 +295,8 @@ namespace icf { ...@@ -231,8 +295,8 @@ namespace icf {
dprintf("Node: [%d %d %d %d] %d %d\n", node.rect.x, node.rect.y, node.rect.z, node.rect.w, dprintf("Node: [%d %d %d %d] %d %d\n", node.rect.x, node.rect.y, node.rect.z, node.rect.w,
node.threshold >> 28, node.threshold & 0x0FFFFFFFU); node.threshold >> 28, node.threshold & 0x0FFFFFFFU);
float threshold = rescale(level, node.rect, node); float threshold = rescale<isUp>(level, node);
int sum = get(x, y + (node.threshold >> 28) * 121, node.rect); int sum = get<isUp>(x, y + (node.threshold >> 28) * 121, node.rect);
dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z, dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z,
node.rect.w, threshold); node.rect.w, threshold);
...@@ -241,8 +305,8 @@ namespace icf { ...@@ -241,8 +305,8 @@ namespace icf {
dprintf("go: %d (%d >= %f)\n\n" ,next, sum, threshold); dprintf("go: %d (%d >= %f)\n\n" ,next, sum, threshold);
node = nodes[nId + next]; node = nodes[nId + next];
threshold = rescale(level, node.rect, node); threshold = rescale<isUp>(level, node);
sum = get(x, y + (node.threshold >> 28) * 121, node.rect); sum = get<isUp>(x, y + (node.threshold >> 28) * 121, node.rect);
const int lShift = (next - 1) * 2 + (int)(sum >= threshold); const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
float impact = leaves[st * 4 + lShift]; float impact = leaves[st * 4 + lShift];
...@@ -286,18 +350,18 @@ namespace icf { ...@@ -286,18 +350,18 @@ namespace icf {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>(); cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step)); cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0); test_kernel_warp<false><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaGetLastError());
grid = dim3(fw, fh / 8, 47 - downscales); grid = dim3(fw, fh / 8, 47 - downscales);
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, downscales); test_kernel_warp<true><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, downscales);
cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize()); cudaSafeCall( cudaDeviceSynchronize());
} }
void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter) PtrStepSzi counter, const int downscales)
{ {
int fw = 160; int fw = 160;
int fh = 120; int fh = 120;
...@@ -317,7 +381,11 @@ namespace icf { ...@@ -317,7 +381,11 @@ namespace icf {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>(); cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step)); cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale); if (scale >= downscales)
test_kernel_warp<true><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
else
test_kernel_warp<false><<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, scale);
cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize()); cudaSafeCall( cudaDeviceSynchronize());
} }
......
...@@ -68,7 +68,7 @@ namespace icf { ...@@ -68,7 +68,7 @@ namespace icf {
PtrStepSzi counter, const int downscales); PtrStepSzi counter, const int downscales);
void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter); PtrStepSzi counter, const int downscales);
} }
}}} }}}
...@@ -147,7 +147,7 @@ struct cv::gpu::SoftCascade::Filds ...@@ -147,7 +147,7 @@ struct cv::gpu::SoftCascade::Filds
{ {
cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int)); cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
device::icf::detectAtScale(scale, levels, octaves, stages, nodes, leaves, hogluv, objects, device::icf::detectAtScale(scale, levels, octaves, stages, nodes, leaves, hogluv, objects,
detCounter); detCounter, downscales);
} }
private: private:
...@@ -240,6 +240,9 @@ bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, c ...@@ -240,6 +240,9 @@ bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, c
{ {
FileNode fns = *it; FileNode fns = *it;
float scale = (float)fns[SC_OCT_SCALE]; float scale = (float)fns[SC_OCT_SCALE];
bool isUPOctave = scale >= 1;
scales.push_back(scale); scales.push_back(scale);
ushort nstages = saturate_cast<ushort>((int)fns[SC_OCT_STAGES]); ushort nstages = saturate_cast<ushort>((int)fns[SC_OCT_STAGES]);
ushort2 size; ushort2 size;
...@@ -286,6 +289,12 @@ bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, c ...@@ -286,6 +289,12 @@ bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, c
rect.z = saturate_cast<uchar>((int)*(r_it++)); rect.z = saturate_cast<uchar>((int)*(r_it++));
rect.w = saturate_cast<uchar>((int)*(r_it++)); rect.w = saturate_cast<uchar>((int)*(r_it++));
if (isUPOctave)
{
rect.z -= rect.x;
rect.w -= rect.y;
}
uint channel = saturate_cast<uint>((int)(*ftrs)[SC_F_CHANNEL]); uint channel = saturate_cast<uint>((int)(*ftrs)[SC_F_CHANNEL]);
vnodes.push_back(Node(rect, channel, th)); vnodes.push_back(Node(rect, channel, th));
++ftrs; ++ftrs;
......
...@@ -63,7 +63,7 @@ TEST(SoftCascade, detect) ...@@ -63,7 +63,7 @@ TEST(SoftCascade, detect)
cv::Mat coloredCpu = cv::imread(cvtest::TS::ptr()->get_data_path() cv::Mat coloredCpu = cv::imread(cvtest::TS::ptr()->get_data_path()
+ "../cv/cascadeandhog/bahnhof/image_00000000_0.png"); + "../cv/cascadeandhog/bahnhof/image_00000000_0.png");
ASSERT_FALSE(coloredCpu.empty()); ASSERT_FALSE(coloredCpu.empty());
GpuMat colored(coloredCpu), objectBoxes(1, 1000, CV_8UC1), rois; GpuMat colored(coloredCpu), objectBoxes(1, 100000, CV_8UC1), rois;
// ASSERT_NO_THROW( // ASSERT_NO_THROW(
// { // {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment