Commit 72b499df authored by marina.kolpakova

add detection storing

parent 8108bd30
...@@ -104,7 +104,7 @@ PERF_TEST_P(SoftCascade, detect, Values<pair_string>(make_pair("cv/cascadeandhog ...@@ -104,7 +104,7 @@ PERF_TEST_P(SoftCascade, detect, Values<pair_string>(make_pair("cv/cascadeandhog
cv::gpu::SoftCascade cascade; cv::gpu::SoftCascade cascade;
ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath(GetParam().first))); ASSERT_TRUE(cascade.load(perf::TestBase::getDataPath(GetParam().first)));
cv::gpu::GpuMat rois, objectBoxes(1, 1000, CV_8UC1); cv::gpu::GpuMat rois, objectBoxes(1, 1000, CV_8UC4);
cascade.detectMultiScale(colored, rois, objectBoxes); cascade.detectMultiScale(colored, rois, objectBoxes);
TEST_CYCLE() TEST_CYCLE()
...@@ -117,7 +117,7 @@ PERF_TEST_P(SoftCascade, detect, Values<pair_string>(make_pair("cv/cascadeandhog ...@@ -117,7 +117,7 @@ PERF_TEST_P(SoftCascade, detect, Values<pair_string>(make_pair("cv/cascadeandhog
ASSERT_FALSE(colored.empty()); ASSERT_FALSE(colored.empty());
cv::SoftCascade cascade; cv::SoftCascade cascade;
ASSERT_TRUE(cascade.load(GetParam().first)); ASSERT_TRUE(cascade.load(getDataPath(GetParam().first)));
std::vector<cv::Rect> rois, objectBoxes; std::vector<cv::Rect> rois, objectBoxes;
cascade.detectMultiScale(colored, rois, objectBoxes); cascade.detectMultiScale(colored, rois, objectBoxes);
......
...@@ -57,14 +57,6 @@ ...@@ -57,14 +57,6 @@
namespace cv { namespace gpu { namespace device { namespace cv { namespace gpu { namespace device {
namespace icf { namespace icf {
// enum {
// HOG_BINS = 6,
// HOG_LUV_BINS = 10,
// WIDTH = 640,
// HEIGHT = 480,
// GREY_OFFSET = HEIGHT * HOG_LUV_BINS
// };
// ToDo: use textures or uncached load instruction. // ToDo: use textures or uncached load instruction.
__global__ void magToHist(const uchar* __restrict__ mag, __global__ void magToHist(const uchar* __restrict__ mag,
const float* __restrict__ angle, const int angPitch, const float* __restrict__ angle, const int angPitch,
...@@ -94,13 +86,6 @@ namespace icf { ...@@ -94,13 +86,6 @@ namespace icf {
} }
texture<int, cudaTextureType2D, cudaReadModeElementType> thogluv; texture<int, cudaTextureType2D, cudaReadModeElementType> thogluv;
// ToDo: do it in load time
// __device__ __forceinline__ float rescale(const Level& level, uchar4& scaledRect, const Node& node)
// {
// scaledRect = node.rect;
// return (float)(node.threshold & 0x0FFFFFFFU);
// }
__device__ __forceinline__ float rescale(const Level& level, uchar4& scaledRect, const Node& node) __device__ __forceinline__ float rescale(const Level& level, uchar4& scaledRect, const Node& node)
{ {
float relScale = level.relScale; float relScale = level.relScale;
...@@ -119,17 +104,12 @@ namespace icf { ...@@ -119,17 +104,12 @@ namespace icf {
float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y); float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
float approx = 1.f;
// if (fabs(farea - 0.f) > FLT_EPSILON && fabs(farea - 0.f) > FLT_EPSILON)
{
const float expected_new_area = farea * relScale * relScale; const float expected_new_area = farea * relScale * relScale;
approx = sarea / expected_new_area; float approx = sarea / expected_new_area;
}
dprintf("new rect: %d box %d %d %d %d rel areas %f %f\n", (node.threshold >> 28), dprintf("new rect: %d box %d %d %d %d rel areas %f %f\n", (node.threshold >> 28),
scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea); scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx; float rootThreshold = (node.threshold & 0x0FFFFFFFU) * approx;
rootThreshold *= level.scaling[(node.threshold >> 28) > 6]; rootThreshold *= level.scaling[(node.threshold >> 28) > 6];
...@@ -139,7 +119,7 @@ namespace icf { ...@@ -139,7 +119,7 @@ namespace icf {
return rootThreshold; return rootThreshold;
} }
__device__ __forceinline__ int get(const int x, int y, int channel, uchar4 area) __device__ __forceinline__ int get(const int x, int y, uchar4 area)
{ {
dprintf("feature box %d %d %d %d ", area.x, area.y, area.z, area.w); dprintf("feature box %d %d %d %d ", area.x, area.y, area.z, area.w);
...@@ -149,9 +129,6 @@ namespace icf { ...@@ -149,9 +129,6 @@ namespace icf {
x + area.x, y + area.w); x + area.x, y + area.w);
dprintf("at point %d %d with offset %d\n", x, y, 0); dprintf("at point %d %d with offset %d\n", x, y, 0);
int offset = channel * 121;
y += offset;
int a = tex2D(thogluv, x + area.x, y + area.y); int a = tex2D(thogluv, x + area.x, y + area.y);
int b = tex2D(thogluv, x + area.z, y + area.y); int b = tex2D(thogluv, x + area.z, y + area.y);
int c = tex2D(thogluv, x + area.z, y + area.w); int c = tex2D(thogluv, x + area.z, y + area.w);
...@@ -163,7 +140,7 @@ namespace icf { ...@@ -163,7 +140,7 @@ namespace icf {
} }
__global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages, __global__ void test_kernel(const Level* levels, const Octave* octaves, const float* stages,
const Node* nodes, const float* leaves, PtrStepSz<uchar4> objects) const Node* nodes, const float* leaves, PtrStepSz<uchar4> objects, uint* ctr)
{ {
const int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
...@@ -179,7 +156,7 @@ namespace icf { ...@@ -179,7 +156,7 @@ namespace icf {
float confidence = 0.f; float confidence = 0.f;
// #pragma unroll 8 // #pragma unroll 2
for(; st < stEnd; ++st) for(; st < stEnd; ++st)
{ {
dprintf("\n\nstage: %d\n", st); dprintf("\n\nstage: %d\n", st);
...@@ -190,7 +167,7 @@ namespace icf { ...@@ -190,7 +167,7 @@ namespace icf {
node.threshold >> 28, node.threshold & 0x0FFFFFFFU); node.threshold >> 28, node.threshold & 0x0FFFFFFFU);
float threshold = rescale(level, node.rect, node); float threshold = rescale(level, node.rect, node);
int sum = get(x, y, (node.threshold >> 28), node.rect); int sum = get(x, y + (node.threshold >> 28) * 121, node.rect);
dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z, dprintf("Node: [%d %d %d %d] %f\n", node.rect.x, node.rect.y, node.rect.z,
node.rect.w, threshold); node.rect.w, threshold);
...@@ -200,29 +177,30 @@ namespace icf { ...@@ -200,29 +177,30 @@ namespace icf {
node = nodes[nId + next]; node = nodes[nId + next];
threshold = rescale(level, node.rect, node); threshold = rescale(level, node.rect, node);
sum = get(x, y, (node.threshold >> 28), node.rect); sum = get(x, y + (node.threshold >> 28) * 121, node.rect);
const int lShift = (next - 1) * 2 + (int)(sum >= threshold); const int lShift = (next - 1) * 2 + (int)(sum >= threshold);
float impact = leaves[st * 4 + lShift]; float impact = leaves[st * 4 + lShift];
confidence += impact; confidence += impact;
if (confidence <= stages[st]) st = stEnd + 1; if (confidence <= stages[st]) st = stEnd + 10;
dprintf("decided: %d (%d >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact); dprintf("decided: %d (%d >= %f) %d %f\n\n" ,next, sum, threshold, lShift, impact);
dprintf("extracted stage: %f\n", stages[st]); dprintf("extracted stage: %f\n", stages[st]);
dprintf("computed score: %f\n\n", confidence); dprintf("computed score: %f\n\n", confidence);
} }
// if (st == stEnd) if(st == stEnd)
// printf("%d %d %d\n", x, y, st); {
int idx = atomicInc(ctr, objects.cols);
uchar4 val; uchar4 val;
val.x = (int)confidence; val.x = x * 4;
if (x == y) objects(0, threadIdx.x) = val; objects(0, idx) = val;
}
} }
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects) const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter)
{ {
int fw = 160; int fw = 160;
int fh = 120; int fh = 120;
...@@ -235,11 +213,12 @@ namespace icf { ...@@ -235,11 +213,12 @@ namespace icf {
const float* st = (const float*)stages.ptr(); const float* st = (const float*)stages.ptr();
const Node* nd = (const Node*)nodes.ptr(); const Node* nd = (const Node*)nodes.ptr();
const float* lf = (const float*)leaves.ptr(); const float* lf = (const float*)leaves.ptr();
uint* ctr = (uint*)counter.ptr();
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>(); cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step)); cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
test_kernel<<<grid, block>>>(l, oct, st, nd, lf, objects); test_kernel<<<grid, block>>>(l, oct, st, nd, lf, objects, ctr);
cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize()); cudaSafeCall( cudaDeviceSynchronize());
......
...@@ -60,7 +60,8 @@ namespace icf { ...@@ -60,7 +60,8 @@ namespace icf {
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle, void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins); const int fw, const int fh, const int bins);
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects); const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter);
} }
}}} }}}
...@@ -75,6 +76,7 @@ struct cv::gpu::SoftCascade::Filds ...@@ -75,6 +76,7 @@ struct cv::gpu::SoftCascade::Filds
shrunk.create(FRAME_HEIGHT / 4 * HOG_LUV_BINS, FRAME_WIDTH / 4, CV_8UC1); shrunk.create(FRAME_HEIGHT / 4 * HOG_LUV_BINS, FRAME_WIDTH / 4, CV_8UC1);
integralBuffer.create(shrunk.rows + 1 * HOG_LUV_BINS, shrunk.cols + 1, CV_32SC1); integralBuffer.create(shrunk.rows + 1 * HOG_LUV_BINS, shrunk.cols + 1, CV_32SC1);
hogluv.create((FRAME_HEIGHT / 4 + 1) * HOG_LUV_BINS, FRAME_WIDTH / 4 + 1, CV_32SC1); hogluv.create((FRAME_HEIGHT / 4 + 1) * HOG_LUV_BINS, FRAME_WIDTH / 4 + 1, CV_32SC1);
detCounter.create(1,1, CV_32SC1);
} }
// scales range // scales range
...@@ -90,6 +92,8 @@ struct cv::gpu::SoftCascade::Filds ...@@ -90,6 +92,8 @@ struct cv::gpu::SoftCascade::Filds
GpuMat leaves; GpuMat leaves;
GpuMat levels; GpuMat levels;
GpuMat detCounter;
// preallocated buffer 640x480x10 for hogluv + 640x480 for gray // preallocated buffer 640x480x10 for hogluv + 640x480 for gray
GpuMat plane; GpuMat plane;
...@@ -127,7 +131,8 @@ struct cv::gpu::SoftCascade::Filds ...@@ -127,7 +131,8 @@ struct cv::gpu::SoftCascade::Filds
bool fill(const FileNode &root, const float mins, const float maxs); bool fill(const FileNode &root, const float mins, const float maxs);
// Forwards the cascade buffers (levels, octaves, stages, nodes, leaves, hogluv) and the
// output `objects` matrix to device::icf::detect.
// Each line below shows the old revision (left) and the new revision (right) of this diff.
// In the new revision detCounter is zeroed with cudaMemset before the call and is passed
// to device::icf::detect as an additional argument.
// NOTE(review): the cudaMemset byte count is detCounter.step * detCounter.rows * sizeof(int).
//   If `step` is already a row pitch in bytes (presumably, as for GpuMat -- confirm), the
//   extra sizeof(int) factor makes the memset larger than the matrix storage.
// NOTE(review): the cudaMemset return value is not checked here.
// NOTE(review): the `stream` parameter is not used in this body.
void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
{ {
device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects); cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter);
} }
private: private:
...@@ -506,14 +511,13 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& ...@@ -506,14 +511,13 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
GpuMat sum(flds.hogluv, cv::Rect(0, (fh + 1) * i, fw + 1, fh + 1)); GpuMat sum(flds.hogluv, cv::Rect(0, (fh + 1) * i, fw + 1, fh + 1));
cv::gpu::integralBuffered(channel, sum, flds.integralBuffer); cv::gpu::integralBuffered(channel, sum, flds.integralBuffer);
} }
#endif #endif
cudaStream_t stream = StreamAccessor::getStream(s); cudaStream_t stream = StreamAccessor::getStream(s);
// detection
flds.detect(objects, stream); flds.detect(objects, stream);
// // flds.storage.frame(colored, stream); // cv::Mat out(flds.detCounter);
// std::cout << out << std::endl;
} }
#endif #endif
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment