Commit 95bdd4b6 authored by Roman Donchenko's avatar Roman Donchenko Committed by OpenCV Buildbot

Merge pull request #1189 from pengx17:2.4_sort_by_key

parents a7da1299 8b90cb37
......@@ -481,4 +481,40 @@ Performs generalized matrix multiplication.
* **GEMM_1_T** transpose ``src1``
* **GEMM_2_T** transpose ``src2``
.. seealso:: :ocv:func:`gemm`
\ No newline at end of file
.. seealso:: :ocv:func:`gemm`
ocl::sortByKey
------------------
Returns void
.. ocv:function:: void ocl::sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false)
:param keys: The keys to be used as sorting indices.
:param values: The array of values.
:param isGreaterThan: Determine sorting order.
:param method: supported sorting methods:
* **SORT_BITONIC** bitonic sort, only supports power-of-2 buffer sizes
* **SORT_SELECTION** selection sort, currently cannot sort duplicate keys
* **SORT_MERGE** merge sort
* **SORT_RADIX** radix sort, only supports signed int/float keys (``CV_32S``/``CV_32F``)
Sorts all the elements in values according to their corresponding keys.
The element unit in the values to be sorted is determined from the data type,
i.e., a ``CV_32FC2`` input ``{a1a2, b1b2}`` will be considered as two elements, regardless of its matrix dimensions.
Both keys and values will be sorted in place.
Keys must be a **single**-channel ``oclMat``.
Example::
input -
keys = {2, 3, 1} (CV_8UC1)
values = {10,5, 4,3, 6,2} (CV_8UC2)
sortByKey(keys, values, SORT_SELECTION, false);
output -
keys = {1, 2, 3} (CV_8UC1)
values = {6,2, 10,5, 4,3} (CV_8UC2)
......@@ -1673,6 +1673,31 @@ namespace cv
oclMat diff_buf;
oclMat norm_buf;
};
//! Sorting methods currently supported by sortByKey().
enum
{
    SORT_BITONIC   = 0, //!< only supports power-of-2 buffer sizes
    SORT_SELECTION = 1, //!< cannot sort duplicate keys
    SORT_MERGE     = 2,
    SORT_RADIX     = 3  //!< only supports signed int/float keys (CV_32S/CV_32F)
};
//! Sorts all the elements in values according to their corresponding keys.
//
// keys          - the keys used as sorting indices; must be a single-channel oclMat.
// values        - the values reordered together with their keys.
// method        - one of SORT_BITONIC, SORT_SELECTION, SORT_MERGE or SORT_RADIX.
// isGreaterThan - selects the sorting order; the default (false) orders the keys
//                 ascending, as shown in the example below.
//
// The element unit in the values to be sorted is determined from the data type,
// i.e., a CV_32FC2 input {a1a2, b1b2} will be considered as two elements, regardless of its
// matrix dimensions.
// Both keys and values are sorted in place.
//
// Example:
// input -
// keys = {2, 3, 1} (CV_8UC1)
// values = {10,5, 4,3, 6,2} (CV_8UC2)
// sortByKey(keys, values, SORT_SELECTION, false);
// output -
// keys = {1, 2, 3} (CV_8UC1)
// values = {6,2, 10,5, 4,3} (CV_8UC2)
void CV_EXPORTS sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false);
}
}
#if defined _MSC_VER && _MSC_VER >= 1200
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Enable the cl_khr_byte_addressable_store extension for this program.
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
// Build-time configuration; each default below applies only when the macro is not already defined.
#ifndef N // width of one radix digit in bits; the kernels use (1 << N) radices per pass
#define N 4
#endif
// K_T: element type of the key buffers.
#ifndef K_T
#define K_T float
#endif
// V_T: element type of the value buffers.
#ifndef V_T
#define V_T float
#endif
// IS_GT: when non-zero the kernels reverse the bucket order, so larger digits are placed first.
#ifndef IS_GT
#define IS_GT 0
#endif
// from Thrust::b40c, link:
// https://github.com/thrust/thrust/blob/master/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h
//
// Converts the raw 32-bit pattern of a key into the unsigned value the radix passes work on.
//   K_FLT defined : all bits are flipped when the sign bit is set, otherwise only the sign bit.
//   K_INT defined : only the sign bit is flipped.
//   neither       : the bits are returned unchanged.
__inline uint convertKey(uint converted_key)
{
#if defined(K_FLT)
    const unsigned int flipBits = (converted_key & 0x80000000) ? 0xffffffff : 0x80000000;
    return converted_key ^ flipBits;
#elif defined(K_INT)
    return converted_key ^ (1u << ((sizeof(int) * 8) - 1));
#else
    return converted_key;
#endif
}
//FIXME(pengx17):
// exclusive scan, need to be optimized as this is too naive...
//
// Writes the exclusive prefix sum of input[0 .. size-1] into output[0 .. size-1].
// The whole scan is performed serially by the work-item whose global id is 0;
// every other work-item returns immediately.
kernel
void naiveScanAddition(
    __global int * input,
    __global int * output,
    int size
    )
{
    if(get_global_id(0) != 0)
    {
        return;
    }

    output[0] = 0;
    int i = 1;
    while(i < size)
    {
        // each entry is the previous entry plus the previous input element
        output[i] = output[i - 1] + input[i - 1];
        ++i;
    }
}
// following is ported from
// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_uint_kernels.cl
//
// Counts, per work-item, how many of its keys fall into each radix of the current digit.
//
// unsortedKeys : keys to histogram; each work-item reads (1 << N) consecutive keys starting
//                at globalId * (1 << N).
// buckets      : output counters; the counter for radix r of work-item g is stored at
//                buckets[r * ((1 << N) * numOfGroups) + g].
// shiftCount   : bit offset of the digit examined in this pass.
//
// With IS_GT set, a digit value v is counted in slot (RADICES_T - v - 1) instead of v.
kernel
void histogramRadixN (
    __global K_T* unsortedKeys,
    __global int * buckets,
    uint shiftCount
    )
{
    const int RADIX_T = N;
    const int RADICES_T = (1 << RADIX_T);
    const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
    const int MASK_T = (1 << RADIX_T) - 1;

    // One counter per radix. The size is derived from N (instead of a literal 16) so the
    // array always matches the loop bounds and indices below.
    int localBuckets[1 << N];
    for(int i = 0; i < RADICES_T; ++i)
    {
        localBuckets[i] = 0;
    }

    int globalId = get_global_id(0);
    int numOfGroups = get_num_groups(0);

    /* Calculate thread-histograms */
    for(int i = 0; i < NUM_OF_ELEMENTS_PER_WORK_ITEM_T; ++i)
    {
        uint value = convertKey(as_uint(unsortedKeys[mad24(globalId, NUM_OF_ELEMENTS_PER_WORK_ITEM_T, i)]));
        value = (value >> shiftCount) & MASK_T;
#if IS_GT
        localBuckets[RADICES_T - value - 1]++;
#else
        localBuckets[value]++;
#endif
    }

    /* Publish the per-work-item counters, one radix per stride */
    for(int i = 0; i < NUM_OF_ELEMENTS_PER_WORK_ITEM_T; ++i)
    {
        buckets[mad24(i, RADICES_T * numOfGroups, globalId) ] = localBuckets[i];
    }
}
// Scatters keys and values to their sorted positions for the current digit.
//
// unsortedKeys / unsortedVals : input buffers; each work-item handles (1 << N) consecutive
//                               elements starting at globalId * (1 << N).
// scanedBuckets               : start offsets per radix and work-item; the entry for radix r of
//                               work-item g is read from
//                               scanedBuckets[r * (numOfGroups << N) + g].
// shiftCount                  : bit offset of the digit examined in this pass.
// sortedKeys / sortedVals     : output buffers receiving the permuted elements.
//
// With IS_GT set, the offset for digit value v is taken from radix slot (RADICES_T - v - 1).
kernel
void permuteRadixN (
    __global K_T* unsortedKeys,
    __global V_T* unsortedVals,
    __global int* scanedBuckets,
    uint shiftCount,
    __global K_T* sortedKeys,
    __global V_T* sortedVals
    )
{
    const int RADIX_T = N;
    const int RADICES_T = (1 << RADIX_T);
    const int MASK_T = (1<<RADIX_T) -1;
    int globalId = get_global_id(0);
    int numOfGroups = get_num_groups(0);
    const int NUM_OF_ELEMENTS_PER_WORK_GROUP_T = numOfGroups << N;

    // One write cursor per radix. The size is derived from N (instead of a literal 16) so the
    // array always matches the loop bounds and the masked digit used as index below.
    int localIndex[1 << N];

    /* Load this work-item's start offset for every radix */
    for(int i = 0; i < RADICES_T; ++i)
    {
#if IS_GT
        localIndex[i] = scanedBuckets[mad24(RADICES_T - i - 1, NUM_OF_ELEMENTS_PER_WORK_GROUP_T, globalId)];
#else
        localIndex[i] = scanedBuckets[mad24(i, NUM_OF_ELEMENTS_PER_WORK_GROUP_T, globalId)];
#endif
    }

    /* Permute elements to appropriate location */
    for(int i = 0; i < RADICES_T; ++i)
    {
        int old_idx = mad24(globalId, RADICES_T, i);
        K_T ovalue = unsortedKeys[old_idx];
        uint value = convertKey(as_uint(ovalue));
        uint maskedValue = (value >> shiftCount) & MASK_T;
        uint index = localIndex[maskedValue];
        sortedKeys[index] = ovalue;
        sortedVals[index] = unsortedVals[old_idx];
        // advance the cursor so the next element with the same digit lands right after this one
        localIndex[maskedValue] = index + 1;
    }
}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
// Build-time configuration; each default applies only when the macro is not already defined.

// Element type of the key buffers.
#if !defined(K_T)
#define K_T float
#endif

// Element type of the value buffers.
#if !defined(V_T)
#define V_T float
#endif

// Non-zero selects the "greater than" ordering predicate below.
#if !defined(IS_GT)
#define IS_GT 0
#endif

// my_comp(a, b): ordering predicate shared by the kernels in this file.
#if IS_GT
#define my_comp(a, b) ((a) > (b))
#else
#define my_comp(a, b) ((a) < (b))
#endif
/////////////////////// Bitonic sort ////////////////////////////
// ported from
// https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_by_key_kernels.cl
//
// Performs one compare-exchange pass of a bitonic sort on keys, moving vals along with them.
//
// keys, vals  : buffers updated in place.
// count       : number of elements; only work-items with id < count / 2 do any work.
// stage       : current stage; blocks of (1 << stage) work-items share a sort direction.
// passOfStage : current pass within the stage; partners are (1 << (stage - passOfStage)) apart.
__kernel
void bitonicSort
(
    __global K_T * keys,
    __global V_T * vals,
    int count,
    int stage,
    int passOfStage
)
{
    const int threadId = get_global_id(0);
    if(threadId < count / 2)
    {
        const int pairDistance = 1 << (stage - passOfStage);
        const int blockWidth = pairDistance * 2;

        // the two elements compared by this work-item
        const int firstId = min( (threadId % pairDistance)
                                 + (threadId / pairDistance) * blockWidth, count );
        const int secondId = min( firstId + pairDistance, count );

        const V_T lval = vals[firstId];
        const V_T rval = vals[secondId];
        const K_T lkey = keys[firstId];
        const K_T rkey = keys[secondId];

        // work-items in odd blocks of (1 << stage) write to swapped destinations,
        // which sorts their pair in the opposite direction
        const int sameDirectionBlockWidth = 1 << stage;
        const bool swapTargets = ((threadId / sameDirectionBlockWidth) % 2 == 1);
        const int leftId  = swapTargets ? secondId : firstId;
        const int rightId = swapTargets ? firstId  : secondId;

        if(my_comp(lkey, rkey))
        {
            keys[rightId] = rkey;
            keys[leftId] = lkey;
            vals[rightId] = rval;
            vals[leftId] = lval;
        }
        else
        {
            keys[rightId] = lkey;
            keys[leftId] = rkey;
            vals[rightId] = lval;
            vals[leftId] = rval;
        }
    }
}
/////////////////////// Selection sort ////////////////////////////
//kernel is ported from Bolt library:
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
//
// Sorts the elements owned by each work-group among themselves.
// Every work-item ranks its own key against all keys of its group (staged in `scratch`)
// and writes its key/value to that rank inside the group's slice of keys/vals.
//
// keys, vals : buffers updated in place.
// count      : total number of elements; the last group handles the remaining tail.
// scratch    : local buffer indexed by local id (one key per work-item of the group).
__kernel
void selectionSortLocal
(
    __global K_T * keys,
    __global V_T * vals,
    const int count,
    __local K_T * scratch
)
{
    const int lid         = get_local_id(0);   // index in workgroup
    const int numOfGroups = get_num_groups(0); // number of workgroups
    const int groupID     = get_group_id(0);
    const int wg          = get_local_size(0); // workgroup size = block size

    // move both pointers to the first element of this work-group
    const int offset = groupID * wg;
    vals += offset;
    keys += offset;

    // number of elements to be processed by this work-group
    const int n = (groupID == (numOfGroups - 1)) ? (count - wg * (numOfGroups - 1)) : wg;

    // work-items past the end re-read the last valid element so that every
    // work-item fills its scratch slot and reaches the barrier
    const int clamped_i = min(lid, n - 1);
    const K_T key1 = keys[clamped_i];
    const V_T val1 = vals[clamped_i];
    scratch[lid] = key1;
    barrier(CLK_LOCAL_MEM_FENCE);

    if(lid < n)
    {
        int pos = 0;  // keys of this group ordered before key1: the rank of this element
        int same = 0; // keys of this group equivalent to key1 (key1 itself included)
        for(int j = 0; j < n; ++j)
        {
            const K_T key2 = scratch[j];
            if(my_comp(key2, key1))
            {
                ++pos;
            }
            else if(!my_comp(key1, key2))
            {
                // key1 and key2 are same
                ++same;
            }
        }

        // fill every slot of the equivalent-key run with this element
        for(int j = 0; j < same; ++j)
        {
            vals[pos + j] = val1;
            keys[pos + j] = key1;
        }
    }
}
// Places every element at its rank over the whole buffer.
// Each work-item scans the slice of every work-group, stopping a slice as soon as it meets
// a key ordered after its own, counts the keys ordered before it (pos) and the equivalent
// ones (same), then writes its key/value to positions pos .. pos + same - 1.
//
// keys, vals : buffers updated in place.
// count      : total number of elements; the last group's slice holds the remaining tail.
//
// NOTE(review): keys/vals are read and written in place by different work-items without
// any synchronization between the reads above and the writes below - confirm this is safe
// for the way the host launches this kernel.
__kernel
void selectionSortFinal
(
    __global K_T * keys,
    __global V_T * vals,
    const int count
)
{
    const int lid         = get_local_id(0);   // index in workgroup
    const int numOfGroups = get_num_groups(0); // number of workgroups
    const int wg          = get_local_size(0); // workgroup size = block size
    const int offset      = get_group_id(0) * wg;
    const int lastGroup   = numOfGroups - 1;
    const int remainder   = count - wg * lastGroup; // elements in the last group's slice

    if((offset + lid) >= count)
        return;

    const V_T val1 = vals[offset + lid];
    const K_T key1 = keys[offset + lid];

    int pos = 0, same = 0;
    for(int g = 0; g < numOfGroups; ++g)
    {
        // every slice has wg elements except the last one
        const int sliceLen = (g == lastGroup) ? remainder : wg;
        for(int k = 0; k < sliceLen; ++k)
        {
            const K_T key2 = keys[g * wg + k];
            if(my_comp(key1, key2))
                break;
            //Increment pos only if the value is not the same.
            if(my_comp(key2, key1))
                ++pos;
            else
                ++same;
        }
    }

    for(int j = 0; j < same; ++j)
    {
        vals[pos + j] = val1;
        keys[pos + j] = key1;
    }
}
This diff is collapsed.
This diff is collapsed.
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Peng Xiao, pengxiao@outlook.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other oclMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <map>
#include <functional>
#include "precomp.hpp"
using namespace std;
using namespace cvtest;
using namespace testing;
using namespace cv;
namespace
{
// Named wrapper types for the test parameters (sorting order, input length, sorting method).
// IMPLEMENT_PARAM_CLASS is defined outside this file.
IMPLEMENT_PARAM_CLASS(IsGreaterThan, bool)
IMPLEMENT_PARAM_CLASS(InputSize, int)
IMPLEMENT_PARAM_CLASS(SortMethod, int)
// Maps a C++ element type to the matching OpenCV matrix type constant.
// Types without a specialization report 0.
template <class T>
struct KV_CVTYPE
{
    static int toType() { return 0; }
};

template <>
struct KV_CVTYPE<int>
{
    static int toType() { return CV_32SC1; }
};

template <>
struct KV_CVTYPE<float>
{
    static int toType() { return CV_32FC1; }
};

template <>
struct KV_CVTYPE<Vec2i>
{
    static int toType() { return CV_32SC2; }
};

template <>
struct KV_CVTYPE<Vec2f>
{
    static int toType() { return CV_32FC2; }
};
// Ordering predicate for descending sorts: true when the key of lhs is greater than
// the key of rhs. The values do not take part in the comparison.
template <class key_type, class val_type>
bool kvgreater(std::pair<key_type, val_type> lhs, std::pair<key_type, val_type> rhs)
{
    const key_type& leftKey = lhs.first;
    const key_type& rightKey = rhs.first;
    return leftKey > rightKey;
}
// Ordering predicate for ascending sorts: true when the key of lhs is less than
// the key of rhs. The values do not take part in the comparison.
template <class key_type, class val_type>
bool kvless(std::pair<key_type, val_type> lhs, std::pair<key_type, val_type> rhs)
{
    const key_type& leftKey = lhs.first;
    const key_type& rightKey = rhs.first;
    return leftKey < rightKey;
}
// Collects the first vecSize (key, value) pairs read through the two iterators.
//
// kit, vit : iterators positioned at the first key / first value; advanced in lock step.
// vecSize  : number of pairs to read.
// kvres    : output; cleared first, then filled in iteration order.
template <class key_type, class val_type>
void toKVPair(
    MatConstIterator_<key_type> kit,
    MatConstIterator_<val_type> vit,
    int vecSize,
    vector<pair<key_type, val_type> >& kvres
    )
{
    kvres.clear();
    for(int remaining = vecSize; remaining > 0; --remaining, ++kit, ++vit)
    {
        kvres.push_back(pair<key_type, val_type>(*kit, *vit));
    }
}
// Host reference implementation: sorts the first row of keys with std::sort and
// reorders vals accordingly, writing both results back in place.
//
// keys      : matrix of key_type elements; keys.cols elements are sorted.
// vals      : matrix of val_type elements paired one to one with the keys.
// isGreater : true orders the keys with kvgreater, false with kvless.
//
// std::sort gives no guarantee on the relative order of elements with equal keys.
template <class key_type, class val_type>
void kvquicksort(Mat& keys, Mat& vals, bool isGreater = false)
{
    typedef pair<key_type, val_type> kv_pair;
    typedef bool (*kv_compare)(kv_pair, kv_pair);

    vector<kv_pair> kvres;
    toKVPair(keys.begin<key_type>(), vals.begin<val_type>(), keys.cols, kvres);

    kv_compare compare = kvless<key_type, val_type>;
    if(isGreater)
    {
        compare = kvgreater<key_type, val_type>;
    }
    std::sort(kvres.begin(), kvres.end(), compare);

    // copy the sorted pairs back into the two matrices
    key_type * kptr = keys.ptr<key_type>();
    val_type * vptr = vals.ptr<val_type>();
    const int total = keys.cols;
    for(int i = 0; i < total; ++i)
    {
        const kv_pair& entry = kvres[i];
        kptr[i] = entry.first;
        vptr[i] = entry.second;
    }
}
// Host-side reference sorter used to validate the device results.
// A single private instance holds a table of sorting functions indexed by
// [key matrix type][value matrix type]; sort() dispatches through that table.
class SortByKey_STL
{
    // signature shared by all table entries: (keys, values, is_gt)
    typedef void (*quick_sorter)(cv::Mat&, cv::Mat&, bool);

public:
    // Sorts keys and vals in place with the function registered for their types.
    static void sort(cv::Mat& keys, cv::Mat& vals, bool is_gt);

private:
    SortByKey_STL();

    static SortByKey_STL instance;
    quick_sorter quick_sorters[CV_64FC4][CV_64FC4];
};
// The single dispatcher instance. It is default-initialized directly, so the constructor
// fills the table of this very object instead of a temporary that is copied afterwards.
SortByKey_STL SortByKey_STL::instance;

// Clears the dispatch table and registers a sorter for every supported
// (key type, value type) combination. All other entries stay null.
SortByKey_STL::SortByKey_STL()
{
    // Operate on the object under construction, not on the static `instance`:
    // writing through `instance` is only correct when this object happens to be `instance`.
    memset(quick_sorters, 0, sizeof(quick_sorters));
#define NEW_SORTER(KT, VT) \
    quick_sorters[KV_CVTYPE<KT>::toType()][KV_CVTYPE<VT>::toType()] = kvquicksort<KT, VT>
    NEW_SORTER(int, int);
    NEW_SORTER(int, Vec2i);
    NEW_SORTER(int, float);
    NEW_SORTER(int, Vec2f);
    NEW_SORTER(float, int);
    NEW_SORTER(float, Vec2i);
    NEW_SORTER(float, float);
    NEW_SORTER(float, Vec2f);
#undef NEW_SORTER
}
// Sorts keys and vals in place on the host using the sorter registered for
// (keys.type(), vals.type()).
// NOTE(review): type combinations that were never registered leave a null table entry,
// which this call would dereference - callers must pass supported types only.
void SortByKey_STL::sort(cv::Mat& keys, cv::Mat& vals, bool is_gt)
{
    const quick_sorter sorter = instance.quick_sorters[keys.type()][vals.type()];
    sorter(keys, vals, is_gt);
}
// Checks device-sorted values against the host reference without assuming a stable sort.
//
// gkeys_ : reference keys, already sorted (one row).
// gvals_ : reference values, ordered like gkeys_.
// dvals_ : values produced by the sorter under test.
// The third parameter (device keys) is unused; keys are compared separately by the caller.
//
// Elements sharing the same key may legitimately appear in any order, so for every run of
// identical keys the corresponding values (all channels flattened) are sorted on both sides
// and then compared. Returns true when every run matches.
bool checkUnstableSorterResult(const Mat& gkeys_, const Mat& gvals_,
                               const Mat& /*dkeys_*/, const Mat& dvals_)
{
    const int cn_val = gvals_.channels();
    const int count = gkeys_.cols;

    //for convenience we convert depth to float and channels to 1
    Mat gkeys, gvals, dvals;
    gkeys_.reshape(1).convertTo(gkeys, CV_32F);
    gvals_.reshape(1).convertTo(gvals, CV_32F);
    dvals_.reshape(1).convertTo(dvals, CV_32F);
    const float * gkptr = gkeys.ptr<float>();
    float * gvptr = gvals.ptr<float>();
    float * dvptr = dvals.ptr<float>();

    // Walk the keys run by run. Iterating up to count (not count - 1) makes sure a trailing
    // element with a unique key is verified as well.
    for(int i = 0; i < count; )
    {
        // length of the run of identical keys starting at i; the bound check keeps the
        // scan inside the key array when the run reaches the last element
        int run = 1;
        while(i + run < count && gkptr[i + run - 1] == gkptr[i + run])
        {
            ++run;
        }

        // sort dv and gv over the run; element i owns cn_val consecutive floats
        // starting at i * cn_val
        const int num_of_val = run * cn_val;
        float * gfirst = gvptr + i * cn_val;
        float * dfirst = dvptr + i * cn_val;
        std::sort(gfirst, gfirst + num_of_val);
        std::sort(dfirst, dfirst + num_of_val);

        // then check that both sides hold the same values over exactly that span
        if(!std::equal(gfirst, gfirst + num_of_val, dfirst))
        {
            return false;
        }
        i += run;
    }
    return true;
}
}
// Parameter sets for the SortByKey test.
// Input lengths (all powers of two).
#define INPUT_SIZES Values(InputSize(0x10), InputSize(0x100), InputSize(0x10000)) //2^4, 2^8, 2^16
// Key matrix types: single-channel 32-bit int and float.
#define KEY_TYPES Values(MatType(CV_32SC1), MatType(CV_32FC1))
// Value matrix types: one- and two-channel 32-bit int and float.
#define VAL_TYPES Values(MatType(CV_32SC1), MatType(CV_32SC2), MatType(CV_32FC1), MatType(CV_32FC2))
// Sorting methods under test. SORT_SELECTION is commented out - presumably because it
// cannot sort duplicate keys; TODO confirm.
#define SORT_METHODS Values(SortMethod(cv::ocl::SORT_BITONIC),SortMethod(cv::ocl::SORT_MERGE),SortMethod(cv::ocl::SORT_RADIX)/*,SortMethod(cv::ocl::SORT_SELECTION)*/)
// Both values of the isGreaterThan argument.
#define F_OR_T Values(IsGreaterThan(false), IsGreaterThan(true))
// Fixture for the sortByKey tests.
// Parameters, in order: input length, key matrix type, value matrix type,
// sorting method, sorting order flag.
PARAM_TEST_CASE(SortByKey, InputSize, MatType, MatType, SortMethod, IsGreaterThan)
{
    InputSize input_size;
    MatType key_type;
    MatType val_type;
    SortMethod method;
    IsGreaterThan is_gt;

    // host-side input: a single row of keys and a single row of values
    Mat mat_key;
    Mat mat_val;

    virtual void SetUp()
    {
        input_size = GET_PARAM(0);
        key_type   = GET_PARAM(1);
        val_type   = GET_PARAM(2);
        method     = GET_PARAM(3);
        is_gt      = GET_PARAM(4);

        // fill key and val with random data (keys first, then values)
        const Size rowSize(input_size, 1);
        mat_key = randomMat(rowSize, key_type, INT_MIN, INT_MAX);
        mat_val = randomMat(rowSize, val_type, INT_MIN, INT_MAX);
    }
};
// Sorts the same random keys/values on the device and on the host and compares the results.
TEST_P(SortByKey, Accuracy)
{
using namespace cv;
// device copies of the inputs, sorted by the function under test
ocl::oclMat oclmat_key(mat_key);
ocl::oclMat oclmat_val(mat_val);
ocl::sortByKey(oclmat_key, oclmat_val, method, is_gt);
// host reference; mat_key / mat_val are sorted in place
SortByKey_STL::sort(mat_key, mat_val, is_gt);
// keys must match exactly
EXPECT_MAT_NEAR(mat_key, oclmat_key, 0.0);
// values are checked per run of identical keys, since their relative order may differ
EXPECT_TRUE(checkUnstableSorterResult(mat_key, mat_val, oclmat_key, oclmat_val));
}
// Instantiates SortByKey for every combination of INPUT_SIZES, KEY_TYPES, VAL_TYPES,
// SORT_METHODS and F_OR_T.
INSTANTIATE_TEST_CASE_P(OCL_SORT, SortByKey, Combine(INPUT_SIZES, KEY_TYPES, VAL_TYPES, SORT_METHODS, F_OR_T));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment