Commit 4dda2002 authored by Vadim Pisarevsky

Merge pull request #3241 from WilhelmHannemann:bugfix_brute_force_match_cl

parents 197b2e75 f8c51287
...@@ -179,7 +179,7 @@ __kernel void BruteForceMatch_UnrollMatch( ...@@ -179,7 +179,7 @@ __kernel void BruteForceMatch_UnrollMatch(
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++) for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
{ {
int loadx = lidx + i * BLOCK_SIZE; int loadx = lidx + i * BLOCK_SIZE;
s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0; s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(T)) + loadx] : 0;
} }
float myBestDistance = MAX_FLOAT; float myBestDistance = MAX_FLOAT;
...@@ -194,7 +194,7 @@ __kernel void BruteForceMatch_UnrollMatch( ...@@ -194,7 +194,7 @@ __kernel void BruteForceMatch_UnrollMatch(
{ {
//load a BLOCK_SIZE * BLOCK_SIZE block into local train. //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = lidx + i * BLOCK_SIZE; const int loadx = lidx + i * BLOCK_SIZE;
s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0; s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(T)) + loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already. //synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -284,8 +284,8 @@ __kernel void BruteForceMatch_Match( ...@@ -284,8 +284,8 @@ __kernel void BruteForceMatch_Match(
if (loadx < query_cols) if (loadx < query_cols)
{ {
s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx]; s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(T)) + loadx];
s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx]; s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(T)) + loadx];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -372,8 +372,8 @@ __kernel void BruteForceMatch_RadiusUnrollMatch( ...@@ -372,8 +372,8 @@ __kernel void BruteForceMatch_RadiusUnrollMatch(
//load a BLOCK_SIZE * BLOCK_SIZE block into local train. //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = lidx + i * BLOCK_SIZE; const int loadx = lidx + i * BLOCK_SIZE;
s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0; s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(T)) + loadx] : 0;
s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0; s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(T)) + loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already. //synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -432,8 +432,8 @@ __kernel void BruteForceMatch_RadiusMatch( ...@@ -432,8 +432,8 @@ __kernel void BruteForceMatch_RadiusMatch(
//load a BLOCK_SIZE * BLOCK_SIZE block into local train. //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = lidx + i * BLOCK_SIZE; const int loadx = lidx + i * BLOCK_SIZE;
s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0; s_query[lidy * BLOCK_SIZE + lidx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(T)) + loadx] : 0;
s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0; s_train[lidx * BLOCK_SIZE + lidy] = loadx < query_cols ? train[min(groupidx * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(T)) + loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already. //synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -483,7 +483,7 @@ __kernel void BruteForceMatch_knnUnrollMatch( ...@@ -483,7 +483,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++) for (int i = 0 ; i < MAX_DESC_LEN / BLOCK_SIZE; i ++)
{ {
int loadx = lidx + i * BLOCK_SIZE; int loadx = lidx + i * BLOCK_SIZE;
s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx] : 0; s_query[lidy * MAX_DESC_LEN + loadx] = loadx < query_cols ? query[min(queryIdx, query_rows - 1) * (step / sizeof(T)) + loadx] : 0;
} }
float myBestDistance1 = MAX_FLOAT; float myBestDistance1 = MAX_FLOAT;
...@@ -499,7 +499,7 @@ __kernel void BruteForceMatch_knnUnrollMatch( ...@@ -499,7 +499,7 @@ __kernel void BruteForceMatch_knnUnrollMatch(
{ {
//load a BLOCK_SIZE * BLOCK_SIZE block into local train. //load a BLOCK_SIZE * BLOCK_SIZE block into local train.
const int loadx = lidx + i * BLOCK_SIZE; const int loadx = lidx + i * BLOCK_SIZE;
s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx] : 0; s_train[lidx * BLOCK_SIZE + lidy] = loadx < train_cols ? train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(T)) + loadx] : 0;
//synchronize to make sure each elem for reduceIteration in share memory is written already. //synchronize to make sure each elem for reduceIteration in share memory is written already.
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
...@@ -643,8 +643,8 @@ __kernel void BruteForceMatch_knnMatch( ...@@ -643,8 +643,8 @@ __kernel void BruteForceMatch_knnMatch(
if (loadx < query_cols) if (loadx < query_cols)
{ {
s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(float)) + loadx]; s_query[lidy * BLOCK_SIZE + lidx] = query[min(queryIdx, query_rows - 1) * (step / sizeof(T)) + loadx];
s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(float)) + loadx]; s_train[lidx * BLOCK_SIZE + lidy] = train[min(t * BLOCK_SIZE + lidy, train_rows - 1) * (step / sizeof(T)) + loadx];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment