Commit 65d64af2 authored by krodyush

ocl_calcOpticalFlowPyrLK optimizations

1. reduce the number of branches in the CL code by replacing them with weights
2. decrease local memory pressure in the reduce operation by using private variables
3. decrease image sampler pressure by caching data into local memory
4. remove unnecessary sync point on the HOST side.
parent a2dec6c3
......@@ -989,7 +989,7 @@ namespace cv
idxArg = kernel.set(idxArg, (int)winSize.height); // int c_winSize_y
idxArg = kernel.set(idxArg, (int)iters); // int c_iters
idxArg = kernel.set(idxArg, (char)calcErr); //char calcErr
return kernel.run(2, globalThreads, localThreads, true);
return kernel.run(2, globalThreads, localThreads, false);
}
private:
inline static bool isDeviceCPU()
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment