Commit b4c9c099 authored by Philip Langdale, committed by Timo Rothenpieler

avfilter/vf_thumbnail_cuda: Switch to using ffnvcodec

This change switches the vf_thumbnail_cuda filter from using the
full cuda sdk to using the ffnvcodec headers and loader.

Most of the change is a direct mapping, but I also switched from
using texture references to using texture objects. This is supposed
to be the preferred way of using textures, and the texture object API
is the one I added to ffnvcodec.
Signed-off-by: Philip Langdale <philipl@overt.org>
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
parent 2544c7ea
...@@ -2976,7 +2976,7 @@ v4l2_m2m_deps="linux_videodev2_h sem_timedwait" ...@@ -2976,7 +2976,7 @@ v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
hwupload_cuda_filter_deps="ffnvcodec" hwupload_cuda_filter_deps="ffnvcodec"
scale_npp_filter_deps="ffnvcodec libnpp" scale_npp_filter_deps="ffnvcodec libnpp"
scale_cuda_filter_deps="ffnvcodec cuda_nvcc" scale_cuda_filter_deps="ffnvcodec cuda_nvcc"
thumbnail_cuda_filter_deps="cuda_sdk" thumbnail_cuda_filter_deps="ffnvcodec cuda_nvcc"
transpose_npp_filter_deps="ffnvcodec libnpp" transpose_npp_filter_deps="ffnvcodec libnpp"
amf_deps_any="libdl LoadLibrary" amf_deps_any="libdl LoadLibrary"
......
...@@ -20,10 +20,8 @@ ...@@ -20,10 +20,8 @@
* DEALINGS IN THE SOFTWARE. * DEALINGS IN THE SOFTWARE.
*/ */
#include <cuda.h>
#include "libavutil/hwcontext.h" #include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda.h" #include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/cuda_check.h" #include "libavutil/cuda_check.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
...@@ -31,7 +29,7 @@ ...@@ -31,7 +29,7 @@
#include "avfilter.h" #include "avfilter.h"
#include "internal.h" #include "internal.h"
#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
#define HIST_SIZE (3*256) #define HIST_SIZE (3*256)
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
...@@ -60,6 +58,7 @@ typedef struct ThumbnailCudaContext { ...@@ -60,6 +58,7 @@ typedef struct ThumbnailCudaContext {
AVRational tb; ///< copy of the input timebase to ease access AVRational tb; ///< copy of the input timebase to ease access
AVBufferRef *hw_frames_ctx; AVBufferRef *hw_frames_ctx;
AVCUDADeviceContext *hwctx;
CUmodule cu_module; CUmodule cu_module;
...@@ -67,12 +66,10 @@ typedef struct ThumbnailCudaContext { ...@@ -67,12 +66,10 @@ typedef struct ThumbnailCudaContext {
CUfunction cu_func_uchar2; CUfunction cu_func_uchar2;
CUfunction cu_func_ushort; CUfunction cu_func_ushort;
CUfunction cu_func_ushort2; CUfunction cu_func_ushort2;
CUtexref cu_tex_uchar; CUstream cu_stream;
CUtexref cu_tex_uchar2;
CUtexref cu_tex_ushort;
CUtexref cu_tex_ushort2;
CUdeviceptr data; CUdeviceptr data;
} ThumbnailCudaContext; } ThumbnailCudaContext;
#define OFFSET(x) offsetof(ThumbnailCudaContext, x) #define OFFSET(x) offsetof(ThumbnailCudaContext, x)
...@@ -157,29 +154,44 @@ static AVFrame *get_best_frame(AVFilterContext *ctx) ...@@ -157,29 +154,44 @@ static AVFrame *get_best_frame(AVFilterContext *ctx)
return picref; return picref;
} }
static int thumbnail_kernel(ThumbnailCudaContext *ctx, CUfunction func, CUtexref tex, int channels, static int thumbnail_kernel(AVFilterContext *ctx, CUfunction func, int channels,
int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size) int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size)
{ {
CUdeviceptr src_devptr = (CUdeviceptr)src_dptr; int ret;
void *args[] = { &histogram, &src_width, &src_height }; ThumbnailCudaContext *s = ctx->priv;
CUDA_ARRAY_DESCRIPTOR desc; CudaFunctions *cu = s->hwctx->internal->cuda_dl;
CUtexObject tex = 0;
desc.Width = src_width; void *args[] = { &tex, &histogram, &src_width, &src_height };
desc.Height = src_height;
desc.NumChannels = channels;
if (pixel_size == 1) {
desc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
}
else {
desc.Format = CU_AD_FORMAT_UNSIGNED_INT16;
}
CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch)); CUDA_TEXTURE_DESC tex_desc = {
CHECK_CU(cuLaunchKernel(func, .filterMode = CU_TR_FILTER_MODE_LINEAR,
DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, .flags = CU_TRSF_READ_AS_INTEGER,
BLOCKX, BLOCKY, 1, 0, 0, args, NULL)); };
return 0; CUDA_RESOURCE_DESC res_desc = {
.resType = CU_RESOURCE_TYPE_PITCH2D,
.res.pitch2D.format = pixel_size == 1 ?
CU_AD_FORMAT_UNSIGNED_INT8 :
CU_AD_FORMAT_UNSIGNED_INT16,
.res.pitch2D.numChannels = channels,
.res.pitch2D.width = src_width,
.res.pitch2D.height = src_height,
.res.pitch2D.pitchInBytes = src_pitch,
.res.pitch2D.devPtr = (CUdeviceptr)src_dptr,
};
ret = CHECK_CU(cu->cuTexObjectCreate(&tex, &res_desc, &tex_desc, NULL));
if (ret < 0)
goto exit;
ret = CHECK_CU(cu->cuLaunchKernel(func,
DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1,
BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL));
exit:
if (tex)
CHECK_CU(cu->cuTexObjectDestroy(tex));
return ret;
} }
static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in) static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
...@@ -189,40 +201,40 @@ static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in) ...@@ -189,40 +201,40 @@ static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
switch (in_frames_ctx->sw_format) { switch (in_frames_ctx->sw_format) {
case AV_PIX_FMT_NV12: case AV_PIX_FMT_NV12:
thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_uchar, 1,
histogram, in->data[0], in->width, in->height, in->linesize[0], 1); histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
thumbnail_kernel(s, s->cu_func_uchar2, s->cu_tex_uchar2, 2, thumbnail_kernel(ctx, s->cu_func_uchar2, 2,
histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1); histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1);
break; break;
case AV_PIX_FMT_YUV420P: case AV_PIX_FMT_YUV420P:
thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_uchar, 1,
histogram, in->data[0], in->width, in->height, in->linesize[0], 1); histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_uchar, 1,
histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1); histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 1);
thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_uchar, 1,
histogram + 512, in->data[2], in->width / 2, in->height / 2, in->linesize[2], 1); histogram + 512, in->data[2], in->width / 2, in->height / 2, in->linesize[2], 1);
break; break;
case AV_PIX_FMT_YUV444P: case AV_PIX_FMT_YUV444P:
thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_uchar, 1,
histogram, in->data[0], in->width, in->height, in->linesize[0], 1); histogram, in->data[0], in->width, in->height, in->linesize[0], 1);
thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_uchar, 1,
histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 1); histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 1);
thumbnail_kernel(s, s->cu_func_uchar, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_uchar, 1,
histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 1); histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 1);
break; break;
case AV_PIX_FMT_P010LE: case AV_PIX_FMT_P010LE:
case AV_PIX_FMT_P016LE: case AV_PIX_FMT_P016LE:
thumbnail_kernel(s, s->cu_func_ushort, s->cu_tex_ushort, 1, thumbnail_kernel(ctx, s->cu_func_ushort, 1,
histogram, in->data[0], in->width, in->height, in->linesize[0], 2); histogram, in->data[0], in->width, in->height, in->linesize[0], 2);
thumbnail_kernel(s, s->cu_func_ushort2, s->cu_tex_ushort2, 2, thumbnail_kernel(ctx, s->cu_func_ushort2, 2,
histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 2); histogram + 256, in->data[1], in->width / 2, in->height / 2, in->linesize[1], 2);
break; break;
case AV_PIX_FMT_YUV444P16: case AV_PIX_FMT_YUV444P16:
thumbnail_kernel(s, s->cu_func_ushort2, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
histogram, in->data[0], in->width, in->height, in->linesize[0], 2); histogram, in->data[0], in->width, in->height, in->linesize[0], 2);
thumbnail_kernel(s, s->cu_func_ushort2, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 2); histogram + 256, in->data[1], in->width, in->height, in->linesize[1], 2);
thumbnail_kernel(s, s->cu_func_ushort2, s->cu_tex_uchar, 1, thumbnail_kernel(ctx, s->cu_func_ushort2, 1,
histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 2); histogram + 512, in->data[2], in->width, in->height, in->linesize[2], 2);
break; break;
default: default:
...@@ -236,10 +248,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) ...@@ -236,10 +248,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
{ {
AVFilterContext *ctx = inlink->dst; AVFilterContext *ctx = inlink->dst;
ThumbnailCudaContext *s = ctx->priv; ThumbnailCudaContext *s = ctx->priv;
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
AVFilterLink *outlink = ctx->outputs[0]; AVFilterLink *outlink = ctx->outputs[0];
int *hist = s->frames[s->n].histogram; int *hist = s->frames[s->n].histogram;
AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data; AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data;
AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
CUcontext dummy; CUcontext dummy;
CUDA_MEMCPY2D cpy = { 0 }; CUDA_MEMCPY2D cpy = { 0 };
int ret = 0; int ret = 0;
...@@ -247,11 +259,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) ...@@ -247,11 +259,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
// keep a reference of each frame // keep a reference of each frame
s->frames[s->n].buf = frame; s->frames[s->n].buf = frame;
ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx)); ret = CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
if (ret < 0) if (ret < 0)
return ret; return ret;
CHECK_CU(cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int))); CHECK_CU(cu->cuMemsetD8Async(s->data, 0, HIST_SIZE * sizeof(int), s->cu_stream));
thumbnail(ctx, (int*)s->data, frame); thumbnail(ctx, (int*)s->data, frame);
...@@ -264,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) ...@@ -264,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
cpy.WidthInBytes = HIST_SIZE * sizeof(int); cpy.WidthInBytes = HIST_SIZE * sizeof(int);
cpy.Height = 1; cpy.Height = 1;
ret = CHECK_CU(cuMemcpy2D(&cpy)); ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, s->cu_stream));
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -276,7 +288,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) ...@@ -276,7 +288,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
hist[i] = 4 * hist[i]; hist[i] = 4 * hist[i];
} }
CHECK_CU(cuCtxPopCurrent(&dummy)); CHECK_CU(cu->cuCtxPopCurrent(&dummy));
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -292,14 +304,15 @@ static av_cold void uninit(AVFilterContext *ctx) ...@@ -292,14 +304,15 @@ static av_cold void uninit(AVFilterContext *ctx)
{ {
int i; int i;
ThumbnailCudaContext *s = ctx->priv; ThumbnailCudaContext *s = ctx->priv;
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
if (s->data) { if (s->data) {
CHECK_CU(cuMemFree(s->data)); CHECK_CU(cu->cuMemFree(s->data));
s->data = 0; s->data = 0;
} }
if (s->cu_module) { if (s->cu_module) {
CHECK_CU(cuModuleUnload(s->cu_module)); CHECK_CU(cu->cuModuleUnload(s->cu_module));
s->cu_module = NULL; s->cu_module = NULL;
} }
...@@ -342,43 +355,43 @@ static int config_props(AVFilterLink *inlink) ...@@ -342,43 +355,43 @@ static int config_props(AVFilterLink *inlink)
AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx;
CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx;
CudaFunctions *cu = device_hwctx->internal->cuda_dl;
int ret; int ret;
extern char vf_thumbnail_cuda_ptx[]; extern char vf_thumbnail_cuda_ptx[];
ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); s->hwctx = device_hwctx;
s->cu_stream = s->hwctx->stream;
ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
if (ret < 0) if (ret < 0)
return ret; return ret;
ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx)); ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx));
if (ret < 0) if (ret < 0)
return ret; return ret;
CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar")); ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"));
CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2")); if (ret < 0)
CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort")); return ret;
CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"));
CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex")); ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"));
CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex")); if (ret < 0)
CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex")); return ret;
CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"));
CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER)); ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"));
CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER)); if (ret < 0)
CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER)); return ret;
CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER));
CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR)); ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"));
CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR)); if (ret < 0)
CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR)); return ret;
CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR));
ret = CHECK_CU(cuMemAlloc(&s->data, HIST_SIZE * sizeof(int))); ret = CHECK_CU(cu->cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)));
if (ret < 0) if (ret < 0)
return ret; return ret;
CHECK_CU(cuCtxPopCurrent(&dummy)); CHECK_CU(cu->cuCtxPopCurrent(&dummy));
s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx; s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx;
......
...@@ -22,55 +22,54 @@ ...@@ -22,55 +22,54 @@
extern "C" { extern "C" {
texture<unsigned char, 2> uchar_tex; __global__ void Thumbnail_uchar(cudaTextureObject_t uchar_tex,
texture<uchar2, 2> uchar2_tex; int *histogram, int src_width, int src_height)
texture<unsigned short, 2> ushort_tex;
texture<ushort2, 2> ushort2_tex;
__global__ void Thumbnail_uchar(int *histogram, int src_width, int src_height)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < src_height && x < src_width) if (y < src_height && x < src_width)
{ {
unsigned char pixel = tex2D(uchar_tex, x, y); unsigned char pixel = tex2D<unsigned char>(uchar_tex, x, y);
atomicAdd(&histogram[pixel], 1); atomicAdd(&histogram[pixel], 1);
} }
} }
__global__ void Thumbnail_uchar2(int *histogram, int src_width, int src_height) __global__ void Thumbnail_uchar2(cudaTextureObject_t uchar2_tex,
int *histogram, int src_width, int src_height)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < src_height && x < src_width) if (y < src_height && x < src_width)
{ {
uchar2 pixel = tex2D(uchar2_tex, x, y); uchar2 pixel = tex2D<uchar2>(uchar2_tex, x, y);
atomicAdd(&histogram[pixel.x], 1); atomicAdd(&histogram[pixel.x], 1);
atomicAdd(&histogram[256 + pixel.y], 1); atomicAdd(&histogram[256 + pixel.y], 1);
} }
} }
__global__ void Thumbnail_ushort(int *histogram, int src_width, int src_height) __global__ void Thumbnail_ushort(cudaTextureObject_t ushort_tex,
int *histogram, int src_width, int src_height)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < src_height && x < src_width) if (y < src_height && x < src_width)
{ {
unsigned short pixel = (tex2D(ushort_tex, x, y) + 128) >> 8; unsigned short pixel = (tex2D<unsigned short>(ushort_tex, x, y) + 128) >> 8;
atomicAdd(&histogram[pixel], 1); atomicAdd(&histogram[pixel], 1);
} }
} }
__global__ void Thumbnail_ushort2(int *histogram, int src_width, int src_height) __global__ void Thumbnail_ushort2(cudaTextureObject_t ushort2_tex,
int *histogram, int src_width, int src_height)
{ {
int x = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < src_height && x < src_width) if (y < src_height && x < src_width)
{ {
ushort2 pixel = tex2D(ushort2_tex, x, y); ushort2 pixel = tex2D<ushort2>(ushort2_tex, x, y);
atomicAdd(&histogram[(pixel.x + 128) >> 8], 1); atomicAdd(&histogram[(pixel.x + 128) >> 8], 1);
atomicAdd(&histogram[256 + (pixel.y + 128) >> 8], 1); atomicAdd(&histogram[256 + (pixel.y + 128) >> 8], 1);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment