|
template<typename v_t > |
__device__ v_t | ghost_shfl_down32 (v_t var, unsigned int srcLane) |
|
template<typename v_t > |
__device__ v_t | ghost_shfl_down (v_t var, unsigned int srcLane, int width) |
|
template<> |
__device__ cuFloatComplex | ghost_shfl_down< cuFloatComplex > (cuFloatComplex var, unsigned int srcLane, int width) |
|
template<> |
__device__ cuDoubleComplex | ghost_shfl_down< cuDoubleComplex > (cuDoubleComplex var, unsigned int srcLane, int width) |
|
template<typename v_t > |
__inline__ __device__ v_t | ghost_warpReduceSum (v_t val) |
|
template<typename v_t > |
__inline__ __device__ v_t | ghost_partialWarpReduceSum (v_t val, int size, int width) |
|
template<typename v_t , int size> |
__inline__ __device__ v_t | ghost_partialWarpReduceSumFast (v_t val) |
|
template<> |
__inline__ __device__ double3 | ghost_warpReduceSum< double3 > (double3 val) |
|
template<typename v_t > |
__inline__ __device__ v_t | ghost_partialBlockReduceSum (v_t val, int size) |
|
template<typename v_t > |
__inline__ __device__ v_t | ghost_1dPartialBlockReduceSum (v_t val, int nwarps) |
|
template<typename v_t > |
__inline__ __device__ v_t | ghost_blockReduceSum (v_t val) |
|
template<> |
__inline__ __device__ double3 | ghost_blockReduceSum< double3 > (double3 val) |
|
template<typename v_t > |
__global__ void | ghost_deviceReduceSum (v_t *in, v_t *out, ghost_lidx N) |
|
template<typename v_t > |
__global__ void | ghost_deviceReduceSumMultiple (v_t *in, v_t *out, ghost_lidx N, ghost_lidx ncols) |
|
template<typename T > |
__device__ __inline__ T | streaming_load (const T *addr) |
|
template<> |
__device__ __inline__ double | streaming_load (const double *addr) |
|
template<> |
__device__ __inline__ float | streaming_load (const float *addr) |
|
template<> |
__device__ __inline__ cuDoubleComplex | streaming_load (const cuDoubleComplex *addr) |
|
template<> |
__device__ __inline__ cuFloatComplex | streaming_load (const cuFloatComplex *addr) |
|
template<typename T > |
__device__ __inline__ void | streaming_store (T *addr, const T val) |
|
template<> |
__device__ __inline__ void | streaming_store (double *addr, const double val) |
|
template<> |
__device__ __inline__ void | streaming_store (float *addr, const float val) |
|
template<> |
__device__ __inline__ void | streaming_store (cuDoubleComplex *addr, const cuDoubleComplex val) |
|