GHOST  1.1.2
General, Hybrid, and Optimized Sparse Toolkit
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
Classes | Functions | Variables
cu_sell_kernel.h File Reference
#include "ghost/types.h"
#include "ghost/cu_complex.h"
#include <cuda.h>
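Include dependency graph for cu_sell_kernel.h: [graph image not available in this text rendering]

This graph shows which files directly or indirectly include this file: [graph image not available in this text rendering]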

Go to the source code of this file.

Classes

struct  CustomSum
 

Functions

template<typename v_t >
__device__ v_t ghost_shfl_down32 (v_t var, unsigned int srcLane)
 
template<typename v_t >
__device__ v_t ghost_shfl_down (v_t var, unsigned int srcLane, int width)
 
template<>
__device__ cuFloatComplex ghost_shfl_down< cuFloatComplex > (cuFloatComplex var, unsigned int srcLane, int width)
 
template<>
__device__ cuDoubleComplex ghost_shfl_down< cuDoubleComplex > (cuDoubleComplex var, unsigned int srcLane, int width)
 
template<typename v_t >
__inline__ __device__ v_t ghost_warpReduceSum (v_t val)
 
template<typename v_t >
__inline__ __device__ v_t ghost_partialWarpReduceSum (v_t val, int size, int width)
 
template<typename v_t , int size>
__inline__ __device__ v_t ghost_partialWarpReduceSumFast (v_t val)
 
template<>
__inline__ __device__ double3 ghost_warpReduceSum< double3 > (double3 val)
 
template<typename v_t >
__inline__ __device__ v_t ghost_partialBlockReduceSum (v_t val, int size)
 
template<typename v_t >
__inline__ __device__ v_t ghost_1dPartialBlockReduceSum (v_t val, int nwarps)
 
template<typename v_t >
__inline__ __device__ v_t ghost_blockReduceSum (v_t val)
 
template<>
__inline__ __device__ double3 ghost_blockReduceSum< double3 > (double3 val)
 
template<typename v_t >
__global__ void ghost_deviceReduceSum (v_t *in, v_t *out, ghost_lidx N)
 
template<typename v_t >
__global__ void ghost_deviceReduceSumMultiple (v_t *in, v_t *out, ghost_lidx N, ghost_lidx ncols)
 
template<typename T >
__device__ __inline__ T streaming_load (const T *addr)
 
template<>
__device__ __inline__ double streaming_load (const double *addr)
 
template<>
__device__ __inline__ float streaming_load (const float *addr)
 
template<>
__device__ __inline__
cuDoubleComplex 
streaming_load (const cuDoubleComplex *addr)
 
template<>
__device__ __inline__
cuFloatComplex 
streaming_load (const cuFloatComplex *addr)
 
template<typename T >
__device__ __inline__ void streaming_store (T *addr, const T val)
 
template<>
__device__ __inline__ void streaming_store (double *addr, const double val)
 
template<>
__device__ __inline__ void streaming_store (float *addr, const float val)
 
template<>
__device__ __inline__ void streaming_store (cuDoubleComplex *addr, const cuDoubleComplex val)
 

Variables

__shared__ char shared []
 

Function Documentation

template<typename v_t >
__inline__ __device__ v_t ghost_1dPartialBlockReduceSum ( v_t  val,
int  nwarps 
)

Here is the call graph for this function:

template<typename v_t >
__inline__ __device__ v_t ghost_blockReduceSum ( v_t  val)

Here is the call graph for this function:

template<>
__inline__ __device__ double3 ghost_blockReduceSum< double3 > ( double3  val)

Here is the call graph for this function:

template<typename v_t >
__global__ void ghost_deviceReduceSum ( v_t *  in,
v_t *  out,
ghost_lidx  N 
)

Here is the call graph for this function:

template<typename v_t >
__global__ void ghost_deviceReduceSumMultiple ( v_t *  in,
v_t *  out,
ghost_lidx  N,
ghost_lidx  ncols 
)

Here is the call graph for this function:

template<typename v_t >
__inline__ __device__ v_t ghost_partialBlockReduceSum ( v_t  val,
int  size 
)

Here is the call graph for this function:

template<typename v_t >
__inline__ __device__ v_t ghost_partialWarpReduceSum ( v_t  val,
int  size,
int  width 
)

Here is the call graph for this function:

template<typename v_t , int size>
__inline__ __device__ v_t ghost_partialWarpReduceSumFast ( v_t  val)

Here is the call graph for this function:

template<typename v_t >
__device__ v_t ghost_shfl_down ( v_t  var,
unsigned int  srcLane,
int  width 
)
[inline]

template<typename v_t >
__device__ v_t ghost_shfl_down32 ( v_t  var,
unsigned int  srcLane 
)
[inline]

Here is the call graph for this function:

template<>
__device__ cuDoubleComplex ghost_shfl_down< cuDoubleComplex > ( cuDoubleComplex  var,
unsigned int  srcLane,
int  width 
)
[inline]

Here is the call graph for this function:

template<>
__device__ cuFloatComplex ghost_shfl_down< cuFloatComplex > ( cuFloatComplex  var,
unsigned int  srcLane,
int  width 
)
[inline]

Here is the call graph for this function:

template<typename v_t >
__inline__ __device__ v_t ghost_warpReduceSum ( v_t  val)

Here is the call graph for this function:

template<>
__inline__ __device__ double3 ghost_warpReduceSum< double3 > ( double3  val)

Here is the call graph for this function:

template<typename T >
__device__ __inline__ T streaming_load ( const T *  addr)

template<>
__device__ __inline__ double streaming_load ( const double *  addr)

template<>
__device__ __inline__ float streaming_load ( const float *  addr)

template<>
__device__ __inline__ cuDoubleComplex streaming_load ( const cuDoubleComplex *  addr)

template<>
__device__ __inline__ cuFloatComplex streaming_load ( const cuFloatComplex *  addr)

template<typename T >
__device__ __inline__ void streaming_store ( T *  addr,
const T  val 
)

template<>
__device__ __inline__ void streaming_store ( double *  addr,
const double  val 
)

template<>
__device__ __inline__ void streaming_store ( float *  addr,
const float  val 
)

template<>
__device__ __inline__ void streaming_store ( cuDoubleComplex *  addr,
const cuDoubleComplex  val 
)

Variable Documentation

__shared__ char shared[]