#include "ghost/types.h"
#include "ghost/cu_complex.h"
#include <cuda.h>

Include dependency graph for cu_sell_kernel.h:

This graph shows which files directly or indirectly include this file:

Classes
struct	CustomSum

Functions
template<typename v_t >
__device__ v_t	ghost_shfl_down32 (v_t var, unsigned int srcLane)

template<typename v_t >
__device__ v_t	ghost_shfl_down (v_t var, unsigned int srcLane, int width)

template<>
__device__ cuFloatComplex	ghost_shfl_down< cuFloatComplex > (cuFloatComplex var, unsigned int srcLane, int width)

template<>
__device__ cuDoubleComplex	ghost_shfl_down< cuDoubleComplex > (cuDoubleComplex var, unsigned int srcLane, int width)

template<typename v_t >
__inline__ __device__ v_t	ghost_warpReduceSum (v_t val)

template<typename v_t >
__inline__ __device__ v_t	ghost_partialWarpReduceSum (v_t val, int size, int width)

template<typename v_t , int size>
__inline__ __device__ v_t	ghost_partialWarpReduceSumFast (v_t val)

template<>
__inline__ __device__ double3	ghost_warpReduceSum< double3 > (double3 val)

template<typename v_t >
__inline__ __device__ v_t	ghost_partialBlockReduceSum (v_t val, int size)

template<typename v_t >
__inline__ __device__ v_t	ghost_1dPartialBlockReduceSum (v_t val, int nwarps)

template<typename v_t >
__inline__ __device__ v_t	ghost_blockReduceSum (v_t val)

template<>
__inline__ __device__ double3	ghost_blockReduceSum< double3 > (double3 val)

template<typename v_t >
__global__ void	ghost_deviceReduceSum (v_t *in, v_t *out, ghost_lidx N)

template<typename v_t >
__global__ void	ghost_deviceReduceSumMultiple (v_t *in, v_t *out, ghost_lidx N, ghost_lidx ncols)

template<typename T >
__device__ __inline__ T	streaming_load (const T *addr)

template<>
__device__ __inline__ double	streaming_load (const double *addr)

template<>
__device__ __inline__ float	streaming_load (const float *addr)

template<>
__device__ __inline__ cuDoubleComplex	streaming_load (const cuDoubleComplex *addr)

template<>
__device__ __inline__ cuFloatComplex	streaming_load (const cuFloatComplex *addr)

template<typename T >
__device__ __inline__ void	streaming_store (T *addr, const T val)

template<>
__device__ __inline__ void	streaming_store (double *addr, const double val)

template<>
__device__ __inline__ void	streaming_store (float *addr, const float val)

template<>
__device__ __inline__ void	streaming_store (cuDoubleComplex *addr, const cuDoubleComplex val)

Variables
__shared__ char	shared []

Function Documentation

template<typename v_t >

__inline__ __device__ v_t ghost_1dPartialBlockReduceSum	(	v_t	val,
		int	nwarps
	)

Here is the call graph for this function:

template<typename v_t >

__inline__ __device__ v_t ghost_blockReduceSum ( v_t val )

Here is the call graph for this function:

template<>

__inline__ __device__ double3 ghost_blockReduceSum< double3 > ( double3 val )

Here is the call graph for this function:

template<typename v_t >

__global__ void ghost_deviceReduceSum	(	v_t *	in,
		v_t *	out,
		ghost_lidx	N
	)

Here is the call graph for this function:

template<typename v_t >

__global__ void ghost_deviceReduceSumMultiple	(	v_t *	in,
		v_t *	out,
		ghost_lidx	N,
		ghost_lidx	ncols
	)

Here is the call graph for this function:

template<typename v_t >

__inline__ __device__ v_t ghost_partialBlockReduceSum	(	v_t	val,
		int	size
	)

Here is the call graph for this function:

template<typename v_t >

__inline__ __device__ v_t ghost_partialWarpReduceSum	(	v_t	val,
		int	size,
		int	width
	)

Here is the call graph for this function:

template<typename v_t , int size>

__inline__ __device__ v_t ghost_partialWarpReduceSumFast ( v_t val )

Here is the call graph for this function:

template<typename v_t >

__device__ v_t ghost_shfl_down	(	v_t	var,
		unsigned int	srcLane,
		int	width
	)

inline

template<typename v_t >

__device__ v_t ghost_shfl_down32	(	v_t	var,
		unsigned int	srcLane
	)

inline

Here is the call graph for this function:

template<>

__device__ cuDoubleComplex ghost_shfl_down< cuDoubleComplex >	(	cuDoubleComplex	var,
		unsigned int	srcLane,
		int	width
	)

inline

Here is the call graph for this function:

template<>

__device__ cuFloatComplex ghost_shfl_down< cuFloatComplex >	(	cuFloatComplex	var,
		unsigned int	srcLane,
		int	width
	)

inline

Here is the call graph for this function:

template<typename v_t >

__inline__ __device__ v_t ghost_warpReduceSum ( v_t val )

Here is the call graph for this function:

template<>

__inline__ __device__ double3 ghost_warpReduceSum< double3 > ( double3 val )

Here is the call graph for this function:

template<typename T >

__device__ __inline__ T streaming_load ( const T * addr )

template<>

__device__ __inline__ double streaming_load ( const double * addr )

template<>

__device__ __inline__ float streaming_load ( const float * addr )

template<>

__device__ __inline__ cuDoubleComplex streaming_load ( const cuDoubleComplex * addr )

template<>

__device__ __inline__ cuFloatComplex streaming_load ( const cuFloatComplex * addr )

template<typename T >

__device__ __inline__ void streaming_store	(	T *	addr,
		const T	val
	)

template<>

__device__ __inline__ void streaming_store	(	double *	addr,
		const double	val
	)

template<>

__device__ __inline__ void streaming_store	(	float *	addr,
		const float	val
	)

template<>

__device__ __inline__ void streaming_store	(	cuDoubleComplex *	addr,
		const cuDoubleComplex	val
	)

Variable Documentation

__shared__ char shared[]

Classes

Functions

Variables

Function Documentation

Variable Documentation