#include "ghost/config.h"
#include "ghost/types.h"
#include "ghost/instr.h"
#include "ghost/log.h"
#include "ghost/error.h"
#include "ghost/util.h"
#include "ghost/math.h"
#include <cuComplex.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include <complex.h>
#include <complex>
#include "ghost/cu_complex.h"
#include "ghost/cu_sell_kernel.h"

Include dependency graph for sell_spmv_cu_kernel.h:

Macros
#define	MAX_COLS_PER_BLOCK 16

#define	MAX_COLS_PER_BLOCK_COLMAJOR 16

#define	SELL_CUDA_THREADSPERBLOCK 512

#define	LOCALDOT_ONTHEFLY

Functions
template<typename m_t , typename v_t , typename v_t_b , int nrowsinblock, int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
__global__ void	SELL_kernel_CU_rm_tmpl (v_t *const __restrict__ lhs, const ghost_lidx lhs_lda, const v_t *const __restrict__ rhs, const ghost_lidx rhs_lda, const ghost_spmv_flags flags, const ghost_lidx nrows, const ghost_lidx *const __restrict__ rowlen, const ghost_lidx *const __restrict__ mcol, const m_t *const __restrict__ val, const ghost_lidx *const __restrict__ chunkstart, const v_t *const __restrict__ shift, const v_t alpha, const v_t beta, v_t *const __restrict__ localdot, v_t *const __restrict__ z, const ghost_lidx z_lda, const v_t delta, const v_t eta)

template<typename m_t , typename v_t , typename v_t_b , int nrowsinblock, int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
__global__ void	SELL_kernel_CU_cm_tmpl (v_t *const __restrict__ lhs, const ghost_lidx lhs_lda, const v_t *const __restrict__ rhs, const ghost_lidx rhs_lda, const ghost_spmv_flags flags, const ghost_lidx nrows, const ghost_lidx *const __restrict__ rowlen, const ghost_lidx *const __restrict__ mcol, const m_t *const __restrict__ val, const ghost_lidx *const __restrict__ chunkstart, const v_t *const __restrict__ shift, const v_t alpha, const v_t beta, v_t *const __restrict__ localdot, v_t *const __restrict__ z, const ghost_lidx z_lda, const v_t delta, const v_t eta)

template<typename m_dt , typename v_dt_host , typename v_dt_device , typename v_dt_base , int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
ghost_error	ghost_sellspmv_cu_tmpl (ghost_densemat *lhs, ghost_sparsemat *mat, ghost_densemat *rhs, ghost_spmv_opts opts)

Macro Definition Documentation

#define LOCALDOT_ONTHEFLY

#define MAX_COLS_PER_BLOCK 16

#define MAX_COLS_PER_BLOCK_COLMAJOR 16

#define SELL_CUDA_THREADSPERBLOCK 512

Function Documentation

template<typename m_dt , typename v_dt_host , typename v_dt_device , typename v_dt_base , int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>

ghost_error ghost_sellspmv_cu_tmpl	(	ghost_densemat *	lhs,
		ghost_sparsemat *	mat,
		ghost_densemat *	rhs,
		ghost_spmv_opts	opts
	)

Here is the call graph for this function:

template<typename m_t , typename v_t , typename v_t_b , int nrowsinblock, int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>

__global__ void SELL_kernel_CU_cm_tmpl	(	v_t *const __restrict__	lhs,
		const ghost_lidx	lhs_lda,
		const v_t *const __restrict__	rhs,
		const ghost_lidx	rhs_lda,
		const ghost_spmv_flags	flags,
		const ghost_lidx	nrows,
		const ghost_lidx *const __restrict__	rowlen,
		const ghost_lidx *const __restrict__	mcol,
		const m_t *const __restrict__	val,
		const ghost_lidx *const __restrict__	chunkstart,
		const v_t *const __restrict__	shift,
		const v_t	alpha,
		const v_t	beta,
		v_t *const __restrict__	localdot,
		v_t *const __restrict__	z,
		const ghost_lidx	z_lda,
		const v_t	delta,
		const v_t	eta
	)

Here is the call graph for this function:

template<typename m_t , typename v_t , typename v_t_b , int nrowsinblock, int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>

__global__ void SELL_kernel_CU_rm_tmpl	(	v_t *const __restrict__	lhs,
		const ghost_lidx	lhs_lda,
		const v_t *const __restrict__	rhs,
		const ghost_lidx	rhs_lda,
		const ghost_spmv_flags	flags,
		const ghost_lidx	nrows,
		const ghost_lidx *const __restrict__	rowlen,
		const ghost_lidx *const __restrict__	mcol,
		const m_t *const __restrict__	val,
		const ghost_lidx *const __restrict__	chunkstart,
		const v_t *const __restrict__	shift,
		const v_t	alpha,
		const v_t	beta,
		v_t *const __restrict__	localdot,
		v_t *const __restrict__	z,
		const ghost_lidx	z_lda,
		const v_t	delta,
		const v_t	eta
	)

Here is the call graph for this function:

Macros

Functions

Macro Definition Documentation

Function Documentation