GHOST  1.1.2
General, Hybrid, and Optimized Sparse Toolkit
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
Macros | Functions
sell_spmv_cu_kernel.h File Reference
#include "ghost/config.h"
#include "ghost/types.h"
#include "ghost/instr.h"
#include "ghost/log.h"
#include "ghost/error.h"
#include "ghost/util.h"
#include "ghost/math.h"
#include <cuComplex.h>
#include <cuda_runtime.h>
#include <cuda.h>
#include <complex.h>
#include <complex>
#include "ghost/cu_complex.h"
#include "ghost/cu_sell_kernel.h"
Include dependency graph for sell_spmv_cu_kernel.h: (graph image not included in this text rendering)

Go to the source code of this file.

Macros

#define MAX_COLS_PER_BLOCK   16
 
#define MAX_COLS_PER_BLOCK_COLMAJOR   16
 
#define SELL_CUDA_THREADSPERBLOCK   512
 
#define LOCALDOT_ONTHEFLY
 

Functions

template<typename m_t , typename v_t , typename v_t_b , int nrowsinblock, int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
__global__ void SELL_kernel_CU_rm_tmpl (v_t *const __restrict__ lhs, const ghost_lidx lhs_lda, const v_t *const __restrict__ rhs, const ghost_lidx rhs_lda, const ghost_spmv_flags flags, const ghost_lidx nrows, const ghost_lidx *const __restrict__ rowlen, const ghost_lidx *const __restrict__ mcol, const m_t *const __restrict__ val, const ghost_lidx *const __restrict__ chunkstart, const v_t *const __restrict__ shift, const v_t alpha, const v_t beta, v_t *const __restrict__ localdot, v_t *const __restrict__ z, const ghost_lidx z_lda, const v_t delta, const v_t eta)
 
template<typename m_t , typename v_t , typename v_t_b , int nrowsinblock, int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
__global__ void SELL_kernel_CU_cm_tmpl (v_t *const __restrict__ lhs, const ghost_lidx lhs_lda, const v_t *const __restrict__ rhs, const ghost_lidx rhs_lda, const ghost_spmv_flags flags, const ghost_lidx nrows, const ghost_lidx *const __restrict__ rowlen, const ghost_lidx *const __restrict__ mcol, const m_t *const __restrict__ val, const ghost_lidx *const __restrict__ chunkstart, const v_t *const __restrict__ shift, const v_t alpha, const v_t beta, v_t *const __restrict__ localdot, v_t *const __restrict__ z, const ghost_lidx z_lda, const v_t delta, const v_t eta)
 
template<typename m_dt , typename v_dt_host , typename v_dt_device , typename v_dt_base , int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
ghost_error ghost_sellspmv_cu_tmpl (ghost_densemat *lhs, ghost_sparsemat *mat, ghost_densemat *rhs, ghost_spmv_opts opts)
 

Macro Definition Documentation

#define LOCALDOT_ONTHEFLY
#define MAX_COLS_PER_BLOCK   16
#define MAX_COLS_PER_BLOCK_COLMAJOR   16
#define SELL_CUDA_THREADSPERBLOCK   512

Function Documentation

template<typename m_dt , typename v_dt_host , typename v_dt_device , typename v_dt_base , int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
ghost_error ghost_sellspmv_cu_tmpl ( ghost_densemat *  lhs,
ghost_sparsemat *  mat,
ghost_densemat *  rhs,
ghost_spmv_opts  opts
)

Here is the call graph for this function: (graph image not included in this text rendering)

template<typename m_t , typename v_t , typename v_t_b , int nrowsinblock, int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
__global__ void SELL_kernel_CU_cm_tmpl ( v_t *const __restrict__  lhs,
const ghost_lidx  lhs_lda,
const v_t *const __restrict__  rhs,
const ghost_lidx  rhs_lda,
const ghost_spmv_flags  flags,
const ghost_lidx  nrows,
const ghost_lidx *const __restrict__  rowlen,
const ghost_lidx *const __restrict__  mcol,
const m_t *const __restrict__  val,
const ghost_lidx *const __restrict__  chunkstart,
const v_t *const __restrict__  shift,
const v_t  alpha,
const v_t  beta,
v_t *const __restrict__  localdot,
v_t *const __restrict__  z,
const ghost_lidx  z_lda,
const v_t  delta,
const v_t  eta 
)

Here is the call graph for this function: (graph image not included in this text rendering)

template<typename m_t , typename v_t , typename v_t_b , int nrowsinblock, int C, int ncols, bool do_axpby, bool do_scale, bool do_vshift, bool do_dot_yy, bool do_dot_xy, bool do_dot_xx, bool do_chain_axpby>
__global__ void SELL_kernel_CU_rm_tmpl ( v_t *const __restrict__  lhs,
const ghost_lidx  lhs_lda,
const v_t *const __restrict__  rhs,
const ghost_lidx  rhs_lda,
const ghost_spmv_flags  flags,
const ghost_lidx  nrows,
const ghost_lidx *const __restrict__  rowlen,
const ghost_lidx *const __restrict__  mcol,
const m_t *const __restrict__  val,
const ghost_lidx *const __restrict__  chunkstart,
const v_t *const __restrict__  shift,
const v_t  alpha,
const v_t  beta,
v_t *const __restrict__  localdot,
v_t *const __restrict__  z,
const ghost_lidx  z_lda,
const v_t  delta,
const v_t  eta 
)

Here is the call graph for this function: (graph image not included in this text rendering)