3 template<
typename T,
int CFGK,
int CFGM,
int UNROLL>
6 GHOST_FUNC_ENTER(GHOST_FUNCTYPE_MATH)
13 T * vval = (T *)v->
val, * wval = (T *)w->
val, * xval = (T *)x->
val;
30 for (k=0; k<CFGK; k++) {
31 for (j=0; j<CFGM; j++) {
32 xval[k*ldx+j] = mybeta*xval[k*ldx+j];
35 #pragma omp parallel private(j,k)
39 memset(x_priv,0,CFGM*CFGK*
sizeof(T));
42 #pragma omp for schedule(runtime)
45 #pragma vector aligned
47 for (k=0; k<CFGK; k++) {
48 #pragma unroll_and_jam
49 for (j=0; j<CFGM; j++) {
50 x_priv[j*CFGK+k] += (*alpha)*
std::conj(vval[i*ldv+j])*wval[i*ldw+k];
56 #pragma omp for schedule(runtime)
59 #pragma vector aligned
61 for (k=0; k<CFGK; k++) {
62 #pragma unroll_and_jam
63 for (j=0; j<CFGM; j++) {
64 x_priv[j*CFGK+k] += (*alpha)*vval[i*ldv+j]*wval[i*ldw+k];
73 #pragma vector aligned
75 for (k=0; k<CFGK; k++) {
76 #pragma unroll_and_jam
77 for (j=0; j<CFGM; j++) {
78 xval[k*ldx+j] += x_priv[j*CFGK+k];
89 GHOST_FUNC_EXIT(GHOST_FUNCTYPE_MATH)
ghost_error ghost_rank(int *rank, ghost_mpi_comm comm)
Definition: locality.c:120
__device__ T conj(T x)
Definition: cu_complex.h:226
static ghost_error ghost_tsmttsm__a_plain_cm_rm_tmpl(ghost_densemat *x, ghost_densemat *v, ghost_densemat *w, T *alpha, T *beta, int conjv)
Definition: tsmttsm_plain_kernel_tmpl.h:4
#define GHOST_CALL_GOTO(call, label, __err)
This macro should be used for calling a GHOST function inside a function which itself returns a ghost...
Definition: error.h:140
ghost_error ghost_malloc(void **mem, const size_t size)
Allocate memory.
Definition: util.c:172
No error occured.
Definition: error.h:27
int32_t ghost_lidx
Definition: types.h:503
ghost_error
Error return type.
Definition: error.h:23
ghost_lidx stride
The leading dimensions of the densemat in memory.
Definition: densemat.h:264
char * val
The values of the densemat.
Definition: densemat.h:235
ghost_densemat_traits traits
The densemat's traits.
Definition: densemat.h:231
A dense vector/matrix.
Definition: densemat.h:226