GHOST  1.1.2
General, Hybrid, and Optimized Sparse Toolkit
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
tsmttsm_plain_kernel_tmpl.h
Go to the documentation of this file.
1 #include <complex>
2 
3 template<typename T, int CFGK, int CFGM, int UNROLL>
5 {
6  GHOST_FUNC_ENTER(GHOST_FUNCTYPE_MATH)
8 
9  int myrank=0;
10  ghost_lidx n = v->traits.nrows;
11  ghost_lidx i,j,k;
12  ghost_lidx ldv, ldw, ldx;
13  T * vval = (T *)v->val, * wval = (T *)w->val, * xval = (T *)x->val;
14  T mybeta = *beta;
15 
16  if (v->context) {
17  GHOST_CALL_GOTO(ghost_rank(&myrank,v->context->mpicomm),err,ret);
18  }
19 
20  ldv = v->stride;
21  ldw = w->stride;
22  ldx = x->stride;
23 
24  // make sure that the initial x only gets added up once
25  if (myrank) {
26  mybeta = 0.;
27  }
28 
29 #pragma simd
30  for (k=0; k<CFGK; k++) {
31  for (j=0; j<CFGM; j++) {
32  xval[k*ldx+j] = mybeta*xval[k*ldx+j];
33  }
34  }
35 #pragma omp parallel private(j,k)
36  {
37  T *x_priv;
38  ghost_malloc((void **)&x_priv,CFGM*CFGK*sizeof(T));
39  memset(x_priv,0,CFGM*CFGK*sizeof(T));
40 
41  if (conjv) {
42 #pragma omp for schedule(runtime)
43  for (i=0; i<n; i++) {
44 #pragma simd
45 #pragma vector aligned
46 #pragma ivdep
47  for (k=0; k<CFGK; k++) {
48 #pragma unroll_and_jam
49  for (j=0; j<CFGM; j++) {
50  x_priv[j*CFGK+k] += (*alpha)*std::conj(vval[i*ldv+j])*wval[i*ldw+k];
51  }
52  }
53 
54  }
55  } else {
56 #pragma omp for schedule(runtime)
57  for (i=0; i<n; i++) {
58 #pragma simd
59 #pragma vector aligned
60 #pragma ivdep
61  for (k=0; k<CFGK; k++) {
62 #pragma unroll_and_jam
63  for (j=0; j<CFGM; j++) {
64  x_priv[j*CFGK+k] += (*alpha)*vval[i*ldv+j]*wval[i*ldw+k];
65  }
66  }
67 
68  }
69  }
70 
71 #pragma omp critical
72 #pragma simd
73 #pragma vector aligned
74 #pragma ivdep
75  for (k=0; k<CFGK; k++) {
76 #pragma unroll_and_jam
77  for (j=0; j<CFGM; j++) {
78  xval[k*ldx+j] += x_priv[j*CFGK+k];
79  }
80  }
81 
82  free(x_priv);
83  }
84 
85  goto out;
86 err:
87 
88 out:
89  GHOST_FUNC_EXIT(GHOST_FUNCTYPE_MATH)
90  return ret;
91 
92 }
ghost_error ghost_rank(int *rank, ghost_mpi_comm comm)
Definition: locality.c:120
__device__ T conj(T x)
Definition: cu_complex.h:226
static ghost_error ghost_tsmttsm__a_plain_cm_rm_tmpl(ghost_densemat *x, ghost_densemat *v, ghost_densemat *w, T *alpha, T *beta, int conjv)
Definition: tsmttsm_plain_kernel_tmpl.h:4
#define GHOST_CALL_GOTO(call, label, __err)
This macro should be used for calling a GHOST function inside a function which itself returns a ghost...
Definition: error.h:140
ghost_error ghost_malloc(void **mem, const size_t size)
Allocate memory.
Definition: util.c:172
No error occurred.
Definition: error.h:27
int32_t ghost_lidx
Definition: types.h:503
ghost_error
Error return type.
Definition: error.h:23
ghost_lidx stride
The leading dimensions of the densemat in memory.
Definition: densemat.h:264
char * val
The values of the densemat.
Definition: densemat.h:235
ghost_densemat_traits traits
The densemat's traits.
Definition: densemat.h:231
A dense vector/matrix.
Definition: densemat.h:226