GHOST  1.1.2
General, Hybrid, and Optimized Sparse Toolkit
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
densemat_iter_macros.h
Go to the documentation of this file.
1 
6 #ifndef GHOST_DENSEMAT_ITER_MACROS_H
7 #define GHOST_DENSEMAT_ITER_MACROS_H
8 
9 #include "ghost/omp.h"
10 
/*
 * Element type and element-size selection.
 *
 * If the including file did not define DENSEMAT_DT, elements are addressed as
 * raw bytes: DENSEMAT_DT becomes char and DENSEMAT_ELSIZE(vec) reads the
 * element size from vec->elSize. If DENSEMAT_DT was defined by the includer,
 * pointer arithmetic is already in units of that type, so the size factor is 1.
 *
 * The argument is parenthesized so that expressions such as `&mat` or
 * `mats[i]` can be passed safely.
 */
#ifndef DENSEMAT_DT
#define DENSEMAT_DT char
#define DENSEMAT_ELSIZE(vec) ((vec)->elSize)
#else
#define DENSEMAT_ELSIZE(vec) 1
#endif
17 
/*
 * Layout predicates on a densemat pointer. Every use of the argument is
 * parenthesized so arbitrary pointer expressions (e.g. `&mat`) are accepted.
 * Note that the argument is evaluated more than once in
 * DENSEMAT_SINGLECOL_STRIDE1, so it must not have side effects.
 */

/* True if the GHOST_DENSEMAT_SCATTERED flag bits are all clear. */
#define DENSEMAT_COMPACT(vec) (!((vec)->traits.flags & GHOST_DENSEMAT_SCATTERED))
/* True if subsequent rows are compact (GHOST_DENSEMAT_SCATTERED_ROWS clear). */
#define DENSEMAT_COMPACT_ROWS(vec) (!((vec)->traits.flags & GHOST_DENSEMAT_SCATTERED_ROWS))
/* Non-zero if subsequent columns are scattered (GHOST_DENSEMAT_SCATTERED_COLS set). */
#define DENSEMAT_SCATTERED_COLS(vec) (((vec)->traits.flags & GHOST_DENSEMAT_SCATTERED_COLS))
/*
 * True for a single-column densemat whose rows are contiguous in memory:
 * either column-major storage, or row-major storage with a stride of 1.
 */
#define DENSEMAT_SINGLECOL_STRIDE1(vec) ((vec)->traits.ncols == 1 && (((vec)->traits.storage == GHOST_DENSEMAT_COLMAJOR) || (((vec)->traits.storage == GHOST_DENSEMAT_ROWMAJOR) && ((vec)->stride == 1))))
22 
/**
 * DENSEMAT_ITER(vec,call): execute `call` once for every element of `vec`.
 *
 * The expansion declares row, col, memrow, memcol (ghost_lidx) and valptr,
 * cuvalptr (DENSEMAT_DT *) in the enclosing scope, so it must be placed where
 * declarations are allowed and at most once per scope. `call` may use these
 * names; valptr and cuvalptr are recomputed before each execution of `call`.
 *
 * Dispatch:
 *  - Compact densemat (DENSEMAT_COMPACT): work-shared "omp for" loops. When
 *    ghost_omp_in_parallel() is true (presumably a wrapper around
 *    omp_in_parallel()) the loops are emitted directly, otherwise they are
 *    wrapped in a new "omp parallel" region. A single column with unit stride
 *    takes the SINGLECOL loop and pointer macros.
 *  - Otherwise: an "omp single" block logs a warning and visits the elements
 *    serially via DENSEMAT_ITER_BEGIN_SCATTERED.
 *
 * The compact loops run over DM_NROWSPAD(vec) rows but only execute `call`
 * for rows below DM_NROWS(vec).
 *
 * The trailing `if` merely references every declared variable to silence
 * unused-variable warnings; it calls printf, so <stdio.h> must be available.
 */
34 #define DENSEMAT_ITER(vec,call)\
35  ghost_lidx row=0,col=0,memrow=0,memcol=0;\
36  DENSEMAT_DT *valptr = NULL, *cuvalptr = NULL;\
37  if (DENSEMAT_COMPACT(vec)) {\
38  if (ghost_omp_in_parallel()) { /* emit the work-shared loop directly */\
39  if (DENSEMAT_SINGLECOL_STRIDE1(vec)) {\
40  DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol);\
41  valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
42  cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
43  call;\
44  DENSEMAT_ITER_END();\
45  } else {\
46  DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol);\
47  valptr = DENSEMAT_VALPTR(vec,row,col);\
48  cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
49  call;\
50  DENSEMAT_ITER_END();\
51  }\
52  } else { /* same loops, wrapped in a new parallel region */\
53  if (DENSEMAT_SINGLECOL_STRIDE1(vec)) {\
54  _Pragma("omp parallel")\
55  {\
56  DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol)\
57  valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
58  cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
59  call;\
60  DENSEMAT_ITER_END()\
61  }\
62  } else {\
63  _Pragma("omp parallel")\
64  {\
65  DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol)\
66  valptr = DENSEMAT_VALPTR(vec,row,col);\
67  cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
68  call;\
69  DENSEMAT_ITER_END()\
70  }\
71  }\
72  }\
73  } else { /* scattered densemat: serialized traversal */\
74  _Pragma("omp single")\
75  {\
76  GHOST_WARNING_LOG("Serialized operation for scattered densemat!");\
77  DENSEMAT_ITER_BEGIN_SCATTERED(vec,row,col,memrow,memcol);\
78  call;\
79  DENSEMAT_ITER_END();\
80  }\
81  }\
82  /* Trick the compiler to not produce warnings about unused variables */\
83  if ((row+col+memrow+memcol < 0) || \
84  (valptr == (DENSEMAT_DT *)0xbeef) || (cuvalptr == (DENSEMAT_DT *)0xbeef)) \
85  {printf("Never happens\n");}
86 
87 
/**
 * DENSEMAT_ITER_INIT(vec,call): like DENSEMAT_ITER, but additionally zeroes
 * the padding elements of a compact densemat.
 *
 * Declares row, col, memrow, memcol (ghost_lidx) and valptr, cuvalptr
 * (DENSEMAT_DT *) in the enclosing scope and executes `call` for every
 * element with row < DM_NROWS(vec) and col < vec->traits.ncols.
 *
 * In the compact branches the *_PAD macros are inserted after `call`; they
 * memset one element to zero for every position that `call` did not visit
 * (rows from DM_NROWS(vec) up to DM_NROWSPAD(vec), and in the multi-column
 * case columns from ncols up to ncolspadded).
 *
 * The scattered branch runs serially inside "omp single" and does not zero
 * any padding.
 *
 * The trailing `if` only references the declared variables to silence
 * unused-variable warnings.
 */
100 #define DENSEMAT_ITER_INIT(vec,call)\
101  ghost_lidx row=0,col=0,memrow=0,memcol=0;\
102  DENSEMAT_DT *valptr = NULL, *cuvalptr = NULL;\
103  if (DENSEMAT_COMPACT(vec)) {\
104  if (ghost_omp_in_parallel()) { /* emit the work-shared loop directly */\
105  if (DENSEMAT_SINGLECOL_STRIDE1(vec)) {\
106  DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol);\
107  valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
108  cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
109  call;\
110  DENSEMAT_ITER_COMPACT_SINGLECOL_PAD(vec,valptr,row,col,memrow,memcol);\
111  DENSEMAT_ITER_END();\
112  } else {\
113  DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol);\
114  valptr = DENSEMAT_VALPTR(vec,row,col);\
115  cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
116  call;\
117  DENSEMAT_ITER_COMPACT_PAD(vec,valptr,row,col,memrow,memcol);\
118  DENSEMAT_ITER_END();\
119  }\
120  } else { /* same loops, wrapped in a new parallel region */\
121  if (DENSEMAT_SINGLECOL_STRIDE1(vec)) {\
122  _Pragma("omp parallel")\
123  {\
124  DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol)\
125  valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
126  cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
127  call;\
128  DENSEMAT_ITER_COMPACT_SINGLECOL_PAD(vec,valptr,row,col,memrow,memcol);\
129  DENSEMAT_ITER_END()\
130  }\
131  } else {\
132  _Pragma("omp parallel")\
133  {\
134  DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol)\
135  valptr = DENSEMAT_VALPTR(vec,row,col);\
136  cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
137  call;\
138  DENSEMAT_ITER_COMPACT_PAD(vec,valptr,row,col,memrow,memcol);\
139  DENSEMAT_ITER_END()\
140  }\
141  }\
142  }\
143  } else { /* scattered densemat: serialized traversal, no padding handling */\
144  _Pragma("omp single")\
145  {\
146  GHOST_WARNING_LOG("Serialized operation for scattered densemat!");\
147  DENSEMAT_ITER_BEGIN_SCATTERED(vec,row,col,memrow,memcol);\
148  call;\
149  DENSEMAT_ITER_END();\
150  }\
151  }\
152  /* Trick the compiler to not produce warnings about unused variables */\
153  if ((row+col+memrow+memcol < 0) || \
154  (valptr == (DENSEMAT_DT *)0xbeef) || (cuvalptr == (DENSEMAT_DT *)0xbeef)) \
155  {printf("Never happens\n");}
156 
/** Shorthand for DENSEMAT_ITER2_OFFS with both vec2 offsets (row and column) set to 0. */
160 #define DENSEMAT_ITER2(vec1,vec2,call) DENSEMAT_ITER2_OFFS(vec1,vec2,0,0,call)
161 
/**
 * DENSEMAT_ITER2_OFFS(vec1,vec2,vec2roffs,vec2coffs,call): walk two densemats
 * in lockstep and execute `call` once per element pair.
 *
 * The expansion declares row, col, memrow1, memcol1, memrow2, memcol2
 * (ghost_lidx) and valptr1, valptr2, cuvalptr1, cuvalptr2 (DENSEMAT_DT *) in
 * the enclosing scope, so it must be placed where declarations are allowed
 * and at most once per scope. `call` may use these names; the four pointers
 * are recomputed before each execution of `call`.
 *
 * The iteration space is DM_NROWS(vec1) x vec1->traits.ncols. Element
 * (row,col) of vec1 is paired with element (row+vec2roffs, col+vec2coffs) of
 * vec2.
 *
 * Dispatch:
 *  - Both compact: work-shared "omp for" loops, emitted directly when
 *    ghost_omp_in_parallel() is true and wrapped in "omp parallel" otherwise.
 *    If both are single-column with unit stride the SINGLECOL variants are used.
 *  - vec1 has scattered columns but compact rows, vec2 compact: every thread
 *    walks the columns of vec1 through its column mask while the rows of each
 *    column are work-shared.
 *  - Anything else: serialized inside "omp single" after logging a warning.
 *
 * In the scattered-columns branch both OpenMP paths now compute identical
 * pointers: vec1 is addressed through its memory column memcol1 for the host
 * and the device pointer, and vec2 through memcol2+vec2coffs. Previously one
 * path used `col` for cuvalptr1 and the other dropped vec2coffs.
 *
 * @param vec1      First densemat; defines the iteration space.
 * @param vec2      Second densemat.
 * @param vec2roffs Row offset into vec2.
 * @param vec2coffs Column offset into vec2.
 * @param call      Statement(s) executed for every element pair.
 */
#define DENSEMAT_ITER2_OFFS(vec1,vec2,vec2roffs,vec2coffs,call)\
    ghost_lidx row=0,col=0,memrow1=0,memcol1=0,memrow2=0,memcol2=0;\
    DENSEMAT_DT *valptr1 = NULL, *valptr2 = NULL, *cuvalptr1 = NULL, *cuvalptr2 = NULL;\
    if (DENSEMAT_COMPACT(vec1) && DENSEMAT_COMPACT(vec2)) {\
        if (ghost_omp_in_parallel()) {\
            if (DENSEMAT_SINGLECOL_STRIDE1(vec1) && DENSEMAT_SINGLECOL_STRIDE1(vec2)) {\
                DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_SINGLECOL(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                valptr1 = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec1,row,0);\
                valptr2 = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec2,row+vec2roffs,vec2coffs);\
                cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,0);\
                cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            } else {\
                DENSEMAT_ITER2_BEGIN_COMPACT_OFFS(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
                valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,col+vec2coffs);\
                cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
                cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,col+vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        } else {\
            if (DENSEMAT_SINGLECOL_STRIDE1(vec1) && DENSEMAT_SINGLECOL_STRIDE1(vec2)) {\
                _Pragma("omp parallel")\
                {\
                    DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_SINGLECOL(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                    valptr1 = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec1,row,0);\
                    valptr2 = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec2,row+vec2roffs,vec2coffs);\
                    cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,0);\
                    cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,vec2coffs);\
                    call;\
                    DENSEMAT_ITER_END();\
                }\
            } else {\
                _Pragma("omp parallel")\
                {\
                    DENSEMAT_ITER2_BEGIN_COMPACT_OFFS(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                    valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
                    valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,col+vec2coffs);\
                    cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
                    cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,col+vec2coffs);\
                    call;\
                    DENSEMAT_ITER_END();\
                }\
            }\
        }\
    } else if (DENSEMAT_SCATTERED_COLS(vec1) && DENSEMAT_COMPACT_ROWS(vec1) && DENSEMAT_COMPACT(vec2)) {\
        /* vec1 is addressed by its memory column (memcol1); vec2 is compact, so memcol2 == col */\
        if (ghost_omp_in_parallel()) {\
            DENSEMAT_ITER2_BEGIN_SCATTEREDCOLS_COMPACTROWS1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
            valptr1 = DENSEMAT_VALPTR(vec1,row,memcol1);\
            valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,memcol2+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,memcol1);\
            cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,memcol2+vec2coffs);\
            call;\
            DENSEMAT_ITER_END();\
        } else {\
            _Pragma("omp parallel private(col,memcol1,memcol2)")\
            {\
                DENSEMAT_ITER2_BEGIN_SCATTEREDCOLS_COMPACTROWS1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                valptr1 = DENSEMAT_VALPTR(vec1,row,memcol1);\
                valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,memcol2+vec2coffs);\
                cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,memcol1);\
                cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,memcol2+vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        }\
    } else {\
        if (DENSEMAT_COMPACT(vec1)) {\
            _Pragma("omp single")\
            {\
                GHOST_WARNING_LOG("Serialized operation for scattered densemat! vec1 compact, vec2 scattered");\
                DENSEMAT_ITER2_BEGIN_SCATTERED2_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        } else if (DENSEMAT_COMPACT(vec2)) {\
            _Pragma("omp single")\
            {\
                GHOST_WARNING_LOG("Serialized operation for scattered densemat! vec1 scattered, vec2 compact");\
                DENSEMAT_ITER2_BEGIN_SCATTERED1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        } else {\
            _Pragma("omp single")\
            {\
                GHOST_WARNING_LOG("Serialized operation for scattered densemat! both scattered");\
                DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        }\
    }\
    /* Trick the compiler to not produce warnings about unused variables */\
    if ((row+col+memrow1+memcol1+memrow2+memcol2 < 0) || \
            (valptr1 == (DENSEMAT_DT *)0xbeef) || (valptr2 == (DENSEMAT_DT *)0xbeef) || \
            (cuvalptr1 == (DENSEMAT_DT *)0xbeef) || (cuvalptr2 == (DENSEMAT_DT *)0xbeef))\
    {printf("Never happens\n");}
277 
/**
 * DENSEMAT_ITER2_COMPACT_OFFS_TRANSPOSED(vec1,vec2,vec2roffs,vec2coffs,call):
 * walk vec1 element by element and pair each element with the transposed
 * position in vec2.
 *
 * Declares row, col, memrow1, memcol1, memrow2, memcol2 (ghost_lidx) and
 * valptr1, valptr2, cuvalptr1, cuvalptr2 (DENSEMAT_DT *) in the enclosing
 * scope. For each (row,col) of vec1, the vec2 pointers are computed with the
 * *_TRANSPOSED pointer macros at (row+vec2roffs, col+vec2coffs), i.e. with the
 * row/column roles swapped relative to DENSEMAT_VALPTR.
 *
 * Unlike DENSEMAT_ITER2_OFFS this macro performs no compactness check; both
 * branches use the compact work-shared loop. The loop is emitted directly
 * when ghost_omp_in_parallel() is true and wrapped in "omp parallel"
 * otherwise.
 *
 * The trailing `if` only references the declared variables to silence
 * unused-variable warnings.
 */
294 #define DENSEMAT_ITER2_COMPACT_OFFS_TRANSPOSED(vec1,vec2,vec2roffs,vec2coffs,call)\
295  ghost_lidx row=0,col=0,memrow1=0,memcol1=0,memrow2=0,memcol2=0;\
296  DENSEMAT_DT *valptr1 = NULL, *valptr2 = NULL, *cuvalptr1 = NULL, *cuvalptr2 = NULL;\
297  if (ghost_omp_in_parallel()) {\
298  DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_TRANSPOSED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
299  valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
300  valptr2 = DENSEMAT_VALPTR_TRANSPOSED(vec2,row+vec2roffs,col+vec2coffs);\
301  cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
302  cuvalptr2 = DENSEMAT_CUVALPTR_TRANSPOSED(vec2,row+vec2roffs,col+vec2coffs);\
303  call;\
304  DENSEMAT_ITER_END();\
305  } else {\
306  _Pragma("omp parallel")\
307  {\
308  DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_TRANSPOSED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
309  valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
310  valptr2 = DENSEMAT_VALPTR_TRANSPOSED(vec2,row+vec2roffs,col+vec2coffs);\
311  cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
312  cuvalptr2 = DENSEMAT_CUVALPTR_TRANSPOSED(vec2,row+vec2roffs,col+vec2coffs);\
313  call;\
314  DENSEMAT_ITER_END();\
315  }\
316  }\
317  /* Trick the compiler to not produce warnings about unused variables */\
318  if ((row+col+memrow1+memcol1+memrow2+memcol2 < 0) || \
319  (valptr1 == (DENSEMAT_DT *)0xbeef) || (valptr2 == (DENSEMAT_DT *)0xbeef) || \
320  (cuvalptr1 == (DENSEMAT_DT *)0xbeef) || (cuvalptr2 == (DENSEMAT_DT *)0xbeef)) \
321  {printf("Never happens\n");}
322 
323 
/**
 * Loop header for a compact single-column densemat.
 *
 * Sets col and memcol to 0 and opens a work-shared row loop over
 * [0, DM_NROWSPAD(vec)) with memrow = row. The body that follows only runs
 * for rows below DM_NROWS(vec).
 *
 * Leaves TWO blocks open (the `for` and the `if`); they are closed by
 * DENSEMAT_ITER_END, optionally with DENSEMAT_ITER_COMPACT_SINGLECOL_PAD in
 * between to supply the `else` branch.
 *
 * The private() clause is a string literal, so the names valptr and cuvalptr
 * in it are not substituted from the macro arguments: variables with exactly
 * these names must be visible at the expansion site.
 */
324 #define DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol)\
325  col = 0;\
326  memcol = 0;\
327  _Pragma("omp for schedule(runtime) private(memrow,valptr,cuvalptr)")\
328  for (row = 0; row<DM_NROWSPAD(vec); row++) {\
329  memrow = row;\
330  if( row < DM_NROWS(vec) ) {\
331 
/**
 * Padding handler matching DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL.
 *
 * Closes the `if (row < DM_NROWS(vec))` block opened by the BEGIN macro and
 * opens its `else` branch, in which the element at the current row is set to
 * zero (one element of DENSEMAT_ELSIZE(vec)*sizeof(DENSEMAT_DT) bytes).
 * The `else` block is left open for DENSEMAT_ITER_END to close.
 */
332 #define DENSEMAT_ITER_COMPACT_SINGLECOL_PAD(vec,valptr,row,col,memrow,memcol)\
333  } else {\
334  valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
335  memset(valptr,0,DENSEMAT_ELSIZE(vec)*sizeof(DENSEMAT_DT));\
336 
/**
 * Loop header for a compact multi-column densemat.
 *
 * Opens a work-shared row loop over [0, DM_NROWSPAD(vec)) with memrow = row
 * and col reset to 0 for every row. The inner column loop over
 * [0, vec->traits.ncols) with memcol = col is guarded by an un-braced
 * `if (row < DM_NROWS(vec))`, so for padding rows the inner loop is skipped
 * and col stays at 0.
 *
 * Leaves TWO blocks open (row loop and column loop); they are closed by
 * DENSEMAT_ITER_END, optionally with DENSEMAT_ITER_COMPACT_PAD in between.
 *
 * The private() clause is a string literal, so valptr and cuvalptr in it are
 * fixed names that must be visible at the expansion site.
 */
337 #define DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol)\
338  _Pragma("omp for schedule(runtime) private(col,memrow,memcol,valptr,cuvalptr)")\
339  for (row = 0; row<DM_NROWSPAD(vec); row++) {\
340  memrow = row;\
341  col = 0;\
342  if( row<DM_NROWS(vec) )\
343  for (; col<vec->traits.ncols; col++) {\
344  memcol = col;\
345 
/**
 * Padding handler matching DENSEMAT_ITER_BEGIN_COMPACT.
 *
 * Closes the column loop opened by the BEGIN macro and opens a second column
 * loop that continues from the current value of col up to
 * vec->traits.ncolspadded, zeroing one element per iteration. For padding
 * rows col is still 0 at this point, so the whole padded row is zeroed.
 * The second loop is left open for DENSEMAT_ITER_END to close.
 */
346 #define DENSEMAT_ITER_COMPACT_PAD(vec,valptr,row,col,memrow,memcol)\
347  }\
348  for (; col<vec->traits.ncolspadded; col++) {\
349  valptr = DENSEMAT_VALPTR(vec,row,col);\
350  memset(valptr,0,DENSEMAT_ELSIZE(vec)*sizeof(DENSEMAT_DT));\
351 
/**
 * Closes the two blocks that every DENSEMAT_ITER*_BEGIN_* macro in this file
 * leaves open (inner loop/conditional/block and outer loop).
 */
352 #define DENSEMAT_ITER_END()\
353  }\
354  }
355 
356 
/**
 * Loop header for two compact densemats without offsets: forwards to
 * DENSEMAT_ITER2_BEGIN_COMPACT_OFFS with both vec2 offsets set to 0.
 *
 * DENSEMAT_ITER2_BEGIN_COMPACT_OFFS takes twelve parameters, including
 * valptr1 and valptr2 directly after vec2. It does not expand those two
 * parameters (the names only occur inside its pragma string), so the
 * conventional identifiers are passed through here purely to satisfy the
 * parameter count.
 */
#define DENSEMAT_ITER2_BEGIN_COMPACT(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2)\
    DENSEMAT_ITER2_BEGIN_COMPACT_OFFS(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,0,0)
359 
/**
 * Loop header for two compact densemats.
 *
 * Opens a work-shared row loop over [0, DM_NROWS(vec1)) and an inner column
 * loop over [0, vec1->traits.ncols); memrow1/memrow2 are set to row and
 * memcol1/memcol2 to col. Both loops are left open for DENSEMAT_ITER_END.
 *
 * The parameters valptr1, valptr2, vec2roffs and vec2coffs are not expanded
 * in the body; the offsets are applied by the caller when it computes the
 * vec2 pointers. The names in the private() clause are part of a string
 * literal and therefore fixed: variables called valptr1, valptr2, cuvalptr1
 * and cuvalptr2 must be visible at the expansion site.
 */
360 #define DENSEMAT_ITER2_BEGIN_COMPACT_OFFS(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
361  _Pragma("omp for schedule(runtime) private(col,memcol1,memcol2,memrow1,memrow2,valptr1,valptr2,cuvalptr1,cuvalptr2)")\
362  for (row=0; row<DM_NROWS(vec1); row++) {\
363  memrow1 = row;\
364  memrow2 = row;\
365  for (col = 0; col<vec1->traits.ncols; col++) {\
366  memcol1 = col;\
367  memcol2 = col;
368 
/**
 * Loop header for two compact single-column densemats.
 *
 * Sets memcol1, memcol2 and col to 0, then opens a work-shared row loop over
 * [0, DM_NROWS(vec1)) with memrow1 = memrow2 = row. A plain `{` block is
 * opened inside the loop so that, like the other BEGIN macros, two blocks
 * remain open for DENSEMAT_ITER_END to close.
 *
 * The parameters valptr1, valptr2, vec2roffs and vec2coffs are not expanded
 * in the body. The names in the private() clause are fixed by the string
 * literal and must exist at the expansion site.
 */
369 #define DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_SINGLECOL(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
370  memcol1 = 0;\
371  memcol2 = 0;\
372  col = 0;\
373  _Pragma("omp for schedule(runtime) private(memrow1,memrow2,valptr1,valptr2,cuvalptr1,cuvalptr2)")\
374  for (row=0; row<DM_NROWS(vec1); row++) {\
375  memrow1 = row;\
376  memrow2 = row;\
377  {\
378 
/**
 * Loop header used by DENSEMAT_ITER2_COMPACT_OFFS_TRANSPOSED.
 *
 * Opens a work-shared row loop over [0, DM_NROWS(vec1)) and an inner column
 * loop over [0, vec1->traits.ncols); memrow1/memrow2 are set to row and
 * memcol1/memcol2 to col. Both loops are left open for DENSEMAT_ITER_END.
 *
 * The loop structure is the same as in DENSEMAT_ITER2_BEGIN_COMPACT_OFFS; the
 * transposition itself happens in the pointer macros chosen by the caller.
 * vec2roffs and vec2coffs are not expanded in the body. The names in the
 * private() clause are fixed by the string literal.
 */
379 #define DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_TRANSPOSED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
380  _Pragma("omp for schedule(runtime) private(col,memcol1,memcol2,memrow1,memrow2,valptr1,valptr2,cuvalptr1,cuvalptr2)")\
381  for (row=0; row<DM_NROWS(vec1); row++) {\
382  memrow1 = row;\
383  memrow2 = row;\
384  for (col = 0; col<vec1->traits.ncols; col++) {\
385  memcol1 = col;\
386  memcol2 = col;
387 
/**
 * Loop header for the case "vec1 has scattered columns but compact rows,
 * vec2 is compact".
 *
 * The OUTER loop runs over the logical columns [0, vec1->traits.ncols) and is
 * not work-shared; for each column memcol1 is advanced with
 * ghost_bitmap_next(vec1->colmask, memcol1) (starting from -1) and memcol2 is
 * set to col. The INNER loop over rows [0, DM_NROWS(vec1)) is work-shared
 * with "omp for" and sets memrow1 = memrow2 = row.
 *
 * Both loops are left open for DENSEMAT_ITER_END. vec2roffs and vec2coffs
 * are not expanded here. The names in the private() clause are fixed by the
 * string literal and must exist at the expansion site.
 *
 * presumably ghost_bitmap_next returns the next set index after the given
 * one - TODO confirm against the bitmap implementation.
 */
388 #define DENSEMAT_ITER2_BEGIN_SCATTEREDCOLS_COMPACTROWS1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
389  memcol1 = -1;\
390  for (col = 0; col<vec1->traits.ncols; col++) {\
391  memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
392  memcol2 = col;\
393  _Pragma("omp for schedule(runtime) private(memrow1,memrow2,valptr1,valptr2,cuvalptr1,cuvalptr2)")\
394  for (row=0; row<DM_NROWS(vec1); row++) {\
395  memrow1 = row;\
396  memrow2 = row;\
397 
398 #ifdef ROWMAJOR
399 #ifdef COLMAJOR
400 #error "Only one of COLMAJOR or ROWMAJOR has to be defined for this header!"
401 #endif
402 
/*
 * Element address macros for ROW-MAJOR storage.
 *
 * Each macro yields a DENSEMAT_DT pointer into vec->val (host) or vec->cu_val
 * (CUDA counterpart, addressed with the same index formula). The linear
 * element index is scaled by DENSEMAT_ELSIZE(vec).
 * Every use of `vec` is parenthesized so pointer expressions such as `&mat`
 * can be passed.
 */

/* Host address of element (row,col): index = row*stride + col. */
#define DENSEMAT_VALPTR(vec,row,col) (((DENSEMAT_DT *)((vec)->val))+(((row)*((vec)->stride)+(col))*DENSEMAT_ELSIZE(vec)))
/* Host address of element `row` of a single unit-stride column; `col` is ignored. */
#define DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col) (((DENSEMAT_DT *)((vec)->val))+(((row))*DENSEMAT_ELSIZE(vec)))
/* Host address with row/column roles swapped: index = col*stride + row. */
#define DENSEMAT_VALPTR_TRANSPOSED(vec,row,col) (((DENSEMAT_DT *)((vec)->val))+(((col)*((vec)->stride)+(row))*DENSEMAT_ELSIZE(vec)))
/* Same as DENSEMAT_VALPTR, but relative to vec->cu_val. */
#define DENSEMAT_CUVALPTR(vec,row,col) (((DENSEMAT_DT *)((vec)->cu_val))+(((row)*((vec)->stride)+(col))*DENSEMAT_ELSIZE(vec)))
/* Same as DENSEMAT_VALPTR_TRANSPOSED, but relative to vec->cu_val. */
#define DENSEMAT_CUVALPTR_TRANSPOSED(vec,row,col) (((DENSEMAT_DT *)((vec)->cu_val))+(((col)*((vec)->stride)+(row))*DENSEMAT_ELSIZE(vec)))
408 
409 
/**
 * Serial loop header for a scattered densemat (row-major build).
 *
 * Iterates the logical rows [0, DM_NROWS(vec)) and columns
 * [0, vec->traits.ncols). The memory indices memrow/memcol start at -1 and
 * are advanced with ghost_bitmap_next() on vec->rowmask / vec->colmask;
 * memcol is reset to -1 for every row. valptr and cuvalptr are computed from
 * (memrow, memcol) inside the macro.
 *
 * valptr and cuvalptr are not macro parameters: variables with exactly these
 * names must be visible at the expansion site. Both loops are left open for
 * DENSEMAT_ITER_END.
 *
 * presumably ghost_bitmap_next returns the next set index after the given
 * one - TODO confirm against the bitmap implementation.
 */
410 #define DENSEMAT_ITER_BEGIN_SCATTERED(vec,row,col,memrow,memcol)\
411  memrow = -1;\
412  for (row=0; row<DM_NROWS(vec); row++) {\
413  memrow = ghost_bitmap_next(vec->rowmask,memrow);\
414  memcol = -1;\
415  for (col = 0; col<vec->traits.ncols; col++) {\
416  memcol = ghost_bitmap_next(vec->colmask,memcol);\
417  valptr = DENSEMAT_VALPTR(vec,memrow,memcol);\
418  cuvalptr = DENSEMAT_CUVALPTR(vec,memrow,memcol);\
419 
/** Shorthand for DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS with both vec2 offsets set to 0. */
420 #define DENSEMAT_ITER2_BEGIN_SCATTERED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2)\
421  DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,0,0)
422 
/**
 * Serial loop header for two scattered densemats (row-major build).
 *
 * Before the row loop, memrow2 is advanced vec2roffs times through
 * vec2->rowmask; before each column loop, memcol2 is advanced vec2coffs times
 * through vec2->colmask. Inside the loops memrow1/memcol1 and memrow2/memcol2
 * are advanced once per logical row/column via ghost_bitmap_next(), and the
 * four pointers valptr1, valptr2, cuvalptr1, cuvalptr2 are computed.
 *
 * The iteration space is DM_NROWS(vec1) x vec1->traits.ncols. The pointer
 * variables are not macro parameters and must be visible at the expansion
 * site. Both loops are left open for DENSEMAT_ITER_END.
 *
 * NOTE(review): the vec2 offsets are used twice - once to skip ahead in the
 * masks and again added to memrow2/memcol2 when the pointers are formed.
 * This looks like the offset may be applied twice; confirm the intended
 * semantics before relying on non-zero offsets here.
 */
423 #define DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
424  memrow1 = -1;\
425  memrow2 = -1;\
426  for (row=0; row<vec2roffs; row++) { /* go to offset */\
427  memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
428  }\
429  for (row=0; row<DM_NROWS(vec1); row++) {\
430  memrow1 = ghost_bitmap_next(vec1->rowmask,memrow1);\
431  memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
432  memcol1 = -1;\
433  memcol2 = -1;\
434  for (col=0; col<vec2coffs; col++) { /* go to offset */\
435  memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
436  }\
437  for (col=0; col<vec1->traits.ncols; col++) {\
438  memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
439  memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
440  valptr1 = DENSEMAT_VALPTR(vec1,memrow1,memcol1);\
441  valptr2 = DENSEMAT_VALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
442  cuvalptr1 = DENSEMAT_CUVALPTR(vec1,memrow1,memcol1);\
443  cuvalptr2 = DENSEMAT_CUVALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
444 
/**
 * Serial loop header for "vec1 scattered, vec2 compact" (row-major build).
 *
 * vec1 is traversed through its row and column masks (memrow1/memcol1 start
 * at -1 and are advanced with ghost_bitmap_next()). vec2 is addressed
 * directly at (row+vec2roffs, col+vec2coffs); memrow2 and memcol2 are set to
 * the un-offset row and col.
 *
 * The iteration space is DM_NROWS(vec1) x vec1->traits.ncols. valptr1,
 * valptr2, cuvalptr1 and cuvalptr2 are not macro parameters and must be
 * visible at the expansion site. Both loops are left open for
 * DENSEMAT_ITER_END.
 */
445 #define DENSEMAT_ITER2_BEGIN_SCATTERED1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
446  memrow1 = -1;\
447  for (row=0; row<DM_NROWS(vec1); row++) {\
448  memrow1 = ghost_bitmap_next(vec1->rowmask,memrow1);\
449  memrow2 = row;\
450  memcol1 = -1;\
451  for (col=0; col<vec1->traits.ncols; col++) {\
452  memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
453  memcol2 = col;\
454  valptr1 = DENSEMAT_VALPTR(vec1,memrow1,memcol1);\
455  valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,col+vec2coffs);\
456  cuvalptr1 = DENSEMAT_CUVALPTR(vec1,memrow1,memcol1);\
457  cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,col+vec2coffs);\
458 
/**
 * Serial loop header for "vec1 compact, vec2 scattered" (row-major build).
 *
 * vec1 is addressed directly at (row, col) with memrow1 = row and
 * memcol1 = col. vec2 is traversed through its masks: memrow2 is first
 * advanced vec2roffs times through vec2->rowmask, memcol2 is advanced
 * vec2coffs times through vec2->colmask at the start of every row, and both
 * are advanced once more per logical row/column.
 *
 * The iteration space is DM_NROWS(vec1) x vec1->traits.ncols. valptr1,
 * valptr2, cuvalptr1 and cuvalptr2 are not macro parameters and must be
 * visible at the expansion site. Both loops are left open for
 * DENSEMAT_ITER_END.
 *
 * NOTE(review): the vec2 offsets are used twice - once to skip ahead in the
 * masks and again added to memrow2/memcol2 when the pointers are formed.
 * This looks like the offset may be applied twice; confirm the intended
 * semantics before relying on non-zero offsets here.
 */
459 #define DENSEMAT_ITER2_BEGIN_SCATTERED2_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
460  memrow2 = -1;\
461  for (row=0; row<vec2roffs; row++) { /* go to offset */\
462  memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
463  }\
464  for (row=0; row<DM_NROWS(vec1); row++) {\
465  memrow1 = row;\
466  memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
467  memcol2 = -1;\
468  for (col=0; col<vec2coffs; col++) { /* go to offset */\
469  memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
470  }\
471  for (col=0; col<vec1->traits.ncols; col++) {\
472  memcol1 = col;\
473  memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
474  valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
475  valptr2 = DENSEMAT_VALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
476  cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
477  cuvalptr2 = DENSEMAT_CUVALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
478 
479 #elif defined(COLMAJOR)
480 
/*
 * Element address macros for COLUMN-MAJOR storage.
 *
 * Each macro yields a DENSEMAT_DT pointer into vec->val (host) or vec->cu_val
 * (CUDA counterpart, addressed with the same index formula). The linear
 * element index is scaled by DENSEMAT_ELSIZE(vec).
 *
 * The whole expansion and every use of `vec` are parenthesized, matching the
 * row-major variants: without the outer parentheses an expression such as
 * DENSEMAT_VALPTR(v,r,c)[k] would bind the subscript inside the address-of.
 */

/* Host address of element (row,col): index = col*stride + row. */
#define DENSEMAT_VALPTR(vec,row,col) (&((DENSEMAT_DT *)((vec)->val))[((col)*((vec)->stride)+(row))*DENSEMAT_ELSIZE(vec)])
/* Host address of element `row` of a single unit-stride column; `col` is ignored. */
#define DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col) (&((DENSEMAT_DT *)((vec)->val))[((row))*DENSEMAT_ELSIZE(vec)])
/* Host address with row/column roles swapped: index = row*stride + col. */
#define DENSEMAT_VALPTR_TRANSPOSED(vec,row,col) (&((DENSEMAT_DT *)((vec)->val))[((row)*((vec)->stride)+(col))*DENSEMAT_ELSIZE(vec)])
/* Same as DENSEMAT_VALPTR, but relative to vec->cu_val. */
#define DENSEMAT_CUVALPTR(vec,row,col) (&((DENSEMAT_DT *)((vec)->cu_val))[((col)*((vec)->stride)+(row))*DENSEMAT_ELSIZE(vec)])
/* Same as DENSEMAT_VALPTR_TRANSPOSED, but relative to vec->cu_val. */
#define DENSEMAT_CUVALPTR_TRANSPOSED(vec,row,col) (&((DENSEMAT_DT *)((vec)->cu_val))[((row)*((vec)->stride)+(col))*DENSEMAT_ELSIZE(vec)])
486 
/**
 * Serial loop header for a scattered densemat (column-major build).
 *
 * Iterates the logical rows [0, DM_NROWS(vec)) and columns
 * [0, vec->traits.ncols). The memory indices memrow/memcol start at -1 and
 * are advanced with ghost_bitmap_next() on vec->rowmask / vec->colmask;
 * memcol is reset to -1 for every row. valptr and cuvalptr are computed from
 * (memrow, memcol) inside the macro.
 *
 * valptr and cuvalptr are not macro parameters: variables with exactly these
 * names must be visible at the expansion site. Both loops are left open for
 * DENSEMAT_ITER_END.
 *
 * presumably ghost_bitmap_next returns the next set index after the given
 * one - TODO confirm against the bitmap implementation.
 */
487 #define DENSEMAT_ITER_BEGIN_SCATTERED(vec,row,col,memrow,memcol)\
488  memrow = -1;\
489  for (row=0; row<DM_NROWS(vec); row++) {\
490  memrow = ghost_bitmap_next(vec->rowmask,memrow);\
491  memcol = -1;\
492  for (col = 0; col<vec->traits.ncols; col++) {\
493  memcol = ghost_bitmap_next(vec->colmask,memcol);\
494  valptr = DENSEMAT_VALPTR(vec,memrow,memcol);\
495  cuvalptr = DENSEMAT_CUVALPTR(vec,memrow,memcol);\
496 
/** Shorthand for DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS with both vec2 offsets set to 0. */
497 #define DENSEMAT_ITER2_BEGIN_SCATTERED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2)\
498  DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,0,0)
499 
/**
 * Serial loop header for two scattered densemats (column-major build).
 *
 * Before the row loop, memrow2 is advanced vec2roffs times through
 * vec2->rowmask; before each column loop, memcol2 is advanced vec2coffs times
 * through vec2->colmask. Inside the loops memrow1/memcol1 and memrow2/memcol2
 * are advanced once per logical row/column via ghost_bitmap_next(), and the
 * four pointers valptr1, valptr2, cuvalptr1, cuvalptr2 are computed.
 *
 * The iteration space is DM_NROWS(vec1) x vec1->traits.ncols. The pointer
 * variables are not macro parameters and must be visible at the expansion
 * site. Both loops are left open for DENSEMAT_ITER_END.
 *
 * NOTE(review): the vec2 offsets are used twice - once to skip ahead in the
 * masks and again added to memrow2/memcol2 when the pointers are formed.
 * This looks like the offset may be applied twice; confirm the intended
 * semantics before relying on non-zero offsets here.
 */
500 #define DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
501  memrow1 = -1;\
502  memrow2 = -1;\
503  for (row=0; row<vec2roffs; row++) { /* go to offset */\
504  memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
505  }\
506  for (row=0; row<DM_NROWS(vec1); row++) {\
507  memrow1 = ghost_bitmap_next(vec1->rowmask,memrow1);\
508  memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
509  memcol1 = -1;\
510  memcol2 = -1;\
511  for (col=0; col<vec2coffs; col++) { /* go to offset */\
512  memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
513  }\
514  for (col=0; col<vec1->traits.ncols; col++) {\
515  memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
516  memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
517  valptr1 = DENSEMAT_VALPTR(vec1,memrow1,memcol1);\
518  valptr2 = DENSEMAT_VALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
519  cuvalptr1 = DENSEMAT_CUVALPTR(vec1,memrow1,memcol1);\
520  cuvalptr2 = DENSEMAT_CUVALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
521 
/**
 * Serial loop header for "vec1 scattered, vec2 compact" (column-major build).
 *
 * vec1 is traversed through its row and column masks (memrow1/memcol1 start
 * at -1 and are advanced with ghost_bitmap_next()). vec2 is addressed
 * directly at (row+vec2roffs, col+vec2coffs); memrow2 and memcol2 are set to
 * the un-offset row and col.
 *
 * The iteration space is DM_NROWS(vec1) x vec1->traits.ncols. valptr1,
 * valptr2, cuvalptr1 and cuvalptr2 are not macro parameters and must be
 * visible at the expansion site. Both loops are left open for
 * DENSEMAT_ITER_END.
 */
522 #define DENSEMAT_ITER2_BEGIN_SCATTERED1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
523  memrow1 = -1;\
524  for (row=0; row<DM_NROWS(vec1); row++) {\
525  memrow1 = ghost_bitmap_next(vec1->rowmask,memrow1);\
526  memrow2 = row;\
527  memcol1 = -1;\
528  for (col=0; col<vec1->traits.ncols; col++) {\
529  memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
530  memcol2 = col;\
531  valptr1 = DENSEMAT_VALPTR(vec1,memrow1,memcol1);\
532  valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,col+vec2coffs);\
533  cuvalptr1 = DENSEMAT_CUVALPTR(vec1,memrow1,memcol1);\
534  cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,col+vec2coffs);\
535 
/**
 * Serial loop header for "vec1 compact, vec2 scattered" (column-major build).
 *
 * vec1 is addressed directly at (row, col) with memrow1 = row and
 * memcol1 = col. vec2 is traversed through its masks: memrow2 is first
 * advanced vec2roffs times through vec2->rowmask, memcol2 is advanced
 * vec2coffs times through vec2->colmask at the start of every row, and both
 * are advanced once more per logical row/column.
 *
 * The iteration space is DM_NROWS(vec1) x vec1->traits.ncols. valptr1,
 * valptr2, cuvalptr1 and cuvalptr2 are not macro parameters and must be
 * visible at the expansion site. Both loops are left open for
 * DENSEMAT_ITER_END.
 *
 * NOTE(review): the vec2 offsets are used twice - once to skip ahead in the
 * masks and again added to memrow2/memcol2 when the pointers are formed.
 * This looks like the offset may be applied twice; confirm the intended
 * semantics before relying on non-zero offsets here.
 */
536 #define DENSEMAT_ITER2_BEGIN_SCATTERED2_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
537  memrow2 = -1;\
538  for (row=0; row<vec2roffs; row++) { /* go to offset */\
539  memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
540  }\
541  for (row=0; row<DM_NROWS(vec1); row++) {\
542  memrow1 = row;\
543  memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
544  memcol2 = -1;\
545  for (col=0; col<vec2coffs; col++) { /* go to offset */\
546  memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
547  }\
548  for (col=0; col<vec1->traits.ncols; col++) {\
549  memcol1 = col;\
550  memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
551  valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
552  valptr2 = DENSEMAT_VALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
553  cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
554  cuvalptr2 = DENSEMAT_CUVALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
555 
556 #else
557 #error "Either COLMAJOR or ROWMAJOR has to be defined for this header!"
558 
559 #endif
560 
561 #endif
Function wrappers for OpenMP functions. If OpenMP is disabled, the functions are still defined but st...