GHOST
1.1.2
General, Hybrid, and Optimized Sparse Toolkit
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
Pages
include
ghost
densemat_iter_macros.h
Go to the documentation of this file.
1
6
#ifndef GHOST_DENSEMAT_ITER_MACROS_H
7
#define GHOST_DENSEMAT_ITER_MACROS_H
8
9
#include "
ghost/omp.h
"
10
11
/*
 * Element type used for pointer arithmetic in the iteration macros.
 *
 * If the including file did not define DENSEMAT_DT, elements are addressed as
 * raw chars and DENSEMAT_ELSIZE(vec) reads the element size from vec->elSize.
 * If DENSEMAT_DT was defined, arithmetic happens on DENSEMAT_DT pointers and
 * the extra element-size factor is 1.
 *
 * The argument is parenthesized so that expressions such as `p + 1` work.
 */
#ifndef DENSEMAT_DT
#define DENSEMAT_DT char
#define DENSEMAT_ELSIZE(vec) ((vec)->elSize)
#else
#define DENSEMAT_ELSIZE(vec) 1
#endif
17
18
/* True if the GHOST_DENSEMAT_SCATTERED bit is not set in vec->traits.flags. */
#define DENSEMAT_COMPACT(vec) (!((vec)->traits.flags & GHOST_DENSEMAT_SCATTERED))
19
/* True if the GHOST_DENSEMAT_SCATTERED_ROWS bit is not set, i.e. subsequent rows are compact. */
#define DENSEMAT_COMPACT_ROWS(vec) (!((vec)->traits.flags & GHOST_DENSEMAT_SCATTERED_ROWS))
20
/*
 * Non-zero if the GHOST_DENSEMAT_SCATTERED_COLS bit is set, i.e. subsequent
 * cols are scattered. The result is the masked flag value, not 0/1.
 */
#define DENSEMAT_SCATTERED_COLS(vec) (((vec)->traits.flags & GHOST_DENSEMAT_SCATTERED_COLS))
21
/*
 * True if vec has exactly one column and is either stored column-major, or
 * stored row-major with a stride of 1.
 */
#define DENSEMAT_SINGLECOL_STRIDE1(vec) ((vec)->traits.ncols == 1 && (((vec)->traits.storage == GHOST_DENSEMAT_COLMAJOR) || (((vec)->traits.storage == GHOST_DENSEMAT_ROWMAJOR) && ((vec)->stride == 1))))
22
34
/**
 * Execute `call` for the elements of the densemat `vec`.
 *
 * The expansion declares row, col, memrow, memcol, valptr and cuvalptr in the
 * enclosing scope (so it can be expanded only once per scope); `call` may use
 * these names.
 *
 * - Compact densemat: the row loop is an OpenMP work-sharing loop. If
 *   ghost_omp_in_parallel() is false, the macro opens its own parallel region.
 *   A dedicated single-column variant is used when
 *   DENSEMAT_SINGLECOL_STRIDE1(vec) holds.
 * - Otherwise (scattered): a warning is logged and the iteration runs inside
 *   an `omp single` block.
 *
 * @param vec  Pointer to the densemat.
 * @param call Statement(s) executed per element.
 */
#define DENSEMAT_ITER(vec,call)\
    ghost_lidx row=0,col=0,memrow=0,memcol=0;\
    DENSEMAT_DT *valptr = NULL, *cuvalptr = NULL;\
    if (DENSEMAT_COMPACT(vec)) {\
        if (ghost_omp_in_parallel()) {\
            if (DENSEMAT_SINGLECOL_STRIDE1(vec)) {\
                DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol);\
                valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
                cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
                call;\
                DENSEMAT_ITER_END();\
            } else {\
                DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol);\
                valptr = DENSEMAT_VALPTR(vec,row,col);\
                cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        } else {\
            if (DENSEMAT_SINGLECOL_STRIDE1(vec)) {\
                _Pragma("omp parallel")\
                {\
                    DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol)\
                    valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
                    cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
                    call;\
                    DENSEMAT_ITER_END()\
                }\
            } else {\
                _Pragma("omp parallel")\
                {\
                    DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol)\
                    valptr = DENSEMAT_VALPTR(vec,row,col);\
                    cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
                    call;\
                    DENSEMAT_ITER_END()\
                }\
            }\
        }\
    } else {\
        _Pragma("omp single")\
        {\
            GHOST_WARNING_LOG("Serialized operation for scattered densemat!");\
            DENSEMAT_ITER_BEGIN_SCATTERED(vec,row,col,memrow,memcol);\
            call;\
            DENSEMAT_ITER_END();\
        }\
    }\
    /* Trick the compiler to not produce warnings about unused variables */\
    if ((row+col+memrow+memcol < 0) || \
            (valptr == (DENSEMAT_DT *)0xbeef) || (cuvalptr == (DENSEMAT_DT *)0xbeef)) \
    {printf("Never happens\n");}
86
87
100
/**
 * Like DENSEMAT_ITER, but for compact densemats the padding macros
 * (DENSEMAT_ITER_COMPACT_SINGLECOL_PAD / DENSEMAT_ITER_COMPACT_PAD) are
 * inserted after `call`, which zero the padding elements with memset.
 *
 * The expansion declares row, col, memrow, memcol, valptr and cuvalptr in the
 * enclosing scope; `call` may use these names. The scattered path is identical
 * to DENSEMAT_ITER (serialized inside `omp single`, no padding handling).
 *
 * @param vec  Pointer to the densemat.
 * @param call Statement(s) executed per element.
 */
#define DENSEMAT_ITER_INIT(vec,call)\
    ghost_lidx row=0,col=0,memrow=0,memcol=0;\
    DENSEMAT_DT *valptr = NULL, *cuvalptr = NULL;\
    if (DENSEMAT_COMPACT(vec)) {\
        if (ghost_omp_in_parallel()) {\
            if (DENSEMAT_SINGLECOL_STRIDE1(vec)) {\
                DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol);\
                valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
                cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
                call;\
                DENSEMAT_ITER_COMPACT_SINGLECOL_PAD(vec,valptr,row,col,memrow,memcol);\
                DENSEMAT_ITER_END();\
            } else {\
                DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol);\
                valptr = DENSEMAT_VALPTR(vec,row,col);\
                cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
                call;\
                DENSEMAT_ITER_COMPACT_PAD(vec,valptr,row,col,memrow,memcol);\
                DENSEMAT_ITER_END();\
            }\
        } else {\
            if (DENSEMAT_SINGLECOL_STRIDE1(vec)) {\
                _Pragma("omp parallel")\
                {\
                    DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol)\
                    valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
                    cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
                    call;\
                    DENSEMAT_ITER_COMPACT_SINGLECOL_PAD(vec,valptr,row,col,memrow,memcol);\
                    DENSEMAT_ITER_END()\
                }\
            } else {\
                _Pragma("omp parallel")\
                {\
                    DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol)\
                    valptr = DENSEMAT_VALPTR(vec,row,col);\
                    cuvalptr = DENSEMAT_CUVALPTR(vec,row,col);\
                    call;\
                    DENSEMAT_ITER_COMPACT_PAD(vec,valptr,row,col,memrow,memcol);\
                    DENSEMAT_ITER_END()\
                }\
            }\
        }\
    } else {\
        _Pragma("omp single")\
        {\
            GHOST_WARNING_LOG("Serialized operation for scattered densemat!");\
            DENSEMAT_ITER_BEGIN_SCATTERED(vec,row,col,memrow,memcol);\
            call;\
            DENSEMAT_ITER_END();\
        }\
    }\
    /* Trick the compiler to not produce warnings about unused variables */\
    if ((row+col+memrow+memcol < 0) || \
            (valptr == (DENSEMAT_DT *)0xbeef) || (cuvalptr == (DENSEMAT_DT *)0xbeef)) \
    {printf("Never happens\n");}
156
160
/* Iterate over two densemats at once with zero row and column offset into vec2. */
#define DENSEMAT_ITER2(vec1,vec2,call) \
    DENSEMAT_ITER2_OFFS(vec1,vec2,0,0,call)
161
177
/**
 * Execute `call` for the elements of `vec1`, paired with the elements of
 * `vec2` shifted by (vec2roffs, vec2coffs).
 *
 * The expansion declares row, col, memrow1, memcol1, memrow2, memcol2,
 * valptr1, valptr2, cuvalptr1 and cuvalptr2 in the enclosing scope; `call` may
 * use these names.
 *
 * Paths:
 * - both compact: OpenMP work-sharing loop over rows (own parallel region is
 *   opened if ghost_omp_in_parallel() is false), with a single-column variant.
 * - vec1 has scattered columns but compact rows, vec2 compact: serial column
 *   loop with a work-sharing row loop inside.
 * - anything else: warning is logged and iteration runs inside `omp single`.
 *
 * In the scattered-columns path both the in-parallel and the non-parallel
 * variant now compute the same addresses: vec1 is indexed by its memory
 * column (memcol1) and vec2 by memcol2+vec2coffs. Previously one variant used
 * the logical column for cuvalptr1 and the other dropped vec2coffs.
 *
 * @param vec1      Pointer to the first densemat (defines the iteration space).
 * @param vec2      Pointer to the second densemat.
 * @param vec2roffs Row offset applied to vec2.
 * @param vec2coffs Column offset applied to vec2.
 * @param call      Statement(s) executed per element pair.
 */
#define DENSEMAT_ITER2_OFFS(vec1,vec2,vec2roffs,vec2coffs,call)\
    ghost_lidx row=0,col=0,memrow1=0,memcol1=0,memrow2=0,memcol2=0;\
    DENSEMAT_DT *valptr1 = NULL, *valptr2 = NULL, *cuvalptr1 = NULL, *cuvalptr2 = NULL;\
    if (DENSEMAT_COMPACT(vec1) && DENSEMAT_COMPACT(vec2)) {\
        if (ghost_omp_in_parallel()) {\
            if (DENSEMAT_SINGLECOL_STRIDE1(vec1) && DENSEMAT_SINGLECOL_STRIDE1(vec2)) {\
                DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_SINGLECOL(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                valptr1 = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec1,row,0);\
                valptr2 = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec2,row+vec2roffs,vec2coffs);\
                cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,0);\
                cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            } else {\
                DENSEMAT_ITER2_BEGIN_COMPACT_OFFS(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
                valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,col+vec2coffs);\
                cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
                cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,col+vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        } else {\
            if (DENSEMAT_SINGLECOL_STRIDE1(vec1) && DENSEMAT_SINGLECOL_STRIDE1(vec2)) {\
                _Pragma("omp parallel")\
                {\
                    DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_SINGLECOL(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                    valptr1 = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec1,row,0);\
                    valptr2 = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec2,row+vec2roffs,vec2coffs);\
                    cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,0);\
                    cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,vec2coffs);\
                    call;\
                    DENSEMAT_ITER_END();\
                }\
            } else {\
                _Pragma("omp parallel")\
                {\
                    DENSEMAT_ITER2_BEGIN_COMPACT_OFFS(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                    valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
                    valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,col+vec2coffs);\
                    cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
                    cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,col+vec2coffs);\
                    call;\
                    DENSEMAT_ITER_END();\
                }\
            }\
        }\
    } else if (DENSEMAT_SCATTERED_COLS(vec1) && DENSEMAT_COMPACT_ROWS(vec1) && DENSEMAT_COMPACT(vec2)) {\
        if (ghost_omp_in_parallel()) {\
            DENSEMAT_ITER2_BEGIN_SCATTEREDCOLS_COMPACTROWS1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
            valptr1 = DENSEMAT_VALPTR(vec1,row,memcol1);\
            valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,memcol2+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,memcol1);\
            cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,memcol2+vec2coffs);\
            call;\
            DENSEMAT_ITER_END();\
        } else {\
            _Pragma("omp parallel private(col,memcol1,memcol2)")\
            {\
                DENSEMAT_ITER2_BEGIN_SCATTEREDCOLS_COMPACTROWS1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                valptr1 = DENSEMAT_VALPTR(vec1,row,memcol1);\
                valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,memcol2+vec2coffs);\
                cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,memcol1);\
                cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,memcol2+vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        }\
    } else {\
        if (DENSEMAT_COMPACT(vec1)) {\
            _Pragma("omp single")\
            {\
                GHOST_WARNING_LOG("Serialized operation for scattered densemat! vec1 compact, vec2 scattered");\
                DENSEMAT_ITER2_BEGIN_SCATTERED2_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        } else if (DENSEMAT_COMPACT(vec2)) {\
            _Pragma("omp single")\
            {\
                GHOST_WARNING_LOG("Serialized operation for scattered densemat! vec1 scattered, vec2 compact");\
                DENSEMAT_ITER2_BEGIN_SCATTERED1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        } else {\
            _Pragma("omp single")\
            {\
                GHOST_WARNING_LOG("Serialized operation for scattered densemat! both scattered");\
                DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
                call;\
                DENSEMAT_ITER_END();\
            }\
        }\
    }\
    /* Trick the compiler to not produce warnings about unused variables */\
    if ((row+col+memrow1+memcol1+memrow2+memcol2 < 0) || \
            (valptr1 == (DENSEMAT_DT *)0xbeef) || (valptr2 == (DENSEMAT_DT *)0xbeef) || \
            (cuvalptr1 == (DENSEMAT_DT *)0xbeef) || (cuvalptr2 == (DENSEMAT_DT *)0xbeef))\
    {printf("Never happens\n");}
277
294
/**
 * Execute `call` for the elements of `vec1`, paired with the elements of
 * `vec2` addressed through the *_TRANSPOSED pointer macros and shifted by
 * (vec2roffs, vec2coffs).
 *
 * No compactness check is performed here; the macro name indicates it is meant
 * for compact densemats. The expansion declares row, col, memrow1, memcol1,
 * memrow2, memcol2, valptr1, valptr2, cuvalptr1 and cuvalptr2 in the enclosing
 * scope. If ghost_omp_in_parallel() is false, an own parallel region is opened
 * around the work-sharing loop.
 *
 * @param vec1      Pointer to the first densemat (defines the iteration space).
 * @param vec2      Pointer to the second densemat.
 * @param vec2roffs Row offset applied to vec2.
 * @param vec2coffs Column offset applied to vec2.
 * @param call      Statement(s) executed per element pair.
 */
#define DENSEMAT_ITER2_COMPACT_OFFS_TRANSPOSED(vec1,vec2,vec2roffs,vec2coffs,call)\
    ghost_lidx row=0,col=0,memrow1=0,memcol1=0,memrow2=0,memcol2=0;\
    DENSEMAT_DT *valptr1 = NULL, *valptr2 = NULL, *cuvalptr1 = NULL, *cuvalptr2 = NULL;\
    if (ghost_omp_in_parallel()) {\
        DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_TRANSPOSED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
        valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
        valptr2 = DENSEMAT_VALPTR_TRANSPOSED(vec2,row+vec2roffs,col+vec2coffs);\
        cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
        cuvalptr2 = DENSEMAT_CUVALPTR_TRANSPOSED(vec2,row+vec2roffs,col+vec2coffs);\
        call;\
        DENSEMAT_ITER_END();\
    } else {\
        _Pragma("omp parallel")\
        {\
            DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_TRANSPOSED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs);\
            valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
            valptr2 = DENSEMAT_VALPTR_TRANSPOSED(vec2,row+vec2roffs,col+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
            cuvalptr2 = DENSEMAT_CUVALPTR_TRANSPOSED(vec2,row+vec2roffs,col+vec2coffs);\
            call;\
            DENSEMAT_ITER_END();\
        }\
    }\
    /* Trick the compiler to not produce warnings about unused variables */\
    if ((row+col+memrow1+memcol1+memrow2+memcol2 < 0) || \
            (valptr1 == (DENSEMAT_DT *)0xbeef) || (valptr2 == (DENSEMAT_DT *)0xbeef) || \
            (cuvalptr1 == (DENSEMAT_DT *)0xbeef) || (cuvalptr2 == (DENSEMAT_DT *)0xbeef)) \
    {printf("Never happens\n");}
322
323
324
/*
 * Loop header for a compact single-column densemat.
 *
 * Sets col and memcol to 0 and opens a work-sharing loop over all padded rows
 * (DM_NROWSPAD); the body that follows only runs for row < DM_NROWS(vec).
 * Leaves two brace levels open (row loop and `if`), which DENSEMAT_ITER_END
 * closes.
 *
 * The private() clause is a string literal and is not affected by macro
 * parameter substitution, so the caller's variables must literally be named
 * memrow, valptr and cuvalptr.
 */
#define DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL(vec,valptr,row,col,memrow,memcol)\
    col = 0;\
    memcol = 0;\
    _Pragma("omp for schedule(runtime) private(memrow,valptr,cuvalptr)")\
    for (row = 0; row<DM_NROWSPAD(vec); row++) {\
        memrow = row;\
        if( row < DM_NROWS(vec) ) {
331
332
/*
 * Padding handler to be placed between the per-element body and
 * DENSEMAT_ITER_END in a single-column iteration.
 *
 * Closes the `if (row < DM_NROWS(vec))` branch opened by
 * DENSEMAT_ITER_BEGIN_COMPACT_SINGLECOL and opens an `else` branch that zeroes
 * the element of each padding row with memset. The `else` block is left open
 * for DENSEMAT_ITER_END to close.
 */
#define DENSEMAT_ITER_COMPACT_SINGLECOL_PAD(vec,valptr,row,col,memrow,memcol)\
        } else {\
            valptr = DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col);\
            memset(valptr,0,DENSEMAT_ELSIZE(vec)*sizeof(DENSEMAT_DT));
336
337
/*
 * Loop header for a compact multi-column densemat.
 *
 * Opens a work-sharing loop over all padded rows (DM_NROWSPAD). For rows below
 * DM_NROWS(vec) an inner loop over the vec->traits.ncols columns is opened;
 * for padding rows the inner loop is skipped and col stays 0. Leaves two brace
 * levels open (row loop and column loop), which DENSEMAT_ITER_END closes.
 *
 * The private() clause is a string literal, so the caller's variables must
 * literally be named col, memrow, memcol, valptr and cuvalptr.
 */
#define DENSEMAT_ITER_BEGIN_COMPACT(vec,valptr,row,col,memrow,memcol)\
    _Pragma("omp for schedule(runtime) private(col,memrow,memcol,valptr,cuvalptr)")\
    for (row = 0; row<DM_NROWSPAD(vec); row++) {\
        memrow = row;\
        col = 0;\
        if( row<DM_NROWS(vec) )\
        for (; col<vec->traits.ncols; col++) {\
            memcol = col;
345
346
/*
 * Padding handler to be placed between the per-element body and
 * DENSEMAT_ITER_END in a multi-column iteration.
 *
 * Closes the column loop opened by DENSEMAT_ITER_BEGIN_COMPACT and opens a
 * second loop that continues from the current col up to
 * vec->traits.ncolspadded, zeroing each element with memset. For padding rows
 * col starts at 0, so the whole row is zeroed. The loop is left open for
 * DENSEMAT_ITER_END to close.
 */
#define DENSEMAT_ITER_COMPACT_PAD(vec,valptr,row,col,memrow,memcol)\
        }\
        for (; col<vec->traits.ncolspadded; col++) {\
            valptr = DENSEMAT_VALPTR(vec,row,col);\
            memset(valptr,0,DENSEMAT_ELSIZE(vec)*sizeof(DENSEMAT_DT));
351
352
/* Closes the two brace levels left open by any of the *_BEGIN_* loop headers. */
#define DENSEMAT_ITER_END()\
        }\
    }
355
356
357
/*
 * Two-densemat compact loop header without offsets into vec2.
 *
 * DENSEMAT_ITER2_BEGIN_COMPACT_OFFS takes twelve parameters (it has valptr1
 * and valptr2 after vec2); the names valptr1/valptr2 are forwarded here so
 * that the argument count matches. Previously only ten arguments were passed.
 */
#define DENSEMAT_ITER2_BEGIN_COMPACT(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2)\
    DENSEMAT_ITER2_BEGIN_COMPACT_OFFS(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,0,0)
359
360
/*
 * Loop header for two compact densemats.
 *
 * Opens a work-sharing loop over the DM_NROWS(vec1) rows and an inner loop
 * over the vec1->traits.ncols columns; memrow1/memrow2 are set to row and
 * memcol1/memcol2 to col. The offset parameters are not used in this header;
 * the callers apply them when computing the vec2 pointers. Leaves two brace
 * levels open for DENSEMAT_ITER_END.
 *
 * The private() clause is a string literal, so the caller's variables must
 * literally carry the names listed in it.
 */
#define DENSEMAT_ITER2_BEGIN_COMPACT_OFFS(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    _Pragma("omp for schedule(runtime) private(col,memcol1,memcol2,memrow1,memrow2,valptr1,valptr2,cuvalptr1,cuvalptr2)")\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = row;\
        memrow2 = row;\
        for (col = 0; col<vec1->traits.ncols; col++) {\
            memcol1 = col;\
            memcol2 = col;
368
369
/*
 * Loop header for two compact single-column densemats.
 *
 * Sets memcol1, memcol2 and col to 0 and opens a work-sharing loop over the
 * DM_NROWS(vec1) rows. An extra bare block is opened inside the loop so that
 * DENSEMAT_ITER_END, which closes two brace levels, matches.
 *
 * The private() clause is a string literal, so the caller's variables must
 * literally carry the names listed in it.
 */
#define DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_SINGLECOL(vec1,vec2,valptr1,valptr2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    memcol1 = 0;\
    memcol2 = 0;\
    col = 0;\
    _Pragma("omp for schedule(runtime) private(memrow1,memrow2,valptr1,valptr2,cuvalptr1,cuvalptr2)")\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = row;\
        memrow2 = row;\
        {
378
379
/*
 * Loop header used by DENSEMAT_ITER2_COMPACT_OFFS_TRANSPOSED.
 *
 * Opens a work-sharing loop over the DM_NROWS(vec1) rows and an inner loop
 * over the vec1->traits.ncols columns; memrow1/memrow2 are set to row and
 * memcol1/memcol2 to col. Leaves two brace levels open for DENSEMAT_ITER_END.
 *
 * The private() clause is a string literal, so the caller's variables must
 * literally carry the names listed in it.
 */
#define DENSEMAT_ITER2_BEGIN_COMPACT_OFFS_TRANSPOSED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    _Pragma("omp for schedule(runtime) private(col,memcol1,memcol2,memrow1,memrow2,valptr1,valptr2,cuvalptr1,cuvalptr2)")\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = row;\
        memrow2 = row;\
        for (col = 0; col<vec1->traits.ncols; col++) {\
            memcol1 = col;\
            memcol2 = col;
387
388
/*
 * Loop header for vec1 with scattered columns but compact rows.
 *
 * The column loop is the outer loop: for each logical column, memcol1 is
 * advanced through vec1->colmask with ghost_bitmap_next() (presumably the next
 * set bit after the given index; confirm against the bitmap API) and memcol2
 * is set to col. Inside, a work-sharing loop over the DM_NROWS(vec1) rows is
 * opened. Leaves two brace levels open for DENSEMAT_ITER_END.
 *
 * The private() clause is a string literal, so the caller's variables must
 * literally carry the names listed in it.
 */
#define DENSEMAT_ITER2_BEGIN_SCATTEREDCOLS_COMPACTROWS1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    memcol1 = -1;\
    for (col = 0; col<vec1->traits.ncols; col++) {\
        memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
        memcol2 = col;\
        _Pragma("omp for schedule(runtime) private(memrow1,memrow2,valptr1,valptr2,cuvalptr1,cuvalptr2)")\
        for (row=0; row<DM_NROWS(vec1); row++) {\
            memrow1 = row;\
            memrow2 = row;
397
398
#ifdef ROWMAJOR
399
#ifdef COLMAJOR
400
#error "Only one of COLMAJOR or ROWMAJOR has to be defined for this header!"
401
#endif
402
403
/*
 * Element address computation for row-major storage.
 *
 * An element (row,col) lives at index row*stride+col, scaled by
 * DENSEMAT_ELSIZE(vec); the *_TRANSPOSED variants swap the roles of row and
 * col. The SINGLECOL_STRIDE1 variant ignores col and indexes by row only.
 * VALPTR variants address vec->val, CUVALPTR variants address vec->cu_val.
 * The vec argument is parenthesized so that pointer expressions can be passed.
 */
#define DENSEMAT_VALPTR(vec,row,col) (((DENSEMAT_DT *)((vec)->val))+(((row)*((vec)->stride)+(col))*DENSEMAT_ELSIZE(vec)))
#define DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col) (((DENSEMAT_DT *)((vec)->val))+(((row))*DENSEMAT_ELSIZE(vec)))
#define DENSEMAT_VALPTR_TRANSPOSED(vec,row,col) (((DENSEMAT_DT *)((vec)->val))+(((col)*((vec)->stride)+(row))*DENSEMAT_ELSIZE(vec)))
#define DENSEMAT_CUVALPTR(vec,row,col) (((DENSEMAT_DT *)((vec)->cu_val))+(((row)*((vec)->stride)+(col))*DENSEMAT_ELSIZE(vec)))
#define DENSEMAT_CUVALPTR_TRANSPOSED(vec,row,col) (((DENSEMAT_DT *)((vec)->cu_val))+(((col)*((vec)->stride)+(row))*DENSEMAT_ELSIZE(vec)))
408
409
410
/*
 * Serial loop header for a scattered densemat.
 *
 * Walks DM_NROWS(vec) rows and vec->traits.ncols columns; for each logical
 * index the memory index (memrow/memcol) is obtained by stepping through
 * vec->rowmask / vec->colmask with ghost_bitmap_next() (presumably the next
 * set bit after the given index; confirm against the bitmap API).
 * Assigns valptr and cuvalptr, which are not macro parameters and must exist
 * under exactly these names at the expansion site. Leaves two brace levels
 * open for DENSEMAT_ITER_END.
 */
#define DENSEMAT_ITER_BEGIN_SCATTERED(vec,row,col,memrow,memcol)\
    memrow = -1;\
    for (row=0; row<DM_NROWS(vec); row++) {\
        memrow = ghost_bitmap_next(vec->rowmask,memrow);\
        memcol = -1;\
        for (col = 0; col<vec->traits.ncols; col++) {\
            memcol = ghost_bitmap_next(vec->colmask,memcol);\
            valptr = DENSEMAT_VALPTR(vec,memrow,memcol);\
            cuvalptr = DENSEMAT_CUVALPTR(vec,memrow,memcol);
419
420
/* Two-densemat scattered loop header with zero row and column offset into vec2. */
#define DENSEMAT_ITER2_BEGIN_SCATTERED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2)\
    DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,0,0)
422
423
/*
 * Serial loop header for two scattered densemats.
 *
 * Before each loop, vec2's mask position (memrow2 / memcol2) is advanced
 * vec2roffs / vec2coffs times via ghost_bitmap_next(); afterwards both masks
 * are stepped in lockstep for DM_NROWS(vec1) rows and vec1->traits.ncols
 * columns. Assigns valptr1, valptr2, cuvalptr1 and cuvalptr2, which are not
 * macro parameters and must exist under these names at the expansion site.
 * Leaves two brace levels open for DENSEMAT_ITER_END.
 *
 * NOTE(review): memrow2/memcol2 have already been advanced past the offset by
 * the skip loops, and vec2roffs/vec2coffs are added again when computing the
 * vec2 pointers; TODO confirm that applying the offset twice is intended.
 */
#define DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    memrow1 = -1;\
    memrow2 = -1;\
    for (row=0; row<vec2roffs; row++) { /* go to offset */\
        memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
    }\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = ghost_bitmap_next(vec1->rowmask,memrow1);\
        memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
        memcol1 = -1;\
        memcol2 = -1;\
        for (col=0; col<vec2coffs; col++) { /* go to offset */\
            memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
        }\
        for (col=0; col<vec1->traits.ncols; col++) {\
            memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
            memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
            valptr1 = DENSEMAT_VALPTR(vec1,memrow1,memcol1);\
            valptr2 = DENSEMAT_VALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,memrow1,memcol1);\
            cuvalptr2 = DENSEMAT_CUVALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);
444
445
/*
 * Serial loop header for a scattered vec1 and a compact vec2.
 *
 * vec1's memory indices (memrow1/memcol1) are obtained by stepping through its
 * row and column masks with ghost_bitmap_next(); vec2 is addressed directly at
 * (row+vec2roffs, col+vec2coffs). Assigns valptr1, valptr2, cuvalptr1 and
 * cuvalptr2, which are not macro parameters and must exist under these names
 * at the expansion site. Leaves two brace levels open for DENSEMAT_ITER_END.
 */
#define DENSEMAT_ITER2_BEGIN_SCATTERED1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    memrow1 = -1;\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = ghost_bitmap_next(vec1->rowmask,memrow1);\
        memrow2 = row;\
        memcol1 = -1;\
        for (col=0; col<vec1->traits.ncols; col++) {\
            memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
            memcol2 = col;\
            valptr1 = DENSEMAT_VALPTR(vec1,memrow1,memcol1);\
            valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,col+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,memrow1,memcol1);\
            cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,col+vec2coffs);
458
459
/*
 * Serial loop header for a compact vec1 and a scattered vec2.
 *
 * vec1 is addressed directly at (row, col). vec2's mask position
 * (memrow2 / memcol2) is first advanced vec2roffs / vec2coffs times via
 * ghost_bitmap_next() and then stepped once per iteration. Assigns valptr1,
 * valptr2, cuvalptr1 and cuvalptr2, which are not macro parameters and must
 * exist under these names at the expansion site. Leaves two brace levels open
 * for DENSEMAT_ITER_END.
 *
 * NOTE(review): memrow2/memcol2 have already been advanced past the offset by
 * the skip loops, and vec2roffs/vec2coffs are added again when computing the
 * vec2 pointers; TODO confirm that applying the offset twice is intended.
 */
#define DENSEMAT_ITER2_BEGIN_SCATTERED2_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    memrow2 = -1;\
    for (row=0; row<vec2roffs; row++) { /* go to offset */\
        memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
    }\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = row;\
        memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
        memcol2 = -1;\
        for (col=0; col<vec2coffs; col++) { /* go to offset */\
            memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
        }\
        for (col=0; col<vec1->traits.ncols; col++) {\
            memcol1 = col;\
            memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
            valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
            valptr2 = DENSEMAT_VALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
            cuvalptr2 = DENSEMAT_CUVALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);
478
479
#elif defined(COLMAJOR)
480
481
/*
 * Element address computation for column-major storage.
 *
 * An element (row,col) lives at index col*stride+row, scaled by
 * DENSEMAT_ELSIZE(vec); the *_TRANSPOSED variants swap the roles of row and
 * col. The SINGLECOL_STRIDE1 variant ignores col and indexes by row only.
 * VALPTR variants address vec->val, CUVALPTR variants address vec->cu_val.
 * The vec argument and the whole expansion are parenthesized so that the
 * macros compose safely with surrounding operators.
 */
#define DENSEMAT_VALPTR(vec,row,col) (&((DENSEMAT_DT *)((vec)->val))[((col)*((vec)->stride)+(row))*DENSEMAT_ELSIZE(vec)])
#define DENSEMAT_VALPTR_SINGLECOL_STRIDE1(vec,row,col) (&((DENSEMAT_DT *)((vec)->val))[((row))*DENSEMAT_ELSIZE(vec)])
#define DENSEMAT_VALPTR_TRANSPOSED(vec,row,col) (&((DENSEMAT_DT *)((vec)->val))[((row)*((vec)->stride)+(col))*DENSEMAT_ELSIZE(vec)])
#define DENSEMAT_CUVALPTR(vec,row,col) (&((DENSEMAT_DT *)((vec)->cu_val))[((col)*((vec)->stride)+(row))*DENSEMAT_ELSIZE(vec)])
#define DENSEMAT_CUVALPTR_TRANSPOSED(vec,row,col) (&((DENSEMAT_DT *)((vec)->cu_val))[((row)*((vec)->stride)+(col))*DENSEMAT_ELSIZE(vec)])
486
487
/*
 * Serial loop header for a scattered densemat.
 *
 * Walks DM_NROWS(vec) rows and vec->traits.ncols columns; for each logical
 * index the memory index (memrow/memcol) is obtained by stepping through
 * vec->rowmask / vec->colmask with ghost_bitmap_next() (presumably the next
 * set bit after the given index; confirm against the bitmap API).
 * Assigns valptr and cuvalptr, which are not macro parameters and must exist
 * under exactly these names at the expansion site. Leaves two brace levels
 * open for DENSEMAT_ITER_END.
 */
#define DENSEMAT_ITER_BEGIN_SCATTERED(vec,row,col,memrow,memcol)\
    memrow = -1;\
    for (row=0; row<DM_NROWS(vec); row++) {\
        memrow = ghost_bitmap_next(vec->rowmask,memrow);\
        memcol = -1;\
        for (col = 0; col<vec->traits.ncols; col++) {\
            memcol = ghost_bitmap_next(vec->colmask,memcol);\
            valptr = DENSEMAT_VALPTR(vec,memrow,memcol);\
            cuvalptr = DENSEMAT_CUVALPTR(vec,memrow,memcol);
496
497
/* Two-densemat scattered loop header with zero row and column offset into vec2. */
#define DENSEMAT_ITER2_BEGIN_SCATTERED(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2)\
    DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,0,0)
499
500
/*
 * Serial loop header for two scattered densemats.
 *
 * Before each loop, vec2's mask position (memrow2 / memcol2) is advanced
 * vec2roffs / vec2coffs times via ghost_bitmap_next(); afterwards both masks
 * are stepped in lockstep for DM_NROWS(vec1) rows and vec1->traits.ncols
 * columns. Assigns valptr1, valptr2, cuvalptr1 and cuvalptr2, which are not
 * macro parameters and must exist under these names at the expansion site.
 * Leaves two brace levels open for DENSEMAT_ITER_END.
 *
 * NOTE(review): memrow2/memcol2 have already been advanced past the offset by
 * the skip loops, and vec2roffs/vec2coffs are added again when computing the
 * vec2 pointers; TODO confirm that applying the offset twice is intended.
 */
#define DENSEMAT_ITER2_BEGIN_SCATTERED_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    memrow1 = -1;\
    memrow2 = -1;\
    for (row=0; row<vec2roffs; row++) { /* go to offset */\
        memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
    }\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = ghost_bitmap_next(vec1->rowmask,memrow1);\
        memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
        memcol1 = -1;\
        memcol2 = -1;\
        for (col=0; col<vec2coffs; col++) { /* go to offset */\
            memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
        }\
        for (col=0; col<vec1->traits.ncols; col++) {\
            memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
            memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
            valptr1 = DENSEMAT_VALPTR(vec1,memrow1,memcol1);\
            valptr2 = DENSEMAT_VALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,memrow1,memcol1);\
            cuvalptr2 = DENSEMAT_CUVALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);
521
522
/*
 * Serial loop header for a scattered vec1 and a compact vec2.
 *
 * vec1's memory indices (memrow1/memcol1) are obtained by stepping through its
 * row and column masks with ghost_bitmap_next(); vec2 is addressed directly at
 * (row+vec2roffs, col+vec2coffs). Assigns valptr1, valptr2, cuvalptr1 and
 * cuvalptr2, which are not macro parameters and must exist under these names
 * at the expansion site. Leaves two brace levels open for DENSEMAT_ITER_END.
 */
#define DENSEMAT_ITER2_BEGIN_SCATTERED1_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    memrow1 = -1;\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = ghost_bitmap_next(vec1->rowmask,memrow1);\
        memrow2 = row;\
        memcol1 = -1;\
        for (col=0; col<vec1->traits.ncols; col++) {\
            memcol1 = ghost_bitmap_next(vec1->colmask,memcol1);\
            memcol2 = col;\
            valptr1 = DENSEMAT_VALPTR(vec1,memrow1,memcol1);\
            valptr2 = DENSEMAT_VALPTR(vec2,row+vec2roffs,col+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,memrow1,memcol1);\
            cuvalptr2 = DENSEMAT_CUVALPTR(vec2,row+vec2roffs,col+vec2coffs);
535
536
/*
 * Serial loop header for a compact vec1 and a scattered vec2.
 *
 * vec1 is addressed directly at (row, col). vec2's mask position
 * (memrow2 / memcol2) is first advanced vec2roffs / vec2coffs times via
 * ghost_bitmap_next() and then stepped once per iteration. Assigns valptr1,
 * valptr2, cuvalptr1 and cuvalptr2, which are not macro parameters and must
 * exist under these names at the expansion site. Leaves two brace levels open
 * for DENSEMAT_ITER_END.
 *
 * NOTE(review): memrow2/memcol2 have already been advanced past the offset by
 * the skip loops, and vec2roffs/vec2coffs are added again when computing the
 * vec2 pointers; TODO confirm that applying the offset twice is intended.
 */
#define DENSEMAT_ITER2_BEGIN_SCATTERED2_OFFS(vec1,vec2,row,col,memrow1,memrow2,memcol1,memcol2,vec2roffs,vec2coffs)\
    memrow2 = -1;\
    for (row=0; row<vec2roffs; row++) { /* go to offset */\
        memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
    }\
    for (row=0; row<DM_NROWS(vec1); row++) {\
        memrow1 = row;\
        memrow2 = ghost_bitmap_next(vec2->rowmask,memrow2);\
        memcol2 = -1;\
        for (col=0; col<vec2coffs; col++) { /* go to offset */\
            memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
        }\
        for (col=0; col<vec1->traits.ncols; col++) {\
            memcol1 = col;\
            memcol2 = ghost_bitmap_next(vec2->colmask,memcol2);\
            valptr1 = DENSEMAT_VALPTR(vec1,row,col);\
            valptr2 = DENSEMAT_VALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);\
            cuvalptr1 = DENSEMAT_CUVALPTR(vec1,row,col);\
            cuvalptr2 = DENSEMAT_CUVALPTR(vec2,memrow2+vec2roffs,memcol2+vec2coffs);
555
556
#else
557
#error "Either COLMAJOR or ROWMAJOR has to be defined for this header!"
558
559
#endif
560
561
#endif
omp.h
Function wrappers for OpenMP functions. If OpenMP is disabled, the functions are still defined but st...
Generated on Thu Dec 13 2018 20:56:02 for GHOST by
1.8.6