GHOST  1.1.2
General, Hybrid, and Optimized Sparse Toolkit
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cu_complex.h
Go to the documentation of this file.
1 
6 #ifndef GHOST_CU_COMPLEX_H
7 #define GHOST_CU_COMPLEX_H
8 
9 #include <cuComplex.h>
10 
/**
 * @brief Overwrite a scalar with zero.
 *
 * The primary template assigns the double literal 0.0, so it serves every
 * type that is assignable from double.
 *
 * @param val Value to overwrite.
 */
template<typename T>
__host__ __device__ inline void zero(T &val)
{
    val = 0.0;
}

/** @brief Single-precision complex variant: real and imaginary part become 0. */
template<>
__host__ __device__ inline void zero<cuFloatComplex>(cuFloatComplex &val)
{
    val = make_cuFloatComplex(0.f, 0.f);
}

/** @brief Double-precision complex variant: real and imaginary part become 0. */
template<>
__host__ __device__ inline void zero<cuDoubleComplex>(cuDoubleComplex &val)
{
    val = make_cuDoubleComplex(0.0, 0.0);
}
28 
/**
 * @brief Overwrite a scalar with one.
 *
 * The primary template assigns the double literal 1.0, so it serves every
 * type that is assignable from double.
 *
 * @param val Value to overwrite.
 */
template<typename T>
__host__ __device__ inline void one(T &val)
{
    val = 1.0;
}

/** @brief Single-precision complex variant: the result is 1 + 0i. */
template<>
__host__ __device__ inline void one<cuFloatComplex>(cuFloatComplex &val)
{
    val = make_cuFloatComplex(1.f, 0.f);
}

/** @brief Double-precision complex variant: the result is 1 + 0i. */
template<>
__host__ __device__ inline void one<cuDoubleComplex>(cuDoubleComplex &val)
{
    val = make_cuDoubleComplex(1.0, 0.0);
}
46 
/**
 * @brief Store a real number into a (possibly complex) scalar.
 *
 * The primary template is a plain assignment and therefore relies on the
 * implicit conversion from T_b to T.
 *
 * @param val  Destination.
 * @param real Real value to store.
 */
template<typename T, typename T_b>
__device__ inline void fromReal(T &val, T_b real)
{
    val = real;
}

/** @brief Double-precision complex destination: the result is real + 0i. */
template<>
__device__ inline void fromReal<cuDoubleComplex,double>(cuDoubleComplex &val, double real)
{
    val = make_cuDoubleComplex(real, 0.0);
}

/** @brief Single-precision complex destination: the result is real + 0i. */
template<>
__device__ inline void fromReal<cuFloatComplex,float>(cuFloatComplex &val, float real)
{
    val = make_cuFloatComplex(real, 0.0f);
}
64 
/**
 * @brief Real part of a scalar.
 *
 * The primary template returns the argument itself, converted implicitly
 * from T to T_b.
 *
 * @param val Input value.
 * @return The real part of val.
 */
template<typename T, typename T_b>
__device__ inline T_b Real(T val)
{
    return val;
}

/** @brief Real part of a double-precision complex number. */
template<>
__device__ inline double Real<cuDoubleComplex,double>(cuDoubleComplex val)
{
    const double realPart = cuCreal(val);
    return realPart;
}

/** @brief Real part of a single-precision complex number. */
template<>
__device__ inline float Real<cuFloatComplex,float>(cuFloatComplex val)
{
    const float realPart = cuCrealf(val);
    return realPart;
}
82 
/**
 * @brief Imaginary part of a scalar.
 *
 * The primary template handles the non-complex types: a real number has no
 * imaginary component, so the result is always zero. (Returning the argument
 * itself here would report the real value as its own imaginary part.)
 *
 * @param val Input value; unused by the primary template.
 * @return Zero, converted to T_b.
 */
template<typename T, typename T_b>
__device__ inline T_b Imag(T val)
{
    (void)val; // real-valued input: nothing to extract
    return static_cast<T_b>(0);
}

/** @brief Imaginary part of a double-precision complex number. */
template<>
__device__ inline double Imag<cuDoubleComplex,double>(cuDoubleComplex val)
{
    return cuCimag(val);
}

/** @brief Imaginary part of a single-precision complex number. */
template<>
__device__ inline float Imag<cuFloatComplex,float>(cuFloatComplex val)
{
    return cuCimagf(val);
}
100 
/**
 * @brief Sum of two scalars (the accumulation step val + val2).
 *
 * Nothing is modified in place; the caller stores the returned sum.
 *
 * @param val  Current accumulator value.
 * @param val2 Value to add.
 * @return val + val2.
 */
template<typename t>
__device__ inline t accu(t val, t val2)
{
    return val + val2;
}

/** @brief Single-precision complex sum. */
template<>
__device__ inline cuFloatComplex accu<cuFloatComplex>(cuFloatComplex val, cuFloatComplex val2)
{
    const cuFloatComplex sum = cuCaddf(val, val2);
    return sum;
}

/** @brief Double-precision complex sum. */
template<>
__device__ inline cuDoubleComplex accu<cuDoubleComplex>(cuDoubleComplex val, cuDoubleComplex val2)
{
    const cuDoubleComplex sum = cuCadd(val, val2);
    return sum;
}
119 
/**
 * @brief Multiply-add step: val + val2 * val3.
 *
 * Nothing is modified in place; the caller stores the returned value.
 *
 * @param val  Current accumulator value.
 * @param val2 First factor, same type as the accumulator.
 * @param val3 Second factor, possibly of a different type.
 * @return val + val2 * val3, converted to T.
 */
template<typename T, typename T2>
__device__ inline T axpy(T val, T val2, T2 val3)
{
    return val + (val2 * val3);
}

/** @brief Single-precision complex accumulator and factor. */
template<>
__device__ inline cuFloatComplex axpy<cuFloatComplex,cuFloatComplex>(cuFloatComplex val, cuFloatComplex val2, cuFloatComplex val3)
{
    const cuFloatComplex product = cuCmulf(val2, val3);
    return cuCaddf(val, product);
}

/** @brief Single-precision complex accumulator, double factor (narrowed to float, then promoted to a + 0i). */
template<>
__device__ inline cuFloatComplex axpy<cuFloatComplex,double>(cuFloatComplex val, cuFloatComplex val2, double val3)
{
    const cuFloatComplex factor = make_cuFloatComplex(static_cast<float>(val3), 0.f);
    const cuFloatComplex product = cuCmulf(val2, factor);
    return cuCaddf(val, product);
}

/** @brief Single-precision complex accumulator, float factor (promoted to a + 0i). */
template<>
__device__ inline cuFloatComplex axpy<cuFloatComplex,float>(cuFloatComplex val, cuFloatComplex val2, float val3)
{
    const cuFloatComplex factor = make_cuFloatComplex(val3, 0.f);
    const cuFloatComplex product = cuCmulf(val2, factor);
    return cuCaddf(val, product);
}

/** @brief Single-precision complex accumulator, double-precision complex factor (both parts narrowed to float). */
template<>
__device__ inline cuFloatComplex axpy<cuFloatComplex,cuDoubleComplex>(cuFloatComplex val, cuFloatComplex val2, cuDoubleComplex val3)
{
    const float factorRe = static_cast<float>(cuCreal(val3));
    const float factorIm = static_cast<float>(cuCimag(val3));
    const cuFloatComplex product = cuCmulf(val2, make_cuFloatComplex(factorRe, factorIm));
    return cuCaddf(val, product);
}
150 
/** @brief val + val2 * val3 for a double-precision complex accumulator and a double factor (promoted to a + 0i). */
template<>
__device__ inline cuDoubleComplex axpy<cuDoubleComplex,double>(cuDoubleComplex val, cuDoubleComplex val2, double val3)
{
    const cuDoubleComplex factor = make_cuDoubleComplex(val3, 0.0);
    const cuDoubleComplex product = cuCmul(val2, factor);
    return cuCadd(val, product);
}

/** @brief val + val2 * val3 for a double-precision complex accumulator and a float factor (widened, then promoted to a + 0i). */
template<>
__device__ inline cuDoubleComplex axpy<cuDoubleComplex,float>(cuDoubleComplex val, cuDoubleComplex val2, float val3)
{
    const cuDoubleComplex factor = make_cuDoubleComplex(static_cast<double>(val3), 0.0);
    const cuDoubleComplex product = cuCmul(val2, factor);
    return cuCadd(val, product);
}

/** @brief val + val2 * val3 with all operands double-precision complex. */
template<>
__device__ inline cuDoubleComplex axpy<cuDoubleComplex,cuDoubleComplex>(cuDoubleComplex val, cuDoubleComplex val2, cuDoubleComplex val3)
{
    const cuDoubleComplex product = cuCmul(val2, val3);
    return cuCadd(val, product);
}

/** @brief val + val2 * val3 for a double-precision complex accumulator and a single-precision complex factor (both parts widened). */
template<>
__device__ inline cuDoubleComplex axpy<cuDoubleComplex,cuFloatComplex>(cuDoubleComplex val, cuDoubleComplex val2, cuFloatComplex val3)
{
    const double factorRe = static_cast<double>(cuCrealf(val3));
    const double factorIm = static_cast<double>(cuCimagf(val3));
    const cuDoubleComplex product = cuCmul(val2, make_cuDoubleComplex(factorRe, factorIm));
    return cuCadd(val, product);
}
174 
/**
 * @brief val + val2 * Re(val3) for a double accumulator and a single-precision complex factor.
 *
 * Only the real part of val3 takes part; its imaginary part is ignored.
 */
template<>
__device__ inline double axpy<double,cuFloatComplex>(double val, double val2, cuFloatComplex val3)
{
    const double realFactor = static_cast<double>(cuCrealf(val3));
    return val + val2 * realFactor;
}

/**
 * @brief val + val2 * Re(val3) for a double accumulator and a double-precision complex factor.
 *
 * Only the real part of val3 takes part; its imaginary part is ignored.
 */
template<>
__device__ inline double axpy<double,cuDoubleComplex>(double val, double val2, cuDoubleComplex val3)
{
    const double realFactor = cuCreal(val3);
    return val + val2 * realFactor;
}

/**
 * @brief val + val2 * Re(val3) for a float accumulator and a single-precision complex factor.
 *
 * Only the real part of val3 takes part; its imaginary part is ignored.
 */
template<>
__device__ inline float axpy<float,cuFloatComplex>(float val, float val2, cuFloatComplex val3)
{
    const float realFactor = cuCrealf(val3);
    return val + val2 * realFactor;
}


/**
 * @brief val + val2 * Re(val3) for a float accumulator and a double-precision complex factor.
 *
 * The real part of val3 is narrowed to float before the multiplication; the
 * imaginary part is ignored.
 */
template<>
__device__ inline float axpy<float,cuDoubleComplex>(float val, float val2, cuDoubleComplex val3)
{
    const float realFactor = static_cast<float>(cuCreal(val3));
    return val + val2 * realFactor;
}
199 
/**
 * @brief Scaled sum b*y + a*x (the update y = a*x + b*y).
 *
 * Nothing is modified in place; the caller stores the returned value.
 *
 * @param x First operand.
 * @param y Second operand.
 * @param a Scaling factor applied to x.
 * @param b Scaling factor applied to y.
 * @return b*y + a*x.
 */
template<typename T>
__device__ inline T axpby(T x, T y, T a, T b)
{
    return (b * y) + (a * x);
}
206 
/**
 * @brief Product of a scalar with its own complex conjugate.
 *
 * For the non-complex types handled by the primary template this is x*x.
 *
 * @param x Input value.
 * @return conj(x) * x, which is always real-valued.
 */
template<typename T,typename T_b>
__device__ inline T_b mulConjSame(T x)
{
    return x * x;
}

/** @brief Single-precision complex variant: re*re + im*im. */
template<>
__device__ inline float mulConjSame<cuFloatComplex,float>(cuFloatComplex x)
{
    const float re = cuCrealf(x);
    const float im = cuCimagf(x);
    return re * re + im * im;
}

/** @brief Double-precision complex variant: re*re + im*im. */
template<>
__device__ inline double mulConjSame<cuDoubleComplex,double>(cuDoubleComplex x)
{
    const double re = cuCreal(x);
    const double im = cuCimag(x);
    return re * re + im * im;
}
224 
/**
 * @brief Complex conjugate of a scalar.
 *
 * The primary template handles the non-complex types, for which the
 * conjugate is the value itself.
 *
 * @param x Input value.
 * @return The conjugate of x.
 */
template<typename T>
__device__ inline T conj(T x)
{
    return x;
}

/** @brief Single-precision complex conjugate. */
template<>
__device__ inline cuFloatComplex conj<cuFloatComplex>(cuFloatComplex x)
{
    const cuFloatComplex conjugated = cuConjf(x);
    return conjugated;
}

/** @brief Double-precision complex conjugate. */
template<>
__device__ inline cuDoubleComplex conj<cuDoubleComplex>(cuDoubleComplex x)
{
    const cuDoubleComplex conjugated = cuConj(x);
    return conjugated;
}
242 
/**
 * @brief Product conj(x) * y.
 *
 * For the non-complex types handled by the primary template the conjugate
 * is a no-op, so the result is simply x * y.
 *
 * @param x Operand that gets conjugated.
 * @param y Second operand.
 * @return conj(x) * y.
 */
template<typename T>
__device__ inline T mulConj(T x, T y)
{
    return x * y;
}

/** @brief Single-precision complex variant. */
template<>
__device__ inline cuFloatComplex mulConj<cuFloatComplex>(cuFloatComplex x, cuFloatComplex y)
{
    const cuFloatComplex xConj = cuConjf(x);
    return cuCmulf(xConj, y);
}

/** @brief Double-precision complex variant. */
template<>
__device__ inline cuDoubleComplex mulConj<cuDoubleComplex>(cuDoubleComplex x, cuDoubleComplex y)
{
    const cuDoubleComplex xConj = cuConj(x);
    return cuCmul(xConj, y);
}
260 
/** @brief b*y + a*x with all operands single-precision complex. */
template<>
__device__ inline cuFloatComplex axpby<cuFloatComplex>(cuFloatComplex x, cuFloatComplex y, cuFloatComplex a, cuFloatComplex b)
{
    const cuFloatComplex scaledY = cuCmulf(b, y);
    const cuFloatComplex scaledX = cuCmulf(a, x);
    return cuCaddf(scaledY, scaledX);
}

/** @brief b*y + a*x with all operands double-precision complex. */
template<>
__device__ inline cuDoubleComplex axpby<cuDoubleComplex>(cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex a, cuDoubleComplex b)
{
    const cuDoubleComplex scaledY = cuCmul(b, y);
    const cuDoubleComplex scaledX = cuCmul(a, x);
    return cuCadd(scaledY, scaledX);
}
272 
/**
 * @brief Scaled value a*y (the update x = a*y).
 *
 * @param y Value to scale.
 * @param a Scaling factor of the same type.
 * @return a * y.
 */
template<typename T>
__device__ inline T scale(T y, T a)
{
    return a * y;
}

/** @brief Single-precision complex variant. */
template<>
__device__ inline cuFloatComplex scale<cuFloatComplex>(cuFloatComplex y, cuFloatComplex a)
{
    const cuFloatComplex scaled = cuCmulf(a, y);
    return scaled;
}

/** @brief Double-precision complex variant. */
template<>
__device__ inline cuDoubleComplex scale<cuDoubleComplex>(cuDoubleComplex y, cuDoubleComplex a)
{
    const cuDoubleComplex scaled = cuCmul(a, y);
    return scaled;
}
291 
/**
 * @brief Scaled value a*y where the factor may have a different type than y.
 *
 * @param y Value to scale.
 * @param a Scaling factor.
 * @return a * y, converted to T1.
 */
template<typename T1, typename T2>
__device__ inline T1 scale2(T1 y, T2 a)
{
    return a*y;
}

/** @brief Single-precision complex value, single-precision complex factor. */
template<>
__device__ inline cuFloatComplex scale2<cuFloatComplex,cuFloatComplex>(cuFloatComplex y, cuFloatComplex a)
{
    return cuCmulf(a,y);
}

/**
 * @brief Single-precision complex value, float factor.
 *
 * Each component is scaled on its own. Promoting the factor to a + 0i and
 * running a full complex multiply would add 0*im / 0*re cross terms, which
 * turn into NaN as soon as y holds an infinity or NaN, and would cost twice
 * the multiplications.
 */
template<>
__device__ inline cuFloatComplex scale2<cuFloatComplex,float>(cuFloatComplex y, float a)
{
    return make_cuFloatComplex(a * cuCrealf(y), a * cuCimagf(y));
}

/**
 * @brief Single-precision complex value, double factor.
 *
 * The factor is narrowed to float first so the arithmetic stays in single
 * precision; the components are then scaled individually (no 0*x cross
 * terms, hence no spurious NaN for non-finite input).
 */
template<>
__device__ inline cuFloatComplex scale2<cuFloatComplex,double>(cuFloatComplex y, double a)
{
    const float factor = static_cast<float>(a);
    return make_cuFloatComplex(factor * cuCrealf(y), factor * cuCimagf(y));
}

/** @brief Double-precision complex value, double-precision complex factor. */
template<>
__device__ inline cuDoubleComplex scale2<cuDoubleComplex,cuDoubleComplex>(cuDoubleComplex y, cuDoubleComplex a)
{
    return cuCmul(a,y);
}


/**
 * @brief Double-precision complex value, float factor.
 *
 * The factor is widened to double and applied to each component
 * individually (no 0*x cross terms, hence no spurious NaN for non-finite
 * input).
 */
template<>
__device__ inline cuDoubleComplex scale2<cuDoubleComplex,float>(cuDoubleComplex y, float a)
{
    const double factor = static_cast<double>(a);
    return make_cuDoubleComplex(factor * cuCreal(y), factor * cuCimag(y));
}

/**
 * @brief Double-precision complex value, double factor.
 *
 * Each component is scaled on its own (no 0*x cross terms, hence no
 * spurious NaN for non-finite input).
 */
template<>
__device__ inline cuDoubleComplex scale2<cuDoubleComplex,double>(cuDoubleComplex y, double a)
{
    return make_cuDoubleComplex(a * cuCreal(y), a * cuCimag(y));
}
334 
/**
 * @brief Scaled product a*x*y (the update z = a*x*y).
 *
 * The primary template evaluates the product left to right, i.e. (a*x)*y.
 *
 * @param x First factor.
 * @param y Second factor.
 * @param a Scaling factor.
 * @return a * x * y.
 */
template<typename T>
__device__ inline T mult(T x, T y, T a)
{
    return (a * x) * y;
}

/** @brief Single-precision complex variant; computes x*y first and scales the result by a. */
template<>
__device__ inline cuFloatComplex mult<cuFloatComplex>(cuFloatComplex x, cuFloatComplex y, cuFloatComplex a)
{
    const cuFloatComplex xy = cuCmulf(x, y);
    return cuCmulf(a, xy);
}

/** @brief Double-precision complex variant; computes x*y first and scales the result by a. */
template<>
__device__ inline cuDoubleComplex mult<cuDoubleComplex>(cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex a)
{
    const cuDoubleComplex xy = cuCmul(x, y);
    return cuCmul(a, xy);
}
353 
354 #endif
__device__ double Real< cuDoubleComplex, double >(cuDoubleComplex val)
Definition: cu_complex.h:72
__device__ void fromReal(T &val, T_b real)
Definition: cu_complex.h:48
__device__ cuDoubleComplex axpy< cuDoubleComplex, cuDoubleComplex >(cuDoubleComplex val, cuDoubleComplex val2, cuDoubleComplex val3)
Definition: cu_complex.h:164
__device__ cuFloatComplex mult< cuFloatComplex >(cuFloatComplex x, cuFloatComplex y, cuFloatComplex a)
Definition: cu_complex.h:343
__device__ T conj(T x)
Definition: cu_complex.h:226
__device__ cuFloatComplex accu< cuFloatComplex >(cuFloatComplex val, cuFloatComplex val2)
Definition: cu_complex.h:109
__device__ cuFloatComplex axpby< cuFloatComplex >(cuFloatComplex x, cuFloatComplex y, cuFloatComplex a, cuFloatComplex b)
Definition: cu_complex.h:262
__device__ cuFloatComplex mulConj< cuFloatComplex >(cuFloatComplex x, cuFloatComplex y)
Definition: cu_complex.h:250
__device__ float axpy< float, cuDoubleComplex >(float val, float val2, cuDoubleComplex val3)
Definition: cu_complex.h:195
__device__ T_b mulConjSame(T x)
Definition: cu_complex.h:208
__device__ __host__ void one(T &val)
Definition: cu_complex.h:30
__device__ T axpby(T x, T y, T a, T b)
Definition: cu_complex.h:202
__device__ double axpy< double, cuDoubleComplex >(double val, double val2, cuDoubleComplex val3)
Definition: cu_complex.h:182
__device__ cuDoubleComplex axpy< cuDoubleComplex, double >(cuDoubleComplex val, cuDoubleComplex val2, double val3)
Definition: cu_complex.h:152
__device__ cuFloatComplex scale2< cuFloatComplex, double >(cuFloatComplex y, double a)
Definition: cu_complex.h:311
__device__ float mulConjSame< cuFloatComplex, float >(cuFloatComplex x)
Definition: cu_complex.h:214
__device__ cuDoubleComplex mulConj< cuDoubleComplex >(cuDoubleComplex x, cuDoubleComplex y)
Definition: cu_complex.h:256
__device__ T scale(T y, T a)
Definition: cu_complex.h:275
__device__ T1 scale2(T1 y, T2 a)
Definition: cu_complex.h:293
__device__ T mulConj(T x, T y)
Definition: cu_complex.h:244
__device__ float axpy< float, cuFloatComplex >(float val, float val2, cuFloatComplex val3)
Definition: cu_complex.h:188
__device__ cuDoubleComplex accu< cuDoubleComplex >(cuDoubleComplex val, cuDoubleComplex val2)
Definition: cu_complex.h:115
__device__ T_b Real(T val)
Definition: cu_complex.h:66
__device__ cuFloatComplex scale2< cuFloatComplex, cuFloatComplex >(cuFloatComplex y, cuFloatComplex a)
Definition: cu_complex.h:299
__device__ cuFloatComplex conj< cuFloatComplex >(cuFloatComplex x)
Definition: cu_complex.h:232
__device__ cuDoubleComplex axpy< cuDoubleComplex, float >(cuDoubleComplex val, cuDoubleComplex val2, float val3)
Definition: cu_complex.h:158
__device__ double Imag< cuDoubleComplex, double >(cuDoubleComplex val)
Definition: cu_complex.h:90
__device__ void fromReal< cuDoubleComplex, double >(cuDoubleComplex &val, double real)
Definition: cu_complex.h:54
__device__ __host__ void one< cuDoubleComplex >(cuDoubleComplex &val)
Definition: cu_complex.h:42
__device__ void fromReal< cuFloatComplex, float >(cuFloatComplex &val, float real)
Definition: cu_complex.h:60
__device__ T_b Imag(T val)
Definition: cu_complex.h:84
__device__ __host__ void one< cuFloatComplex >(cuFloatComplex &val)
Definition: cu_complex.h:36
__device__ float Imag< cuFloatComplex, float >(cuFloatComplex val)
Definition: cu_complex.h:96
__device__ double axpy< double, cuFloatComplex >(double val, double val2, cuFloatComplex val3)
Definition: cu_complex.h:176
__device__ __host__ void zero(T &val)
Definition: cu_complex.h:12
__device__ cuDoubleComplex scale2< cuDoubleComplex, float >(cuDoubleComplex y, float a)
Definition: cu_complex.h:324
__device__ double mulConjSame< cuDoubleComplex, double >(cuDoubleComplex x)
Definition: cu_complex.h:220
__device__ cuDoubleComplex mult< cuDoubleComplex >(cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex a)
Definition: cu_complex.h:349
__device__ t accu(t val, t val2)
Definition: cu_complex.h:103
__device__ cuFloatComplex scale2< cuFloatComplex, float >(cuFloatComplex y, float a)
Definition: cu_complex.h:305
__device__ __host__ void zero< cuDoubleComplex >(cuDoubleComplex &val)
Definition: cu_complex.h:24
__device__ cuDoubleComplex scale2< cuDoubleComplex, double >(cuDoubleComplex y, double a)
Definition: cu_complex.h:330
__device__ float Real< cuFloatComplex, float >(cuFloatComplex val)
Definition: cu_complex.h:78
__device__ cuFloatComplex axpy< cuFloatComplex, float >(cuFloatComplex val, cuFloatComplex val2, float val3)
Definition: cu_complex.h:140
__device__ T axpy(T val, T val2, T2 val3)
Definition: cu_complex.h:122
__device__ cuFloatComplex scale< cuFloatComplex >(cuFloatComplex y, cuFloatComplex a)
Definition: cu_complex.h:281
__device__ __host__ void zero< cuFloatComplex >(cuFloatComplex &val)
Definition: cu_complex.h:18
__device__ cuDoubleComplex scale2< cuDoubleComplex, cuDoubleComplex >(cuDoubleComplex y, cuDoubleComplex a)
Definition: cu_complex.h:317
__device__ T mult(T x, T y, T a)
Definition: cu_complex.h:337
__device__ cuFloatComplex axpy< cuFloatComplex, double >(cuFloatComplex val, cuFloatComplex val2, double val3)
Definition: cu_complex.h:134
__device__ cuFloatComplex axpy< cuFloatComplex, cuDoubleComplex >(cuFloatComplex val, cuFloatComplex val2, cuDoubleComplex val3)
Definition: cu_complex.h:146
__device__ cuFloatComplex axpy< cuFloatComplex, cuFloatComplex >(cuFloatComplex val, cuFloatComplex val2, cuFloatComplex val3)
Definition: cu_complex.h:128
__device__ cuDoubleComplex scale< cuDoubleComplex >(cuDoubleComplex y, cuDoubleComplex a)
Definition: cu_complex.h:287
__device__ cuDoubleComplex axpy< cuDoubleComplex, cuFloatComplex >(cuDoubleComplex val, cuDoubleComplex val2, cuFloatComplex val3)
Definition: cu_complex.h:170
__device__ cuDoubleComplex axpby< cuDoubleComplex >(cuDoubleComplex x, cuDoubleComplex y, cuDoubleComplex a, cuDoubleComplex b)
Definition: cu_complex.h:268
__device__ cuDoubleComplex conj< cuDoubleComplex >(cuDoubleComplex x)
Definition: cu_complex.h:238