// TAPs 0.7.7.3
/******************************************************************************
TAPsCUDA_UsefulDeviceFns.cu
******************************************************************************/
/******************************************************************************
SUKITTI PUNAK   (09/03/2009)
******************************************************************************/
// Small device-side helpers: arithmetic operators for float3/float4,
// column-stored 3x3 and 4x4 matrices, vector utilities and quaternion
// routines.
//
// Every function is declared `inline` so that this include-guarded file can be
// pulled into more than one translation unit without producing duplicate
// definitions.
#ifndef TAPs_CUDA_USEFUL_DEVICE_FNS_HPP
#define TAPs_CUDA_USEFUL_DEVICE_FNS_HPP

#include "../Core/TAPsListOfNamespaces.hpp"

BEGIN_NAMESPACE_TAPs__CUDA
//=============================================================================
// START: CUDA Constants
//-----------------------------------------------------------------------------
//__constant__ float CUDA_K_PI = 4.0*atanf(1.0);
//__constant__ float TAPsCUDA_K_PI = 3.1415926535897932384626433832795f;
//-----------------------------------------------------------------------------
// END: CUDA Constants
//=============================================================================


//=============================================================================
// START: Operators for build-in data-types
//-----------------------------------------------------------------------------

// float3

/// Component-wise sum a + b.
__device__ inline
float3 operator+ ( const float3 & a, const float3 & b )
{
    float3 c;
    c.x = a.x + b.x;
    c.y = a.y + b.y;
    c.z = a.z + b.z;
    return c;
}

/// Component-wise difference a - b.
__device__ inline
float3 operator- ( const float3 & a, const float3 & b )
{
    float3 c;
    c.x = a.x - b.x;
    c.y = a.y - b.y;
    c.z = a.z - b.z;
    return c;
}

/// Scales every component of a by s.
__device__ inline
float3 operator* ( const float3 & a, float s )
{
    float3 c;
    c.x = a.x * s;
    c.y = a.y * s;
    c.z = a.z * s;
    return c;
}

/// Scalar-on-the-left form; forwards to operator*( float3, float ).
__device__ inline
float3 operator* ( float s, const float3 & a )
{
    return a * s;
}

/// Divides every component of a by s. There is no guard against s == 0.
__device__ inline
float3 operator/ ( const float3 & a, float s )
{
    float3 c;
    c.x = a.x / s;
    c.y = a.y / s;
    c.z = a.z / s;
    return c;
}


// float4

/// Component-wise sum a + b (w included).
__device__ inline
float4 operator+ ( const float4 & a, const float4 & b )
{
    float4 c;
    c.x = a.x + b.x;
    c.y = a.y + b.y;
    c.z = a.z + b.z;
    c.w = a.w + b.w;
    return c;
}

/// Component-wise difference a - b (w included).
__device__ inline
float4 operator- ( const float4 & a, const float4 & b )
{
    float4 c;
    c.x = a.x - b.x;
    c.y = a.y - b.y;
    c.z = a.z - b.z;
    c.w = a.w - b.w;
    return c;
}

/// Scales every component of a (w included) by s.
__device__ inline
float4 operator* ( const float4 & a, float s )
{
    float4 c;
    c.x = a.x * s;
    c.y = a.y * s;
    c.z = a.z * s;
    c.w = a.w * s;
    return c;
}

/// Scalar-on-the-left form; forwards to operator*( float4, float ).
__device__ inline
float4 operator* ( float s, const float4 & a )
{
    return a * s;
}

/// Divides every component of a (w included) by s. There is no guard against
/// s == 0.
__device__ inline
float4 operator/ ( const float4 & a, float s )
{
    float4 c;
    c.x = a.x / s;
    c.y = a.y / s;
    c.z = a.z / s;
    c.w = a.w / s;
    return c;
}
//-----------------------------------------------------------------------------
// END: Operators for build-in data-types
//=============================================================================


//=============================================================================
// START: 3-by-3 Matrix
//-----------------------------------------------------------------------------
/// 3x3 matrix stored as three float3 columns: col[c] holds column c, and its
/// x/y/z members are rows 0/1/2.
struct matrix3x3 {
    float3 col[3];
};

/// Builds a matrix3x3 from elements listed in row-major reading order
/// (eRC = row R, column C); they are scattered into the column storage.
__device__ inline
matrix3x3 make_matrix3x3(
    float e00, float e01, float e02,
    float e10, float e11, float e12,
    float e20, float e21, float e22
    )
{
    matrix3x3 A;
    A.col[0].x = e00;   A.col[0].y = e10;   A.col[0].z = e20;
    A.col[1].x = e01;   A.col[1].y = e11;   A.col[1].z = e21;
    A.col[2].x = e02;   A.col[2].y = e12;   A.col[2].z = e22;
    return A;
}

/// Element-wise sum A + B, computed column by column.
__device__ inline
matrix3x3 operator+ ( const matrix3x3 & A, const matrix3x3 & B )
{
    matrix3x3 C;
    #pragma unroll 3
    for ( int i = 0; i < 3; ++i ) {
        C.col[i] = A.col[i] + B.col[i];
    }
    return C;
}

/// Element-wise difference A - B, computed column by column.
__device__ inline
matrix3x3 operator- ( const matrix3x3 & A, const matrix3x3 & B )
{
    matrix3x3 C;
    #pragma unroll 3
    for ( int i = 0; i < 3; ++i ) {
        C.col[i] = A.col[i] - B.col[i];
    }
    return C;
}

/// Scales every element of A by s.
__device__ inline
matrix3x3 operator* ( const matrix3x3 & A, float s )
{
    matrix3x3 C;
    #pragma unroll 3
    for ( int i = 0; i < 3; ++i ) {
        C.col[i] = A.col[i] * s;
    }
    return C;
}

/// Divides every element of A by s. There is no guard against s == 0.
__device__ inline
matrix3x3 operator/ ( const matrix3x3 & A, float s )
{
    matrix3x3 C;
    #pragma unroll 3
    for ( int i = 0; i < 3; ++i ) {
        C.col[i] = A.col[i] / s;
    }
    return C;
}
//-----------------------------------------------------------------------------
// END: 3-by-3 Matrix
//=============================================================================


//=============================================================================
// START: 4-by-4 Matrix
//-----------------------------------------------------------------------------
/// 4x4 matrix stored as four float4 columns: col[c] holds column c, and its
/// x/y/z/w members are rows 0/1/2/3.
struct matrix4x4 {
    float4 col[4];
};

/// Builds a matrix4x4 from elements listed in row-major reading order
/// (eRC = row R, column C); they are scattered into the column storage.
__device__ inline
matrix4x4 make_matrix4x4(
    float e00, float e01, float e02, float e03,
    float e10, float e11, float e12, float e13,
    float e20, float e21, float e22, float e23,
    float e30, float e31, float e32, float e33
    )
{
    matrix4x4 A;
    A.col[0].x = e00;   A.col[0].y = e10;   A.col[0].z = e20;   A.col[0].w = e30;
    A.col[1].x = e01;   A.col[1].y = e11;   A.col[1].z = e21;   A.col[1].w = e31;
    A.col[2].x = e02;   A.col[2].y = e12;   A.col[2].z = e22;   A.col[2].w = e32;
    A.col[3].x = e03;   A.col[3].y = e13;   A.col[3].z = e23;   A.col[3].w = e33;
    return A;
}

/// Element-wise sum A + B, computed column by column.
__device__ inline
matrix4x4 operator+ ( const matrix4x4 & A, const matrix4x4 & B )
{
    matrix4x4 C;
    #pragma unroll 4
    for ( int i = 0; i < 4; ++i ) {
        C.col[i] = A.col[i] + B.col[i];
    }
    return C;
}

/// Element-wise difference A - B, computed column by column.
__device__ inline
matrix4x4 operator- ( const matrix4x4 & A, const matrix4x4 & B )
{
    matrix4x4 C;
    #pragma unroll 4
    for ( int i = 0; i < 4; ++i ) {
        C.col[i] = A.col[i] - B.col[i];
    }
    return C;
}

/// Scales every element of A by s.
__device__ inline
matrix4x4 operator* ( const matrix4x4 & A, float s )
{
    matrix4x4 C;
    #pragma unroll 4
    for ( int i = 0; i < 4; ++i ) {
        C.col[i] = A.col[i] * s;
    }
    return C;
}

/// Divides every element of A by s. There is no guard against s == 0.
__device__ inline
matrix4x4 operator/ ( const matrix4x4 & A, float s )
{
    matrix4x4 C;
    #pragma unroll 4
    for ( int i = 0; i < 4; ++i ) {
        C.col[i] = A.col[i] / s;
    }
    return C;
}

/// Matrix-times-column-vector product A * v. Each result component is the dot
/// product of one row of A (the same member taken from every column) with v.
__device__ inline
float4 operator* ( const matrix4x4 & A, const float4 & v )
{
    float4 r = make_float4(
        A.col[0].x*v.x + A.col[1].x*v.y + A.col[2].x*v.z + A.col[3].x*v.w,
        A.col[0].y*v.x + A.col[1].y*v.y + A.col[2].y*v.z + A.col[3].y*v.w,
        A.col[0].z*v.x + A.col[1].z*v.y + A.col[2].z*v.z + A.col[3].z*v.w,
        A.col[0].w*v.x + A.col[1].w*v.y + A.col[2].w*v.z + A.col[3].w*v.w
    );
    return r;
}
//-----------------------------------------------------------------------------
// END: 4-by-4 Matrix
//=============================================================================




//=============================================================================
//-----------------------------------------------------------------------------
/// Returns the x, y, z components of V as a float3 (w is dropped).
__device__ inline
float3 XYZ ( const float4 & V )
{
    return make_float3( V.x, V.y, V.z );
}

/// Returns a copy of V whose w component is replaced by the given w.
__device__ inline
float4 XYZW ( const float4 & V, float w )
{
    return make_float4( V.x, V.y, V.z, w );
}

/// Component-wise sum V + W.
__device__ inline
float3 Add ( const float3 & V, const float3 & W )
{
    return make_float3( V.x+W.x, V.y+W.y, V.z+W.z );
}

/// Component-wise sum V + W (w included).
__device__ inline
float4 Add ( const float4 & V, const float4 & W )
{
    return make_float4( V.x+W.x, V.y+W.y, V.z+W.z, V.w+W.w );
}

/// Component-wise difference V - W.
__device__ inline
float3 Sub ( const float3 & V, const float3 & W )
{
    return make_float3( V.x-W.x, V.y-W.y, V.z-W.z );
}

/// Component-wise difference V - W (w included).
__device__ inline
float4 Sub ( const float4 & V, const float4 & W )
{
    return make_float4( V.x-W.x, V.y-W.y, V.z-W.z, V.w-W.w );
}

/// Dot product of V and W over x, y, z.
__device__ inline
float InnerProduct ( const float3 & V, const float3 & W )
{
    return V.x*W.x + V.y*W.y + V.z*W.z;
}

/// Dot product of V and W over all four components (w included).
__device__ inline
float InnerProduct ( const float4 & V, const float4 & W )
{
    return V.x*W.x + V.y*W.y + V.z*W.z + V.w*W.w;
}

/// Scales every component of V by s.
__device__ inline
float3 Mul ( const float3 & V, float s )
{
    return make_float3( V.x*s, V.y*s, V.z*s );
}

/// Scales every component of V (w included) by s.
__device__ inline
float4 Mul ( const float4 & V, float s )
{
    return make_float4( V.x*s, V.y*s, V.z*s, V.w*s );
}

/// Divides every component of V by s. There is no guard against s == 0.
__device__ inline
float3 Div ( const float3 & V, float s )
{
    return make_float3( V.x/s, V.y/s, V.z/s );
}

/// Divides every component of V (w included) by s. There is no guard against
/// s == 0.
__device__ inline
float4 Div ( const float4 & V, float s )
{
    return make_float4( V.x/s, V.y/s, V.z/s, V.w/s );
}

/// Squared length of V, i.e. InnerProduct( V, V ).
__device__ inline
float LengthSquare ( const float3 & V )
{
    return InnerProduct( V, V );
}

/// Squared length of V over all four components.
__device__ inline
float LengthSquare ( const float4 & V )
{
    return InnerProduct( V, V );
}

/// Length of V: square root of LengthSquare( V ).
__device__ inline
float Length ( const float3 & V )
{
    return sqrtf( LengthSquare( V ) );
}

/// Length of V over all four components.
__device__ inline
float Length ( const float4 & V )
{
    return sqrtf( LengthSquare( V ) );
}

/// V divided by its own length. A zero-length V is not special-cased, so the
/// division is then by zero.
__device__ inline
float3 Unit ( const float3 & V )
{
    return Div( V, Length(V) );
}

/// V divided by its own four-component length. A zero-length V is not
/// special-cased, so the division is then by zero.
__device__ inline
float4 Unit ( const float4 & V )
{
    return Div( V, Length(V) );
}
//-----------------------------------------------------------------------------
//=============================================================================


//=============================================================================
// START: Fns for Quaternion
//-----------------------------------------------------------------------------
// Quaternion layout used by every routine below: a float4 whose x member is
// the real part and whose y, z, w members are the i, j, k parts.

/// Conjugate of Q: the real part (x) is kept, y, z and w are negated.
__device__ inline
float4 QuaternionConjugate ( const float4 & Q )
{
    return make_float4( Q.x, -Q.y, -Q.z, -Q.w );
}

/// Sum of the squares of the four components of Q.
__device__ inline
float QuaternionNormSquare ( const float4 & Q )
{
    return Q.x*Q.x + Q.y*Q.y + Q.z*Q.z + Q.w*Q.w;
}

/// Norm of Q: square root of QuaternionNormSquare( Q ).
__device__ inline
float QuaternionNorm ( const float4 & Q )
{
    return sqrtf( QuaternionNormSquare( Q ) );
}

/// Q divided by its norm. A zero quaternion is not special-cased, so the
/// division is then by zero.
__device__ inline
float4 QuaternionUnit ( const float4 & Q )
{
    return Div( Q, QuaternionNorm(Q) );
}

/// Hamilton product Q1 * Q2 (order matters). The four result rows are the
/// real, i, j and k parts respectively.
__device__ inline
float4 QuaternionMul ( const float4 & Q1, const float4 & Q2 )
{
    return make_float4(
        Q1.x*Q2.x - Q1.y*Q2.y - Q1.z*Q2.z - Q1.w*Q2.w,
        Q1.x*Q2.y + Q1.y*Q2.x + Q1.z*Q2.w - Q1.w*Q2.z,
        Q1.x*Q2.z + Q1.z*Q2.x + Q1.w*Q2.y - Q1.y*Q2.w,
        Q1.x*Q2.w + Q1.w*Q2.x + Q1.y*Q2.z - Q1.z*Q2.y
    );
}

/// Converts Q to a 3x3 matrix using the quaternion-to-rotation formula.
/// Q is used as given (no normalisation is performed here), so the result is
/// a pure rotation only when Q has unit norm.
__device__ inline
matrix3x3 QuatenionToRotationMatrix3x3 ( const float4 & Q )
{
    // Pairwise products of the components; r = real (x), i = y, j = z, k = w.
    float rr = Q.x*Q.x;
    float ii = Q.y*Q.y;
    float jj = Q.z*Q.z;
    float kk = Q.w*Q.w;
    float ri = Q.x*Q.y;
    float rj = Q.x*Q.z;
    float rk = Q.x*Q.w;
    float ij = Q.y*Q.z;
    float jk = Q.z*Q.w;
    float ik = Q.y*Q.w;

    matrix3x3 R = make_matrix3x3(
        rr+ii-jj-kk,    2.0f*(ij-rk),   2.0f*(ik+rj),
        2.0f*(ij+rk),   rr-ii+jj-kk,    2.0f*(jk-ri),
        2.0f*(ik-rj),   2.0f*(jk+ri),   rr-ii-jj+kk
    );
    return R;
}

/// Same conversion as QuatenionToRotationMatrix3x3, placed in the upper-left
/// 3x3 of a 4x4 matrix whose last row and last column are (0, 0, 0, 1).
/// Q is used as given (no normalisation is performed here).
__device__ inline
matrix4x4 QuatenionToRotationMatrix4x4 ( const float4 & Q )
{
    // Pairwise products of the components; r = real (x), i = y, j = z, k = w.
    float rr = Q.x*Q.x;
    float ii = Q.y*Q.y;
    float jj = Q.z*Q.z;
    float kk = Q.w*Q.w;
    float ri = Q.x*Q.y;
    float rj = Q.x*Q.z;
    float rk = Q.x*Q.w;
    float ij = Q.y*Q.z;
    float jk = Q.z*Q.w;
    float ik = Q.y*Q.w;

    matrix4x4 R = make_matrix4x4(
        rr+ii-jj-kk,    2.0f*(ij-rk),   2.0f*(ik+rj),   0.0f,
        2.0f*(ij+rk),   rr-ii+jj-kk,    2.0f*(jk-ri),   0.0f,
        2.0f*(ik-rj),   2.0f*(jk+ri),   rr-ii-jj+kk,    0.0f,
        0.0f,           0.0f,           0.0f,           1.0f
    );
    return R;
}

//-----------------------------------------------------------------------------
// END: Fns for Quaternion
//=============================================================================
END_NAMESPACE_TAPs__CUDA
//-----------------------------------------------------------------------------
//#include "TAPsCUDA_UsefulDeviceFns_Def.cu"
#endif
//34567890123456789012345678901234567890123456789012345678901234567890123456789
//--+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----