TAPs 0.7.7.3
TAPsCUDA_UsefulDeviceFns.cu
Go to the documentation of this file.
00001 /******************************************************************************
00002 TAPsCUDA_UsefulDeviceFns.cu
00003 ******************************************************************************/
00008 /******************************************************************************
00009 SUKITTI PUNAK   (09/03/2009)
00010 ******************************************************************************/
00011 #ifndef TAPs_CUDA_USEFUL_DEVICE_FNS_HPP
00012 #define TAPs_CUDA_USEFUL_DEVICE_FNS_HPP
00013 
00014 #include "../Core/TAPsListOfNamespaces.hpp"
00015 
00016 BEGIN_NAMESPACE_TAPs__CUDA
00017 //=============================================================================
00018 // START: CUDA Constants
00019 //-----------------------------------------------------------------------------
00028  //__constant__ float CUDA_K_PI = 4.0*atanf(1.0);
00029  //__constant__ float TAPsCUDA_K_PI = 3.1415926535897932384626433832795f;
00030 //-----------------------------------------------------------------------------
00031 // END: CUDA Constants
00032 //=============================================================================
00033 
00034 
00035 //=============================================================================
00036 // START: Operators for built-in data types
00037 //-----------------------------------------------------------------------------
00038 
00039 // float3
/// Component-wise sum of two float3 values.
/// \param a left operand
/// \param b right operand
/// \return (a.x+b.x, a.y+b.y, a.z+b.z)
__device__
float3 operator+ ( const float3 & a, const float3 & b )
{
    return make_float3( a.x + b.x, a.y + b.y, a.z + b.z );
}
00049 
/// Component-wise difference of two float3 values.
/// \param a minuend
/// \param b subtrahend
/// \return (a.x-b.x, a.y-b.y, a.z-b.z)
__device__
float3 operator- ( const float3 & a, const float3 & b )
{
    return make_float3( a.x - b.x, a.y - b.y, a.z - b.z );
}
00059 
/// Scales every component of a float3 by a scalar.
/// \param a vector to scale
/// \param s scale factor
/// \return (a.x*s, a.y*s, a.z*s)
__device__
float3 operator* ( const float3 & a, float s )
{
    return make_float3( a.x * s, a.y * s, a.z * s );
}
00069 
/// Scalar-on-the-left form of float3 scaling; yields the same value as a * s.
/// \param s scale factor
/// \param a vector to scale
/// \return (a.x*s, a.y*s, a.z*s)
__device__
float3 operator* ( float s, const float3 & a )
{
    float3 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    return scaled;
}
00075 
/// Divides every component of a float3 by a scalar.
/// No check is made for s == 0; each component is divided individually.
/// \param a vector to divide
/// \param s divisor
/// \return (a.x/s, a.y/s, a.z/s)
__device__
float3 operator/ ( const float3 & a, float s )
{
    return make_float3( a.x / s, a.y / s, a.z / s );
}
00085 
00086 
00087 // float4
/// Component-wise sum of two float4 values.
/// \param a left operand
/// \param b right operand
/// \return (a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w)
__device__
float4 operator+ ( const float4 & a, const float4 & b )
{
    return make_float4( a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w );
}
00098 
/// Component-wise difference of two float4 values.
/// \param a minuend
/// \param b subtrahend
/// \return (a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w)
__device__
float4 operator- ( const float4 & a, const float4 & b )
{
    return make_float4( a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w );
}
00109 
/// Scales every component of a float4 by a scalar.
/// \param a vector to scale
/// \param s scale factor
/// \return (a.x*s, a.y*s, a.z*s, a.w*s)
__device__
float4 operator* ( const float4 & a, float s )
{
    return make_float4( a.x * s, a.y * s, a.z * s, a.w * s );
}
00120 
/// Scalar-on-the-left form of float4 scaling; yields the same value as a * s.
/// \param s scale factor
/// \param a vector to scale
/// \return (a.x*s, a.y*s, a.z*s, a.w*s)
__device__
float4 operator* ( float s, const float4 & a )
{
    float4 scaled;
    scaled.x = a.x * s;
    scaled.y = a.y * s;
    scaled.z = a.z * s;
    scaled.w = a.w * s;
    return scaled;
}
00126 
/// Divides every component of a float4 by a scalar.
/// No check is made for s == 0; each component is divided individually.
/// \param a vector to divide
/// \param s divisor
/// \return (a.x/s, a.y/s, a.z/s, a.w/s)
__device__
float4 operator/ ( const float4 & a, float s )
{
    return make_float4( a.x / s, a.y / s, a.z / s, a.w / s );
}
00137 //-----------------------------------------------------------------------------
00138 // END: Operators for built-in data types
00139 //=============================================================================
00140 
00141 
00142 //=============================================================================
00143 // START: 3-by-3 Matrix
00144 //-----------------------------------------------------------------------------
/// 3-by-3 float matrix stored as three column vectors.
struct matrix3x3 {
    float3 col[3];  ///< col[c] holds column c; .x/.y/.z are rows 0/1/2
};

/// Builds a matrix3x3 from nine elements listed in row-major reading order.
/// Argument eRC is the element at row R, column C; it is stored in
/// component R (x, y, z for 0, 1, 2) of col[C].
/// \return the assembled matrix
__device__
struct matrix3x3 make_matrix3x3( 
    float e00, float e01, float e02,
    float e10, float e11, float e12,
    float e20, float e21, float e22 
)
{
    struct matrix3x3 M;
    // Each column gathers one element from every row of the argument list.
    M.col[0] = make_float3( e00, e10, e20 );
    M.col[1] = make_float3( e01, e11, e21 );
    M.col[2] = make_float3( e02, e12, e22 );
    return M;
}
00162 
/// Element-wise sum of two 3-by-3 matrices, computed column by column.
/// \param A left operand
/// \param B right operand
/// \return matrix whose column i is A.col[i] + B.col[i]
__device__
struct matrix3x3 operator+ ( const struct matrix3x3 & A, const struct matrix3x3 & B )
{
    struct matrix3x3 sum;
    sum.col[0] = A.col[0] + B.col[0];
    sum.col[1] = A.col[1] + B.col[1];
    sum.col[2] = A.col[2] + B.col[2];
    return sum;
}
00173 
/// Element-wise difference of two 3-by-3 matrices, computed column by column.
/// \param A minuend
/// \param B subtrahend
/// \return matrix whose column i is A.col[i] - B.col[i]
__device__
struct matrix3x3 operator- ( const struct matrix3x3 & A, const struct matrix3x3 & B )
{
    struct matrix3x3 diff;
    diff.col[0] = A.col[0] - B.col[0];
    diff.col[1] = A.col[1] - B.col[1];
    diff.col[2] = A.col[2] - B.col[2];
    return diff;
}
00184 
/// Scales every element of a 3-by-3 matrix by a scalar.
/// \param A matrix to scale
/// \param s scale factor
/// \return matrix whose column i is A.col[i] * s
__device__
struct matrix3x3 operator* ( const struct matrix3x3 & A, float s )
{
    struct matrix3x3 scaled;
    scaled.col[0] = A.col[0] * s;
    scaled.col[1] = A.col[1] * s;
    scaled.col[2] = A.col[2] * s;
    return scaled;
}
00195 
/// Divides every element of a 3-by-3 matrix by a scalar.
/// No check is made for s == 0.
/// \param A matrix to divide
/// \param s divisor
/// \return matrix whose column i is A.col[i] / s
__device__
struct matrix3x3 operator/ ( const struct matrix3x3 & A, float s )
{
    struct matrix3x3 quot;
    quot.col[0] = A.col[0] / s;
    quot.col[1] = A.col[1] / s;
    quot.col[2] = A.col[2] / s;
    return quot;
}
00206 //-----------------------------------------------------------------------------
00207 // END: 3-by-3 Matrix
00208 //=============================================================================
00209 
00210 
00211 //=============================================================================
00212 // START: 4-by-4 Matrix
00213 //-----------------------------------------------------------------------------
/// 4-by-4 float matrix stored as four column vectors.
struct matrix4x4 {
    float4 col[4];  ///< col[c] holds column c; .x/.y/.z/.w are rows 0/1/2/3
};

/// Builds a matrix4x4 from sixteen elements listed in row-major reading order.
/// Argument eRC is the element at row R, column C; it is stored in
/// component R (x, y, z, w for 0, 1, 2, 3) of col[C].
/// \return the assembled matrix
__device__
struct matrix4x4 make_matrix4x4( 
    float e00, float e01, float e02, float e03, 
    float e10, float e11, float e12, float e13, 
    float e20, float e21, float e22, float e23, 
    float e30, float e31, float e32, float e33
)
{
    struct matrix4x4 M;
    // Each column gathers one element from every row of the argument list.
    M.col[0] = make_float4( e00, e10, e20, e30 );
    M.col[1] = make_float4( e01, e11, e21, e31 );
    M.col[2] = make_float4( e02, e12, e22, e32 );
    M.col[3] = make_float4( e03, e13, e23, e33 );
    return M;
}
00234 
/// Element-wise sum of two 4-by-4 matrices, computed column by column.
/// \param A left operand
/// \param B right operand
/// \return matrix whose column i is A.col[i] + B.col[i]
__device__
struct matrix4x4 operator+ ( const struct matrix4x4 & A, const struct matrix4x4 & B )
{
    struct matrix4x4 sum;
    sum.col[0] = A.col[0] + B.col[0];
    sum.col[1] = A.col[1] + B.col[1];
    sum.col[2] = A.col[2] + B.col[2];
    sum.col[3] = A.col[3] + B.col[3];
    return sum;
}
00245 
/// Element-wise difference of two 4-by-4 matrices, computed column by column.
/// \param A minuend
/// \param B subtrahend
/// \return matrix whose column i is A.col[i] - B.col[i]
__device__
struct matrix4x4 operator- ( const struct matrix4x4 & A, const struct matrix4x4 & B )
{
    struct matrix4x4 diff;
    diff.col[0] = A.col[0] - B.col[0];
    diff.col[1] = A.col[1] - B.col[1];
    diff.col[2] = A.col[2] - B.col[2];
    diff.col[3] = A.col[3] - B.col[3];
    return diff;
}
00256 
/// Scales every element of a 4-by-4 matrix by a scalar.
/// \param A matrix to scale
/// \param s scale factor
/// \return matrix whose column i is A.col[i] * s
__device__
struct matrix4x4 operator* ( const struct matrix4x4 & A, float s )
{
    struct matrix4x4 scaled;
    scaled.col[0] = A.col[0] * s;
    scaled.col[1] = A.col[1] * s;
    scaled.col[2] = A.col[2] * s;
    scaled.col[3] = A.col[3] * s;
    return scaled;
}
00267 
/// Divides every element of a 4-by-4 matrix by a scalar.
/// No check is made for s == 0.
/// \param A matrix to divide
/// \param s divisor
/// \return matrix whose column i is A.col[i] / s
__device__
struct matrix4x4 operator/ ( const struct matrix4x4 & A, float s )
{
    struct matrix4x4 quot;
    quot.col[0] = A.col[0] / s;
    quot.col[1] = A.col[1] / s;
    quot.col[2] = A.col[2] / s;
    quot.col[3] = A.col[3] / s;
    return quot;
}
00278 
/// Matrix-vector product A * v, treating v as a column vector.
/// Each result component is the sum over the four columns of the matching
/// column component times the corresponding component of v.
/// \param A 4-by-4 matrix (column storage)
/// \param v vector to transform
/// \return the transformed vector
__device__
float4 operator* ( const struct matrix4x4 & A, const float4 & v )
{
    const float4 & c0 = A.col[0];
    const float4 & c1 = A.col[1];
    const float4 & c2 = A.col[2];
    const float4 & c3 = A.col[3];

    float4 out;
    out.x = c0.x*v.x + c1.x*v.y + c2.x*v.z + c3.x*v.w;
    out.y = c0.y*v.x + c1.y*v.y + c2.y*v.z + c3.y*v.w;
    out.z = c0.z*v.x + c1.z*v.y + c2.z*v.z + c3.z*v.w;
    out.w = c0.w*v.x + c1.w*v.y + c2.w*v.z + c3.w*v.w;
    return out;
}
00290 //-----------------------------------------------------------------------------
00291 // END: 4-by-4 Matrix
00292 //=============================================================================
00293 
00294 
00295 
00296 
00297 //=============================================================================
00298 //-----------------------------------------------------------------------------
/// Extracts the first three components of a float4, dropping w.
/// \param V source vector
/// \return (V.x, V.y, V.z)
__device__
float3 XYZ ( const float4 & V )
{
    float3 xyz;
    xyz.x = V.x;
    xyz.y = V.y;
    xyz.z = V.z;
    return xyz;
}
00305 
/// Copies x, y and z from V and uses the supplied w as the fourth component.
/// V.w is not read.
/// \param V source of the x, y, z components
/// \param w value for the fourth component of the result
/// \return (V.x, V.y, V.z, w)
__device__
float4 XYZW ( const float4 & V, float w )
{
    float4 out;
    out.x = V.x;
    out.y = V.y;
    out.z = V.z;
    out.w = w;
    return out;
}
00312 
/// Component-wise sum of two float3 values (named-function form).
/// \param V left operand
/// \param W right operand
/// \return (V.x+W.x, V.y+W.y, V.z+W.z)
__device__
float3 Add ( const float3 & V, const float3 & W )
{
    float3 sum;
    sum.x = V.x + W.x;
    sum.y = V.y + W.y;
    sum.z = V.z + W.z;
    return sum;
}
00319 
/// Component-wise sum of two float4 values (named-function form).
/// \param V left operand
/// \param W right operand
/// \return (V.x+W.x, V.y+W.y, V.z+W.z, V.w+W.w)
__device__
float4 Add ( const float4 & V, const float4 & W )
{
    float4 sum;
    sum.x = V.x + W.x;
    sum.y = V.y + W.y;
    sum.z = V.z + W.z;
    sum.w = V.w + W.w;
    return sum;
}
00326 
/// Component-wise difference of two float3 values (named-function form).
/// \param V minuend
/// \param W subtrahend
/// \return (V.x-W.x, V.y-W.y, V.z-W.z)
__device__
float3 Sub ( const float3 & V, const float3 & W )
{
    float3 diff;
    diff.x = V.x - W.x;
    diff.y = V.y - W.y;
    diff.z = V.z - W.z;
    return diff;
}
00333 
/// Component-wise difference of two float4 values (named-function form).
/// \param V minuend
/// \param W subtrahend
/// \return (V.x-W.x, V.y-W.y, V.z-W.z, V.w-W.w)
__device__
float4 Sub ( const float4 & V, const float4 & W )
{
    float4 diff;
    diff.x = V.x - W.x;
    diff.y = V.y - W.y;
    diff.z = V.z - W.z;
    diff.w = V.w - W.w;
    return diff;
}
00340 
/// Inner (dot) product of two float3 values.
/// Terms are accumulated in x, y, z order.
/// \param V first vector
/// \param W second vector
/// \return V.x*W.x + V.y*W.y + V.z*W.z
__device__
float InnerProduct ( const float3 & V, const float3 & W )
{
    float dot = V.x*W.x;
    dot += V.y*W.y;
    dot += V.z*W.z;
    return dot;
}
00348 
/// Inner (dot) product of two float4 values, including the w components.
/// Terms are accumulated in x, y, z, w order.
/// \param V first vector
/// \param W second vector
/// \return V.x*W.x + V.y*W.y + V.z*W.z + V.w*W.w
__device__
float InnerProduct ( const float4 & V, const float4 & W )
{
    float dot = V.x*W.x;
    dot += V.y*W.y;
    dot += V.z*W.z;
    dot += V.w*W.w;
    return dot;
}
00356 
/// Scales a float3 by a scalar (named-function form).
/// \param V vector to scale
/// \param s scale factor
/// \return (V.x*s, V.y*s, V.z*s)
__device__
float3 Mul ( const float3 & V, float s )
{
    float3 scaled;
    scaled.x = V.x * s;
    scaled.y = V.y * s;
    scaled.z = V.z * s;
    return scaled;
}
00363 
/// Scales a float4 by a scalar (named-function form).
/// \param V vector to scale
/// \param s scale factor
/// \return (V.x*s, V.y*s, V.z*s, V.w*s)
__device__
float4 Mul ( const float4 & V, float s )
{
    float4 scaled;
    scaled.x = V.x * s;
    scaled.y = V.y * s;
    scaled.z = V.z * s;
    scaled.w = V.w * s;
    return scaled;
}
00370 
/// Divides a float3 by a scalar (named-function form).
/// No check is made for s == 0; each component is divided individually.
/// \param V vector to divide
/// \param s divisor
/// \return (V.x/s, V.y/s, V.z/s)
__device__
float3 Div ( const float3 & V, float s )
{
    float3 quot;
    quot.x = V.x / s;
    quot.y = V.y / s;
    quot.z = V.z / s;
    return quot;
}
00377 
/// Divides a float4 by a scalar (named-function form).
/// No check is made for s == 0; each component is divided individually.
/// \param V vector to divide
/// \param s divisor
/// \return (V.x/s, V.y/s, V.z/s, V.w/s)
__device__
float4 Div ( const float4 & V, float s )
{
    float4 quot;
    quot.x = V.x / s;
    quot.y = V.y / s;
    quot.z = V.z / s;
    quot.w = V.w / s;
    return quot;
}
00384 
/// Squared Euclidean length of a float3 (the vector's inner product with itself).
/// \param V input vector
/// \return V.x*V.x + V.y*V.y + V.z*V.z
__device__
float LengthSquare ( const float3 & V )
{
    return V.x*V.x + V.y*V.y + V.z*V.z;
}
00391 
/// Squared Euclidean length of a float4 (the vector's inner product with
/// itself, w included).
/// \param V input vector
/// \return V.x*V.x + V.y*V.y + V.z*V.z + V.w*V.w
__device__
float LengthSquare ( const float4 & V )
{
    return V.x*V.x + V.y*V.y + V.z*V.z + V.w*V.w;
}
00398 
/// Euclidean length of a float3.
/// \param V input vector
/// \return single-precision square root of LengthSquare(V)
__device__
float Length ( const float3 & V )
{
    const float lenSq = LengthSquare( V );
    return sqrtf( lenSq );
}
00405 
/// Euclidean length of a float4 (all four components contribute).
/// \param V input vector
/// \return single-precision square root of LengthSquare(V)
__device__
float Length ( const float4 & V )
{
    const float lenSq = LengthSquare( V );
    return sqrtf( lenSq );
}
00412 
/// Scales a float3 to unit length by dividing it by its own length.
/// The length is not checked: a zero-length input divides by zero.
/// \param V vector to normalize
/// \return V divided by Length(V)
__device__
float3 Unit ( const float3 & V )
{
    const float len = Length( V );
    return Div( V, len );
}
00419 
/// Scales a float4 to unit length by dividing it by its own length.
/// The length is not checked: a zero-length input divides by zero.
/// \param V vector to normalize
/// \return V divided by Length(V)
__device__
float4 Unit ( const float4 & V )
{
    const float len = Length( V );
    return Div( V, len );
}
00426 //-----------------------------------------------------------------------------
00427 //=============================================================================
00428 
00429 
00430 //=============================================================================
00431 // START: Fns for Quaternion
00432 //-----------------------------------------------------------------------------
/// Conjugate of a quaternion stored in a float4.
/// The x component is kept as-is; y, z and w are negated.
/// \param Q input quaternion
/// \return (Q.x, -Q.y, -Q.z, -Q.w)
__device__
float4 QuaternionConjugate ( const float4 & Q )
{
    float4 conj;
    conj.x =  Q.x;
    conj.y = -Q.y;
    conj.z = -Q.z;
    conj.w = -Q.w;
    return conj;
}
00439 
/// Squared norm of a quaternion: the sum of the squares of its four components.
/// Terms are accumulated in x, y, z, w order.
/// \param Q input quaternion
/// \return Q.x*Q.x + Q.y*Q.y + Q.z*Q.z + Q.w*Q.w
__device__
float QuaternionNormSquare ( const float4 & Q )
{
    float normSq = Q.x*Q.x;
    normSq += Q.y*Q.y;
    normSq += Q.z*Q.z;
    normSq += Q.w*Q.w;
    return normSq;
}
00446 
/// Norm (magnitude) of a quaternion.
/// \param Q input quaternion
/// \return single-precision square root of QuaternionNormSquare(Q)
__device__
float QuaternionNorm ( const float4 & Q )
{
    const float normSq = QuaternionNormSquare( Q );
    return sqrtf( normSq );
}
00453 
/// Scales a quaternion to unit norm by dividing it by its own norm.
/// The norm is not checked: a zero quaternion divides by zero.
/// \param Q quaternion to normalize
/// \return Q divided by QuaternionNorm(Q)
__device__
float4 QuaternionUnit ( const float4 & Q )
{
    const float norm = QuaternionNorm( Q );
    return Div( Q, norm );
}
00460 
/// Quaternion product Q1 * Q2 (not commutative).
/// Component layout used by the formula: x is the real (scalar) part and
/// y, z, w are the i, j, k imaginary parts.
/// \param Q1 left factor
/// \param Q2 right factor
/// \return the product, in the same component layout
__device__
float4 QuaternionMul ( const float4 & Q1, const float4 & Q2 )
{
    // Name the parts so the product reads like the textbook formula.
    const float r1 = Q1.x, i1 = Q1.y, j1 = Q1.z, k1 = Q1.w;
    const float r2 = Q2.x, i2 = Q2.y, j2 = Q2.z, k2 = Q2.w;

    float4 prod;
    prod.x = r1*r2 - i1*i2 - j1*j2 - k1*k2;
    prod.y = r1*i2 + i1*r2 + j1*k2 - k1*j2;
    prod.z = r1*j2 + j1*r2 + k1*i2 - i1*k2;
    prod.w = r1*k2 + k1*r2 + i1*j2 - j1*i2;
    return prod;
}
00472 
/// Converts a quaternion to a 3-by-3 matrix.
/// Component layout: Q.x is the real part, Q.y/Q.z/Q.w are the i/j/k parts.
/// Q is used as given (no normalization is performed here); the diagonal is
/// formed from sums and differences of the squared components.
/// \param Q input quaternion
/// \return the matrix, stored by columns
__device__
struct matrix3x3 QuatenionToRotationMatrix3x3 ( const float4 & Q )
{
    const float r = Q.x, i = Q.y, j = Q.z, k = Q.w;

    // Squares and pairwise products of the components.
    const float rr = r*r, ii = i*i, jj = j*j, kk = k*k;
    const float ri = r*i, rj = r*j, rk = r*k;
    const float ij = i*j, jk = j*k, ik = i*k;

    // Fill the matrix column by column (col[c] = column c, x/y/z = rows 0/1/2).
    struct matrix3x3 R;
    R.col[0].x = rr+ii-jj-kk;  R.col[0].y = 2*(ij+rk);    R.col[0].z = 2*(ik-rj);
    R.col[1].x = 2*(ij-rk);    R.col[1].y = rr-ii+jj-kk;  R.col[1].z = 2*(jk+ri);
    R.col[2].x = 2*(ik+rj);    R.col[2].y = 2*(jk-ri);    R.col[2].z = rr-ii-jj+kk;
    return R;
}
00495 
/// Converts a quaternion to a 4-by-4 homogeneous matrix.
/// The upper-left 3-by-3 part is built from the quaternion; the fourth row and
/// fourth column are (0, 0, 0, 1).
/// Component layout: Q.x is the real part, Q.y/Q.z/Q.w are the i/j/k parts.
/// Q is used as given (no normalization is performed here).
/// \param Q input quaternion
/// \return the matrix, stored by columns
__device__
struct matrix4x4 QuatenionToRotationMatrix4x4 ( const float4 & Q )
{
    const float r = Q.x, i = Q.y, j = Q.z, k = Q.w;

    // Squares and pairwise products of the components.
    const float rr = r*r, ii = i*i, jj = j*j, kk = k*k;
    const float ri = r*i, rj = r*j, rk = r*k;
    const float ij = i*j, jk = j*k, ik = i*k;

    // Fill the matrix column by column (col[c] = column c, x/y/z/w = rows 0..3).
    struct matrix4x4 R;
    R.col[0].x = rr+ii-jj-kk;  R.col[0].y = 2*(ij+rk);    R.col[0].z = 2*(ik-rj);    R.col[0].w = 0;
    R.col[1].x = 2*(ij-rk);    R.col[1].y = rr-ii+jj-kk;  R.col[1].z = 2*(jk+ri);    R.col[1].w = 0;
    R.col[2].x = 2*(ik+rj);    R.col[2].y = 2*(jk-ri);    R.col[2].z = rr-ii-jj+kk;  R.col[2].w = 0;
    R.col[3].x = 0;            R.col[3].y = 0;            R.col[3].z = 0;            R.col[3].w = 1;
    return R;
}
00519 
00520 //-----------------------------------------------------------------------------
00521 // END: Fns for Quaternion
00522 //=============================================================================
00523 END_NAMESPACE_TAPs__CUDA
00524 //-----------------------------------------------------------------------------
00525 //#include "TAPsCUDA_UsefulDeviceFns_Def.cu"
00526 #endif
00527 //34567890123456789012345678901234567890123456789012345678901234567890123456789
00528 //--+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines