TAPs 0.7.7.3
TAPsCUDA_GL_ModelElasticRod_Def.cu
Go to the documentation of this file.
00001 /******************************************************************************
00002 TAPsCUDA_VertexListModelElasticRod_Def.cu
00003 
00004 CUDA Definition for ModelElasticRod (i.e., cpp file).
00005 
00006 SUKITTI PUNAK   (09/03/2009)
00007 UPDATE          (09/18/2009)
00008 ******************************************************************************/
00009 
00010 #include "TAPsCUDA_GL_ModelElasticRod.cu"
00011 
00012 BEGIN_NAMESPACE_TAPs__CUDA
00013 //=============================================================================
00014 // CUDA for Drawing by OpenGL
00015 //-----------------------------------------------------------------------------
00016 //#if defined(__gl_h_) || defined(__GL_H__)
00017 
00018 __global__
00019 void GL__GenCylinderForElasticRodModel_CU ( 
00020     unsigned int numOfNodes,    
00021     unsigned int numOfThreads,  
00022     float4 *     pVertexData,   
00023     unsigned int numOfCrossSectionVertices, 
00024     float        radius         
00025 )
00026 {
00027     // Block index
00028     int bx = blockIdx.x;
00029     // Thread index
00030     int tx = threadIdx.x;
00031     // Vertex number
00032     int vertexNo = (numOfThreads * bx + tx);
00033     if ( vertexNo >= numOfNodes-1 ) return;
00034     
00035     // Index for output
00036     int idx = vertexNo * (numOfCrossSectionVertices+1) * 4 * 3;
00037     
00038     float4 curr_ori = tex1Dfetch( CudaTexOrientationList, vertexNo );
00039     matrix4x4 rotMat0 = QuatenionToRotationMatrix4x4( curr_ori );
00040     float4 next_ori = tex1Dfetch( CudaTexOrientationList, vertexNo+1 );
00041     matrix4x4 rotMat1 = QuatenionToRotationMatrix4x4( next_ori );
00042     float4 curr_pos = tex1Dfetch( CudaTexVertexList, vertexNo );
00043     matrix4x4 rotMatAvg = (rotMat1 + rotMat0) / 2.0f;
00044     float4 next_pos = tex1Dfetch( CudaTexVertexList, vertexNo+1 );
00045     float4 middlePt0 = (next_pos + curr_pos) / 2.0f;
00046     float4 next2_pos = tex1Dfetch( CudaTexVertexList, vertexNo+2 );
00047     float4 middlePt1 = (next2_pos + next_pos) / 2.0f;
00048     
00049     //__shared__ float3 cs_vertices[numOfCrossSectionVertices];
00050     float4 vertices[3][20];
00051     float4 normals[3][20];
00052     float4 texCoords[3][20];
00053     
00054     // Compute the cross section vertices, normals, and texture coordinates for middle point from pos0 to pos1
00055     for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00056         vertices[0][i] = rotMat0 * tex2D( CudaTexGLElasticRod, i, 0.0f );
00057         vertices[0][i].x *= radius;
00058         vertices[0][i].y *= radius;
00059         vertices[0][i].z *= radius;
00060         vertices[0][i].x += middlePt0.x;
00061         vertices[0][i].y += middlePt0.y;
00062         vertices[0][i].z += middlePt0.z;
00063         normals[0][i]  = rotMat0 * tex2D( CudaTexGLElasticRod, i, 1.0f );
00064         texCoords[0][i] = tex2D( CudaTexGLElasticRod, i, 2.0f );
00065     }
00066     // Compute the cross section vertices, normals, and texture coordinates for pos1
00067     for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00068         vertices[1][i] = rotMatAvg * tex2D( CudaTexGLElasticRod, i, 0.0f );
00069         vertices[1][i].x *= radius;
00070         vertices[1][i].y *= radius;
00071         vertices[1][i].z *= radius;
00072         vertices[1][i].x += next_pos.x;
00073         vertices[1][i].y += next_pos.y;
00074         vertices[1][i].z += next_pos.z;
00075         normals[1][i]  = rotMatAvg * tex2D( CudaTexGLElasticRod, i, 1.0f );
00076         texCoords[1][i] = tex2D( CudaTexGLElasticRod, i, 2.0f );
00077         texCoords[1][i].x = 0.5f;
00078     }
00079     // Compute the cross section vertices, normals, and texture coordinates for middle point from pos1 to pos2
00080     for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00081         vertices[2][i] = rotMat1 * tex2D( CudaTexGLElasticRod, i, 0.0f );
00082         vertices[2][i].x *= radius;
00083         vertices[2][i].y *= radius;
00084         vertices[2][i].z *= radius;
00085         vertices[2][i].x += middlePt1.x;
00086         vertices[2][i].y += middlePt1.y;
00087         vertices[2][i].z += middlePt1.z;
00088         normals[2][i]  = rotMat1 * tex2D( CudaTexGLElasticRod, i, 1.0f );
00089         texCoords[2][i] = tex2D( CudaTexGLElasticRod, i, 2.0f );
00090         texCoords[2][i].x = 1.0f;
00091     }
00092 
00093     {
00094         // Two triangle strips from middlePt0 to pos1 and from pos1 to middlePt1
00095         // Draw triangle strip from middlePt0 to pos1
00096         for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00097             pVertexData[idx++] = vertices[1][i];
00098             pVertexData[idx++] = normals[1][i];
00099             pVertexData[idx++] = texCoords[1][i];
00100 
00101             pVertexData[idx++] = vertices[0][i];
00102             pVertexData[idx++] = normals[0][i];
00103             pVertexData[idx++] = texCoords[0][i];
00104         }
00105         pVertexData[idx++] = vertices[1][0];
00106         pVertexData[idx++] = normals[1][0];
00107         pVertexData[idx++] = texCoords[1][0];
00108 
00109         pVertexData[idx++] = vertices[0][0];
00110         pVertexData[idx++] = normals[0][0];
00111         pVertexData[idx++] = texCoords[0][0];
00112 
00113         // Draw triangle strip from pos1 to middlePt1
00114         for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00115             pVertexData[idx++] = vertices[2][i];
00116             pVertexData[idx++] = normals[2][i];
00117             pVertexData[idx++] = texCoords[2][i];
00118 
00119             pVertexData[idx++] = vertices[1][i];
00120             pVertexData[idx++] = normals[1][i];
00121             pVertexData[idx++] = texCoords[1][i];
00122 
00123         }
00124         pVertexData[idx++] = vertices[2][0];
00125         pVertexData[idx++] = normals[2][0];
00126         pVertexData[idx++] = texCoords[2][0];
00127 
00128         pVertexData[idx++] = vertices[1][0];
00129         pVertexData[idx++] = normals[1][0];
00130         pVertexData[idx++] = texCoords[1][0];
00131     }
00132 }
00133 
00134 
00135 __global__
00136 void GL__GenCylinderForElasticRodModel_PLHM_CU ( 
00137     unsigned int numOfNodes,    
00138     unsigned int numOfThreads,  
00139     float *      pCenterlines,  
00140     float *      pOrientations, 
00141     float4 *     pVertexData,   
00142     unsigned int numOfCrossSectionVertices, 
00143     float        radius         
00144 )
00145 {
00146     // Block index
00147     int bx = blockIdx.x;
00148     // Thread index
00149     int tx = threadIdx.x;
00150     // Vertex number
00151     int vertexNo = (numOfThreads * bx + tx);
00152     if ( vertexNo >= numOfNodes-1 ) return;
00153     
00154     // Index for output
00155     int idx = vertexNo * (numOfCrossSectionVertices+1) * 4 * 3;
00156     
00157     int idx0 = vertexNo * 4;
00158     int idx1 = idx0+1;
00159     int idx2 = idx1+1;
00160     int idx3 = idx2+1;
00161     
00162     //float4 curr_ori = tex1Dfetch( CudaTexOrientationList, vertexNo );
00163     float4 curr_ori = make_float4( pOrientations[idx0], pOrientations[idx1], pOrientations[idx2], pOrientations[idx3] );
00164     matrix4x4 rotMat0 = QuatenionToRotationMatrix4x4( curr_ori );
00165     //float4 next_ori = tex1Dfetch( CudaTexOrientationList, vertexNo+1 );
00166     float4 next_ori = make_float4( pOrientations[idx0+4], pOrientations[idx1+4], pOrientations[idx2+4], pOrientations[idx3+4] );
00167     matrix4x4 rotMat1 = QuatenionToRotationMatrix4x4( next_ori );
00168     //float4 curr_pos = tex1Dfetch( CudaTexVertexList, vertexNo );
00169     float4 curr_pos = make_float4( pCenterlines[idx0], pCenterlines[idx1], pCenterlines[idx2], pCenterlines[idx3] );
00170     matrix4x4 rotMatAvg = (rotMat1 + rotMat0) / 2.0f;
00171     //float4 next_pos = tex1Dfetch( CudaTexVertexList, vertexNo+1 );
00172     float4 next_pos = make_float4( pCenterlines[idx0+4], pCenterlines[idx1+4], pCenterlines[idx2+4], pCenterlines[idx3+4] );
00173     float4 middlePt0 = (next_pos + curr_pos) / 2.0f;
00174     //float4 next2_pos = tex1Dfetch( CudaTexVertexList, vertexNo+2 );
00175     float4 next2_pos = make_float4( pCenterlines[idx0+8], pCenterlines[idx1+8], pCenterlines[idx2+8], pCenterlines[idx3+8] );
00176     float4 middlePt1 = (next2_pos + next_pos) / 2.0f;
00177     
00178     //__shared__ float3 cs_vertices[numOfCrossSectionVertices];
00179     float4 vertices[3][20];
00180     float4 normals[3][20];
00181     float4 texCoords[3][20];
00182     
00183     // Compute the cross section vertices, normals, and texture coordinates for middle point from pos0 to pos1
00184     for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00185         vertices[0][i] = rotMat0 * tex2D( CudaTexGLElasticRod, i, 0.0f );
00186         vertices[0][i].x *= radius;
00187         vertices[0][i].y *= radius;
00188         vertices[0][i].z *= radius;
00189         vertices[0][i].x += middlePt0.x;
00190         vertices[0][i].y += middlePt0.y;
00191         vertices[0][i].z += middlePt0.z;
00192         normals[0][i]  = rotMat0 * tex2D( CudaTexGLElasticRod, i, 1.0f );
00193         texCoords[0][i] = tex2D( CudaTexGLElasticRod, i, 2.0f );
00194     }
00195     // Compute the cross section vertices, normals, and texture coordinates for pos1
00196     for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00197         vertices[1][i] = rotMatAvg * tex2D( CudaTexGLElasticRod, i, 0.0f );
00198         vertices[1][i].x *= radius;
00199         vertices[1][i].y *= radius;
00200         vertices[1][i].z *= radius;
00201         vertices[1][i].x += next_pos.x;
00202         vertices[1][i].y += next_pos.y;
00203         vertices[1][i].z += next_pos.z;
00204         normals[1][i]  = rotMatAvg * tex2D( CudaTexGLElasticRod, i, 1.0f );
00205         texCoords[1][i] = tex2D( CudaTexGLElasticRod, i, 2.0f );
00206         texCoords[1][i].x = 0.5f;
00207     }
00208     // Compute the cross section vertices, normals, and texture coordinates for middle point from pos1 to pos2
00209     for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00210         vertices[2][i] = rotMat1 * tex2D( CudaTexGLElasticRod, i, 0.0f );
00211         vertices[2][i].x *= radius;
00212         vertices[2][i].y *= radius;
00213         vertices[2][i].z *= radius;
00214         vertices[2][i].x += middlePt1.x;
00215         vertices[2][i].y += middlePt1.y;
00216         vertices[2][i].z += middlePt1.z;
00217         normals[2][i]  = rotMat1 * tex2D( CudaTexGLElasticRod, i, 1.0f );
00218         texCoords[2][i] = tex2D( CudaTexGLElasticRod, i, 2.0f );
00219         texCoords[2][i].x = 1.0f;
00220     }
00221 
00222     {
00223         // Two triangle strips from middlePt0 to pos1 and from pos1 to middlePt1
00224         // Draw triangle strip from middlePt0 to pos1
00225         for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00226             pVertexData[idx++] = vertices[1][i];
00227             pVertexData[idx++] = normals[1][i];
00228             pVertexData[idx++] = texCoords[1][i];
00229 
00230             pVertexData[idx++] = vertices[0][i];
00231             pVertexData[idx++] = normals[0][i];
00232             pVertexData[idx++] = texCoords[0][i];
00233         }
00234         pVertexData[idx++] = vertices[1][0];
00235         pVertexData[idx++] = normals[1][0];
00236         pVertexData[idx++] = texCoords[1][0];
00237 
00238         pVertexData[idx++] = vertices[0][0];
00239         pVertexData[idx++] = normals[0][0];
00240         pVertexData[idx++] = texCoords[0][0];
00241 
00242         // Draw triangle strip from pos1 to middlePt1
00243         for ( int i = 0; i < numOfCrossSectionVertices; ++i ) {
00244             pVertexData[idx++] = vertices[2][i];
00245             pVertexData[idx++] = normals[2][i];
00246             pVertexData[idx++] = texCoords[2][i];
00247 
00248             pVertexData[idx++] = vertices[1][i];
00249             pVertexData[idx++] = normals[1][i];
00250             pVertexData[idx++] = texCoords[1][i];
00251 
00252         }
00253         pVertexData[idx++] = vertices[2][0];
00254         pVertexData[idx++] = normals[2][0];
00255         pVertexData[idx++] = texCoords[2][0];
00256 
00257         pVertexData[idx++] = vertices[1][0];
00258         pVertexData[idx++] = normals[1][0];
00259         pVertexData[idx++] = texCoords[1][0];
00260     }
00261 }
00262 
00263 
00264 // Is the texture for cross section's vertex data initialized?
00265 bool GL__IsInitailizeDataForElasticRodModel_Drawing ()
00266 { return CUDA_GL_ElasticRod::IsInitialized(); }
00267 
00268 // Initialize the CUDA/GL for elastic rod's drawing
00269 bool GL__InitailizeDataForElasticRodModel_Drawing (
00270     unsigned int    numberOfCrossSectionVertices,   // number of cross section's vertices
00271     float *         crossSectionVertexData,         // cross section's vertex data
00272     GLuint          vbo_GL                          // the OpenGL buffer object for vertex data
00273 )
00274 {
00275     return Cuda_GL_ElasticRod.Create( numberOfCrossSectionVertices, crossSectionVertexData, vbo_GL );
00276 }
00277 
00278 // Clear the CUDA/GL for elastic rod's drawing
00279 void GL__ClearDataForElasticRodModel_Drawing ()
00280 {
00281     Cuda_GL_ElasticRod.Clear();
00282 }
00283 
00284 // Compute the generalized cylinder for the elastic rod
00285 void GL__GenCylinderForElasticRodModel (
00286     unsigned int cudaID,        // CUDA ID
00287     unsigned int numOfNodes,    // number of nodes
00288     unsigned int numOfThreads,  // number of threads per CUDA thread block
00289     GLuint       vbo_GL,        // OpenGL vertex buffer object
00290     unsigned int numOfCrossSectionVertices, // number of cross section's vertices
00291     float        radius         // radius
00292 )
00293 {
00294     // Kernel invocation
00295     unsigned int numOfThreadBlocks = ( numOfNodes + numOfThreads - 1 ) / numOfThreads;
00296     dim3 Dg( numOfThreadBlocks, 1, 1 );
00297     dim3 Db( numOfThreads, 1, 1 );
00298     size_t Ns = 0;
00299     cudaStream_t S = 0;
00300 
00301     // Bind Textures
00302     DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfNodes );
00303     DATA_GlobalPool.DataForVertexList[cudaID]->BindOrientationList( numOfNodes );
00304     Cuda_GL_ElasticRod.BindCrossSectionVertices();
00305     // Map to GL
00306     float4 * pVertexData;
00307     CUDA_SAFE_CALL( cudaGLMapBufferObject( (void**)&pVertexData, vbo_GL ) );
00308     CUT_CHECK_ERROR( "Mapping an OpenGL buffer object FAILED!" );
00309 
00310     // Call the CUDA kernel
00311     GL__GenCylinderForElasticRodModel_CU<<< Dg, Db, Ns, S >>>( 
00312         numOfNodes, numOfThreads, pVertexData, numOfCrossSectionVertices, radius
00313     );
00314     CUT_CHECK_ERROR( "GenCylinderForElasticRodModel Kernel execution failed!" );
00315 
00316     // Unbind Textures
00317     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
00318     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindOrientationList();
00319     Cuda_GL_ElasticRod.UnbindCrossSectionVertices();
00320     // Unmap from GL
00321     CUDA_SAFE_CALL( cudaGLUnmapBufferObject( vbo_GL ) );
00322 }
00323 
00324 
00325 // Compute the generalized cylinder for the elastic rod (with CUDA's PLHM)
00326 void GL__GenCylinderForElasticRodModel_PLHM (
00327     unsigned int cudaID,        // CUDA ID
00328     unsigned int numOfNodes,    // number of nodes
00329     unsigned int numOfThreads,  // number of threads per CUDA thread block
00330     GLuint       vbo_GL,        // OpenGL vertex buffer object
00331     unsigned int numOfCrossSectionVertices, // number of cross section's vertices
00332     float        radius         // radius
00333 )
00334 {
00335     // Kernel invocation
00336     unsigned int numOfThreadBlocks = ( numOfNodes + numOfThreads - 1 ) / numOfThreads;
00337     dim3 Dg( numOfThreadBlocks, 1, 1 );
00338     dim3 Db( numOfThreads, 1, 1 );
00339     size_t Ns = 0;
00340     cudaStream_t S = 0;
00341 
00342     // Bind Texture
00343     Cuda_GL_ElasticRod.BindCrossSectionVertices();
00344     // Map to GL
00345     float4 * pVertexData;
00346     CUDA_SAFE_CALL( cudaGLMapBufferObject( (void**)&pVertexData, vbo_GL ) );
00347     CUT_CHECK_ERROR( "Mapping an OpenGL buffer object FAILED!" );
00348 
00349     // Call the CUDA kernel
00350     GL__GenCylinderForElasticRodModel_PLHM_CU<<< Dg, Db, Ns, S >>>( 
00351         numOfNodes, numOfThreads,
00352         DATA_GlobalPool.DataForVertexListPLHM[cudaID]->GetDevVertexList(),
00353         DATA_GlobalPool.DataForVertexListPLHM[cudaID]->GetDevOrientationList(),
00354         pVertexData, numOfCrossSectionVertices, radius
00355     );
00356     CUT_CHECK_ERROR( "GenCylinderForElasticRodModel Kernel execution failed!" );
00357 
00358     // Unbind Texture
00359     Cuda_GL_ElasticRod.UnbindCrossSectionVertices();
00360     // Unmap from GL
00361     CUDA_SAFE_CALL( cudaGLUnmapBufferObject( vbo_GL ) );
00362 }
00363 
00364 
00365 // class GL_ElasticRod ----------------------------------------------
00366 bool            CUDA_GL_ElasticRod::g_isInitialized = false;
00367 cudaArray *     CUDA_GL_ElasticRod::g_Ptr_Mem_CrossSectionVertexList = NULL;
00368 unsigned int    CUDA_GL_ElasticRod::g_numOfCrossSectionVertices = 0;
00369 unsigned int    CUDA_GL_ElasticRod::g_sizeOfCrossSectionVertexData = 0;
00370 
00371 // Constructor
00372 CUDA_GL_ElasticRod::CUDA_GL_ElasticRod ()
00373 {}
00374 
00375 // Destructor
00376 CUDA_GL_ElasticRod::~CUDA_GL_ElasticRod ()
00377 {
00378     Clear();
00379 }
00380 
00381 // Create
00382 bool CUDA_GL_ElasticRod::Create (
00383     unsigned int    numberOfCrossSectionVertices,   // number of cross section's vertices
00384     float *         crossSectionVertexData,         // cross section's vertex data
00385     GLuint          vbo_GL                          // the OpenGL buffer object for vertex data
00386 )
00387 {
00388     g_numOfCrossSectionVertices = numberOfCrossSectionVertices;
00389     if ( !g_isInitialized ) {
00390     
00391         cudaError_t error = cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() );
00392         //cudaError_t error = cudaGLSetGLDevice( 1 );
00393         if ( error != cudaSuccess ) {
00394             const char * str = cudaGetErrorString(error);
00395             printf( "CUDA ERROR: %i (%s)\n", error, str );
00396         }
00397         
00398         printf( "cutGetMaxGflopsDeviceId(): %i\n", cutGetMaxGflopsDeviceId() );
00399         
00400         size_t dimension_size = g_numOfCrossSectionVertices * sizeof(float4);   // in bytes
00401         g_sizeOfCrossSectionVertexData = dimension_size * 3;    // in bytes
00402         cudaChannelFormatDesc desc = cudaCreateChannelDesc( 32, 32, 32, 32, cudaChannelFormatKindFloat );
00403         CUDA_SAFE_CALL( cudaMallocArray( &g_Ptr_Mem_CrossSectionVertexList, &desc, g_numOfCrossSectionVertices, 3 ) );
00404         CUT_CHECK_ERROR( "cudaMallocArray for Cross Section's Vertex Data Failed!" );
00405         CUDA_SAFE_CALL( cudaMemcpyToArray( g_Ptr_Mem_CrossSectionVertexList, 0, 0, crossSectionVertexData, g_sizeOfCrossSectionVertexData, cudaMemcpyHostToDevice ) );
00406         CUT_CHECK_ERROR( "cudaMemcpyToArray for Cross Section's Vertex Data Failed!" );
00407         g_isInitialized = true;
00408     }
00409     
00410     cudaError_t ret = cudaGLRegisterBufferObject( vbo_GL );
00411     if ( ret != 0 ) {
00412         const char * str = cudaGetErrorString(ret);
00413         printf( "CUDA ERROR: %i (%s)\n", ret, str );
00414     }
00415     // ret = cudaGLSetBufferObjectMapFlags( vbo_GL, cudaGLMapFlagsWriteDiscard );
00416     // if ( ret != 0 ) {
00417         // const char * str = cudaGetErrorString(ret);
00418         // printf( "CUDA ERROR: %i (%s)\n", ret, str );
00419     // }
00420 
00421     return true;
00422 }
00423 
00424 // Clear
00425 void CUDA_GL_ElasticRod::Clear ()
00426 {
00427     UnbindCrossSectionVertices();
00428     if ( g_isInitialized ) {
00429         CUDA_SAFE_CALL( cudaFreeArray( g_Ptr_Mem_CrossSectionVertexList ) );
00430         g_isInitialized = false;
00431     }
00432 }
00433 // class CUDA_GL_ElasticRod ----------------------------------------------
00434 
00435 //#endif //#if defined(__gl_h_) || defined(__GL_H__)
00436 //-----------------------------------------------------------------------------
00437 // CUDA for Drawing by OpenGL
00438 //=============================================================================
00439 
00440 
00441 //-----------------------------------------------------------------------------
00442 //=============================================================================
00443 END_NAMESPACE_TAPs__CUDA
00444 //-----------------------------------------------------------------------------
00445 //34567890123456789012345678901234567890123456789012345678901234567890123456789
00446 //--+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines