TAPs 0.7.7.3
TAPsCUDA_VertexListModelElasticRod_Def.cu
Go to the documentation of this file.
00001 /******************************************************************************
00002 TAPsCUDA_VertexListModelElasticRod_Def.cu
00003 
00004 CUDA Definition for ModelElasticRod (i.e., cpp file).
00005 
00006 SUKITTI PUNAK   (09/03/2009)
00007 UPDATE          (09/24/2009)
00008 ******************************************************************************/
00009 
00010 #include "TAPsCUDA_VertexListModelElasticRod.cu"
00011 
00012 BEGIN_NAMESPACE_TAPs__CUDA
00013 //=============================================================================
00014 // CUDA Initialization and Clear Functions
00015 //-----------------------------------------------------------------------------
00017 bool InitailizeDataForElasticRodModel ( 
00018     unsigned int & cudaID,      // CUDA ID for the object
00019     unsigned int numOfNodes,    // number of nodes
00020     float * posList,            // list of centerlines' position -- xyzw
00021     float * prevPosList,        // list of centerlines' previous position -- xyzw
00022     float * oriList,            // list of orientations -- xyzw
00023     float * prevOriList,        // list of previous orientations -- xyzw
00024     float * intForceList,       // list of (internal) forces -- xyzw
00025     float * extForceList        // list of (external) forces -- xyzw
00026 )
00027 {
00028     // Here the Elastic Rod Model uses Vertex List Data
00029     // However, it has implicit connection (each vertex connect to the previous and next vertices).
00030     // So the connection list is set to zero.
00031 
00032     // Home vertex is unused for now.  Plan is to use home vertex for sticking suture to a surface.
00033 
00034     // Assign CUDA ID to the object
00035     // The object has to use cudaID to communicate with the CUDA.
00036     cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;
00037 
00038     // Allocate CUDA data for the object.
00039     DATA_Vertex_List * newData = new DATA_Vertex_List( 
00040         numOfNodes,     // total number of nodes
00041         false,          // include home positions into the data
00042         0,              // maximum vertex connection
00043         false,          // include simulation flags constraint into the data
00044         true,           // include orientations
00045         true,           // include previous orientations
00046         true,           // include forces (set 1) as for internal force
00047         true            // include forces (set 1) as for external force
00048     );
00049     AddToGlobal_Pool_Of_DATA_Vertex_List( newData );
00050 
00051     // Size of memory for (xyzw) vertices
00052     int size = numOfNodes * 4 * sizeof(float);
00053 
00054     // Copy Vertex List from host data to device data
00055     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), posList, size, cudaMemcpyHostToDevice ) );
00056     // Copy Previous Vertex List from host data to device data
00057     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevPosList, size, cudaMemcpyHostToDevice ) );
00058     // Copy Home Vertex List from host data to device data
00059     //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
00060     // Copy Vertex Connection List from host data to device data
00062     CUDA_SAFE_CALL( cudaMemcpy( newData->GetOrientationList(), oriList, size, cudaMemcpyHostToDevice ) );
00063     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevOrientationList(), prevOriList, size, cudaMemcpyHostToDevice ) );
00064     CUDA_SAFE_CALL( cudaMemcpy( newData->GetForceList_1(), intForceList, size, cudaMemcpyHostToDevice ) );
00065     CUDA_SAFE_CALL( cudaMemcpy( newData->GetForceList_2(), extForceList, size, cudaMemcpyHostToDevice ) );
00066 
00067     return true;
00068 }
00069 
00070 
00071 // Clear CUDA Data for an Elastic Rod model (ModelElasticRod)
00072 void ClearDataForElasticRodModel ( unsigned int & cudaID )
00073 {
00074     if ( cudaID != 0 ) {
00075         delete DATA_GlobalPool.DataForVertexList[cudaID];
00076         cudaID = 0;
00077     }
00078 }
00079 //-----------------------------------------------------------------------------
00080 // CUDA Initialization and Clear Functions
00081 //=============================================================================
00082 
00083 
00116 /*
00118 __global__
00119 void Global__ModelElasticRod_AdvSim_CU_Centerline ( 
00120     unsigned int numOfNodes,    // number of nodes
00121     unsigned int numOfThreads,  // number of threads
00122     float currentTime,          // current time
00123     float timeStep,             // time step
00124     float radius,               // radius
00125     float length,               // centerline's rest length
00126     float length_ori,           // orientation's rest length
00127     float mass,                 // point mass
00128     float k_mdr,                // A constant value in proportion to material density and radius
00129     //float Kt,                 // kinetic translational constant
00130     //float3 Kr,                // kinetic rotational constant -- xyz
00131     //float Dt,                 // translational constant
00132     //float3 Dr,                // rotational dissipation constant -- xyz
00133     float Kconstraint_3rdDirAlignTangent,   // Kconstraint for aligning centerline's tangent with orientation's 3rd direction
00134     float Kvdamping,            // centerline's velocity damper
00135     float Ps,                   // potential stretch constant
00136     float3 Pb,                  // potential bend constant -- xyz
00137     float * out_data_1      // output data for centerlines
00138 )
00139 {
00140     // Block index
00141     int bx = blockIdx.x;
00142     // Thread index
00143     int tx = threadIdx.x;
00144     // Vertex number
00145     int vertexNo = (numOfThreads * bx + tx);
00146     if ( vertexNo >= numOfNodes )   return;
00147     // Index for output
00148     int idx = (numOfThreads * bx + tx) * 4;
00149 
00150     // Fetch data from texture linear memory
00151     float4 current_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );
00152     float3 force = make_float3( 0.0f, 0.0f, 0.0f );
00153     
00154     if ( current_pos.w == 1 ) {
00155         force = Device__CalForce_ModelElasticRod( 
00156             vertexNo,       // vertex number
00157             numOfNodes,     // number of vertices
00158             radius,         // radius
00159             length,         // rest length
00160             mass,           // point mass
00161             //Kt,           // kinetic translational constant
00162             //Kr,           // kinetic rotational constant -- xyz
00163             //Dt,           // translational constant
00164             //Dr,           // rotational dissipation constant -- xyz
00165             Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction
00166             Kvdamping,      // centerline's velocity damper
00167             Ps,             // potential stretch constant
00168             Pb              // potential bend constant -- xyz
00169         );
00170 
00171         // Use a semi Euler integration to find the new velocity and position
00172         float3 new_vel = Device__EulerInt_NewVel_ModelElasticRod (
00173             force,              // force
00174             make_float3(0.0f, 0.0f, 0.0f),  // velocity
00175             mass,               // mass
00176             timeStep            // time step
00177         );
00178         float3 new_pos = Device__EulerInt_NewPos_ModelElasticRod (
00179             new_vel,            // velocity
00180             XYZ(current_pos),   // position
00181             timeStep            // time step
00182         );
00183         
00184         //  WARNING: out data have to be set at the end, otherwise it won't work!!! ???
00185         out_data_1[idx  ] = new_pos.x;
00186         out_data_1[idx+1] = new_pos.y;
00187         out_data_1[idx+2] = new_pos.z;
00188         out_data_1[idx+3] = current_pos.w;
00189     }
00190     else {
00191         out_data_1[idx  ] = current_pos.x;
00192         out_data_1[idx+1] = current_pos.y;
00193         out_data_1[idx+2] = current_pos.z;
00194         out_data_1[idx+3] = current_pos.w;
00195     }
00196     // Synchronize to make sure the data are loaded
00197     //__syncthreads();
00198     
00199     //out_data_1[idx  ] = current_pos.x;
00200     //out_data_1[idx+1] = current_pos.y;
00201     //out_data_1[idx+2] = current_pos.z;
00202     //out_data_1[idx+3] = current_pos.w;
00203 }
00204 */
00205 
00206 
00207 /*
00209 __global__
00210 void Global__ModelElasticRod_AdvSim_CU_Orientation ( 
00211     unsigned int numOfNodes,    // number of nodes
00212     unsigned int numOfThreads,  // number of threads
00213     float currentTime,          // current time
00214     float timeStep,             // time step
00215     float radius,               // radius
00216     float length,               // centerline's rest length
00217     float length_ori,           // orientation's rest length
00218     float mass,                 // point mass
00219     float k_mdr,                // A constant value in proportion to material density and radius
00220     //float Kt,                 // kinetic translational constant
00221     //float3 Kr,                // kinetic rotational constant -- xyz
00222     //float Dt,                 // translational constant
00223     //float3 Dr,                // rotational dissipation constant -- xyz
00224     float Kconstraint_3rdDirAlignTangent,   // Kconstraint for aligning centerline's tangent with orientation's 3rd direction
00225     float Kvdamping,            // centerline's velocity damper
00226     float Ps,                   // potential stretch constant
00227     float3 Pb,                  // potential bend constant -- xyz
00228     float * out_data_2      // output data for centerlines
00229 )
00230 {
00231     // Block index
00232     int bx = blockIdx.x;
00233     // Thread index
00234     int tx = threadIdx.x;
00235     // Vertex number
00236     int vertexNo = (numOfThreads * bx + tx);
00237     if ( vertexNo >= numOfNodes - 1 )   return;
00238     // Index for output
00239     int idx = (numOfThreads * bx + tx) * 4;
00240 
00241     // Fetch data from texture linear memory
00242     float4 current_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );
00243     float4 current_ori = tex1Dfetch( CudaTexOrientationList, vertexNo );
00244     float3 force = make_float3( 0.0f, 0.0f, 0.0f );
00245     float4 torque = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
00246     
00247     if ( current_pos.w == 1 ) {
00248         torque = Device__CalTorque_ModelElasticRod( 
00249             vertexNo,       // vertex number
00250             numOfNodes,     // number of vertices
00251             radius,         // radius
00252             length,         // rest length
00253             mass,           // point mass
00254             //Kt,           // kinetic translational constant
00255             //Kr,           // kinetic rotational constant -- xyz
00256             //Dt,           // translational constant
00257             //Dr,           // rotational dissipation constant -- xyz
00258             Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction
00259             Kvdamping,      // centerline's velocity damper
00260             Ps,             // potential stretch constant
00261             Pb              // potential bend constant -- xyz
00262         );
00263         
00264         //torque = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
00265         
00266         // Use a semi Euler integration to find the new orientation
00267         //torque = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );   // DEBUG
00268         float4 new_ori = Device__EulerInt_NewOri_ModelElasticRod (
00269             k_mdr,          // an constant value in proportion to material density and radius
00270             length_ori,     // orientation's rest length
00271             current_ori,    // orientation
00272             torque,         // torque in 4-dimension
00273             timeStep        // time step
00274         );
00275         
00276         // Normalize the new orientation
00277         //new_ori = current_ori;    // DEBUG
00278         new_ori = QuaternionUnit( new_ori );
00279         
00280         //  WARNING: out data have to be set at the end, otherwise it won't work!!! ???
00281         out_data_2[idx  ] = new_ori.x;
00282         out_data_2[idx+1] = new_ori.y;
00283         out_data_2[idx+2] = new_ori.z;
00284         out_data_2[idx+3] = new_ori.w;
00285     }
00286     else {
00287         out_data_2[idx  ] = current_ori.x;
00288         out_data_2[idx+1] = current_ori.y;
00289         out_data_2[idx+2] = current_ori.z;
00290         out_data_2[idx+3] = current_ori.w;
00291     }
00292     // Synchronize to make sure the data are loaded
00293     //__syncthreads();
00294 }
00295 */
00296 
00298 __global__
00299 void Global__ModelElasticRod_AdvSim_CU (
00300     unsigned int numOfNodes,    // number of nodes
00301     unsigned int numOfThreads,  // number of threads
00302     float currentTime,          // current time
00303     float timeStep,             // time step
00304     float radius,               // radius
00305     float length,               // centerline's rest length
00306     float length_ori,           // orientation's rest length
00307     float mass,                 // point mass
00308     float k_mdr,                // A constant value in proportion to material density and radius
00309     //float Kt,                 // kinetic translational constant
00310     //float3 Kr,                // kinetic rotational constant -- xyz
00311     //float Dt,                 // translational constant
00312     //float3 Dr,                // rotational dissipation constant -- xyz
00313     float Kconstraint_3rdDirAlignTangent,   // Kconstraint for aligning centerline's tangent with orientation's 3rd direction
00314     float Kvdamping,            // centerline's velocity damper
00315     float Ps,                   // potential stretch constant
00316     float3 Pb,                  // potential bend constant -- xyz
00317     float * out_data_1,     // output data for centerlines
00318     float * out_data_2      // output data for orientations
00319 )
00320 {
00321     // Block index
00322     int bx = blockIdx.x;
00323     // Thread index
00324     int tx = threadIdx.x;
00325     // Vertex number
00326     int vertexNo = (numOfThreads * bx + tx);
00327     
00328     #ifdef  __DEVICE_EMULATION__
00329     printf( "(vertexNo: %i\n", vertexNo );
00330     #endif//__DEVICE_EMULATION__
00331     
00332     // DEBUG -- vertex#0 is set somewhere else or fixed
00333     //if ( vertexNo == 0 ) return;
00334 
00335     if ( vertexNo >= numOfNodes )   return;
00336     // Index for output
00337     int idx = vertexNo * 4;
00338 
00339     // Fetch data from texture linear memory
00340     float4 curr_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );
00341     float4 curr_ori  = tex1Dfetch( CudaTexOrientationList, vertexNo );
00342     float4 int_force = tex1Dfetch( CudaTexForceList_1, vertexNo );
00343     float4 ext_force = tex1Dfetch( CudaTexForceList_2, vertexNo );
00344     float3 force  = make_float3( 0.0f, 0.0f, 0.0f );
00345     float4 torque = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
00346     
00347     float4 prev_pos = tex1Dfetch( CudaTexVertexList, vertexNo-1 );
00348     float4 next_pos = tex1Dfetch( CudaTexVertexList, vertexNo+1 );
00349     float4 prev_ori = tex1Dfetch( CudaTexOrientationList, vertexNo-1 );
00350     float4 next_ori = tex1Dfetch( CudaTexOrientationList, vertexNo+1 );
00351 
00352     #ifdef  __DEVICE_EMULATION__
00353     printf( "prev_pos: %f %f %f\n", prev_pos.x, prev_pos.y, prev_pos.z );
00354     printf( "prev_ori: %f %f %f %f\n", prev_ori.x, prev_ori.y, prev_ori.z, prev_ori.w );
00355     printf( "curr_pos: %f %f %f\n", curr_pos.x, curr_pos.y, curr_pos.z );
00356     printf( "curr_ori: %f %f %f %f\n", curr_ori.x, curr_ori.y, curr_ori.z, curr_ori.w );
00357     printf( "next_pos: %f %f %f\n", next_pos.x, next_pos.y, next_pos.z );
00358     printf( "next_ori: %f %f %f %f\n", next_ori.x, next_ori.y, next_ori.z, next_ori.w );
00359     printf( "int_force: %f %f %f %f\n", int_force.x, int_force.y, int_force.z, int_force.w );
00360     printf( "ext_force: %f %f %f %f\n", ext_force.x, ext_force.y, ext_force.z, ext_force.w );
00361     #endif
00362     
00363     if ( curr_pos.w == 1 ) {
00364         force = Device__CalForce_ModelElasticRod( 
00365             vertexNo,       // vertex number
00366             numOfNodes,     // number of vertices
00367             prev_pos,       // previous position
00368             curr_pos,       // current position
00369             next_pos,       // next position
00370             prev_ori,       // previous orientation
00371             curr_ori,       // current orientation
00372             radius,         // radius
00373             length,         // rest length
00374             mass,           // point mass
00375             //Kt,           // kinetic translational constant
00376             //Kr,           // kinetic rotational constant -- xyz
00377             //Dt,           // translational constant
00378             //Dr,           // rotational dissipation constant -- xyz
00379             Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction
00380             Kvdamping,      // centerline's velocity damper
00381             Ps,             // potential stretch constant
00382             Pb              // potential bend constant -- xyz
00383         );
00384         
00385         torque = Device__CalTorque_ModelElasticRod(
00386             vertexNo,       // vertex number
00387             numOfNodes,     // number of vertices
00388             curr_pos,       // current position
00389             next_pos,       // next position
00390             prev_ori,       // previous orientation
00391             curr_ori,       // current orientation
00392             next_ori,       // next orientation
00393             radius,         // radius
00394             length,         // rest length
00395             mass,           // point mass
00396             //Kt,           // kinetic translational constant
00397             //Kr,           // kinetic rotational constant -- xyz
00398             //Dt,           // translational constant
00399             //Dr,           // rotational dissipation constant -- xyz
00400             Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction
00401             Kvdamping,      // centerline's velocity damper
00402             Ps,             // potential stretch constant
00403             Pb              // potential bend constant -- xyz
00404         );
00405         
00406         force.x += ext_force.x;
00407         force.y += ext_force.y;
00408         force.z += ext_force.z;
00409 
00410         force.x += int_force.x;
00411         force.y += int_force.y;
00412         force.z += int_force.z;
00413 
00414         // Use a semi Euler integration to find the new velocity and position
00415         float3 new_vel = Device__EulerInt_NewVel_ModelElasticRod (
00416             force,          // force
00417             make_float3(0.0f, 0.0f, 0.0f),  // velocity
00418             mass,           // mass
00419             timeStep        // time step
00420         );
00421         float3 new_pos = Device__EulerInt_NewPos_ModelElasticRod (
00422             new_vel,        // velocity
00423             XYZ(curr_pos),  // position
00424             timeStep        // time step
00425         );
00426         
00427         #ifdef  __DEVICE_EMULATION__
00428         printf( "mass: %f\n", mass );
00429         printf( "timeStep: %f\n", timeStep );
00430         printf( "Force: %f %f %f\n", force.x, force.y, force.z );
00431         printf( "New Vel: %f %f %f\n", new_vel.x, new_vel.y, new_vel.z );
00432         printf( "New Pos: %f %f %f\n", new_pos.x, new_pos.y, new_pos.z );
00433         #endif
00434 
00435         // Use a semi Euler integration to find the new orientation
00436         float4 new_ori = Device__EulerInt_NewOri_ModelElasticRod (
00437             k_mdr,          // an constant value in proportion to material density and radius
00438             length_ori,     // orientation's rest length
00439             curr_ori,       // orientation
00440             torque,         // torque in 4-dimension
00441             timeStep        // time step
00442         );
00443 
00444         // Normalize the new orientation
00445         new_ori = QuaternionUnit( new_ori );
00446         
00447         //  WARNING: out data have to be set at the end, otherwise it won't work!!!
00448         out_data_1[idx  ] = new_pos.x;
00449         out_data_1[idx+1] = new_pos.y;
00450         out_data_1[idx+2] = new_pos.z;
00451         out_data_1[idx+3] = curr_pos.w;
00452 
00453         out_data_2[idx  ] = new_ori.x;
00454         out_data_2[idx+1] = new_ori.y;
00455         out_data_2[idx+2] = new_ori.z;
00456         out_data_2[idx+3] = new_ori.w;
00457     }
00458     else {
00459         out_data_1[idx  ] = curr_pos.x;
00460         out_data_1[idx+1] = curr_pos.y;
00461         out_data_1[idx+2] = curr_pos.z;
00462         out_data_1[idx+3] = curr_pos.w;
00463 
00464         out_data_2[idx  ] = curr_ori.x;
00465         out_data_2[idx+1] = curr_ori.y;
00466         out_data_2[idx+2] = curr_ori.z;
00467         out_data_2[idx+3] = curr_ori.w;
00468     }
00469 }
00470 //-----------------------------------------------------------------------------
00471 // CUDA Device Functions
00472 //=============================================================================
00473 
00474 
00475 //=============================================================================
00476 // CUDA Wrapper Functions
00477 //-----------------------------------------------------------------------------
00478 void Global__ModelElasticRod_AdvSim ( 
00479     unsigned int cudaID,        // CUDA ID
00480     unsigned int numOfNodes,    // number of nodes
00481     unsigned int numOfThreads,  // number of threads per CUDA thread block
00482     float currentTime,          // current time
00483     float timeStep,             // time step
00484     int numOfSubSteps,          // number of sub-steps
00485     float radius,               // radius
00486     float length,               // rest length
00487     float length_ori,           // orientation's rest length    
00488     float mass,                 // point mass
00489     float material_density,     // material density
00490     //float Kt,                 // kinetic translational constant
00491     //float Kr_x,               // kinetic rotational constant -- x
00492     //float Kr_y,               // kinetic rotational constant -- y
00493     //float Kr_z,               // kinetic rotational constant -- z
00494     //float Dt,                 // translational constant
00495     //float Dr_x,               // rotational dissipation constant -- x
00496     //float Dr_y,               // rotational dissipation constant -- y
00497     //float Dr_z,               // rotational dissipation constant -- z
00498     float Kconstraint_3rdDirAlignTangent,   // Kconstraint for aligning centerline's tangent with orientation's 3rd direction
00499     float Kvdamping,            // centerline's velocity damper
00500     float Ps,                   // potential stretch constant
00501     float Pb_x,                 // potential bend constant -- x
00502     float Pb_y,                 // potential bend constant -- y
00503     float Pb_z,                 // potential bend constant -- z
00504     float * host_pos_data,      // host's position data
00505     float * host_prev_pos_data, // host's previous position data
00506     float * host_ori_data,      // host's orientation data
00507     float * host_prev_ori_data, // host's previous orientation data
00508     float * host_int_force_data,    // host's (internal) force data -- xyzw
00509     float * host_ext_force_data     // host's (external) force data -- xyzw
00510 )
00511 {
00512     //printf( "FILE:%s LINE:%d Global__ModelElasticRod_AdvSim\n", __FILE__, __LINE__ );
00513     //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
00514     //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
00515     //printf( "Number of vertices: %d\n", numOfVertices );
00516     //printf( "Number of threads: %d\n", numOfThreads );
00517     //printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );
00518 
00519     // Allocate device memories for out data
00520     unsigned int size = sizeof(float) * numOfNodes * 4;
00521     float * out_data_1;     // for centerlines' position
00522     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_1, size ) );
00523     float * out_data_2;     // for orientations
00524     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_2, size ) );
00525 
00526     // Copy Vertex List (and Previous Vertex List) from host data to device data, 
00527     // since vertices are dynamically changed.
00528     // While home vertices and connection list are unchanged.
00529     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_pos_data, size, cudaMemcpyHostToDevice ) );
00530     // The Previous Vertex List can be saved in CUDA memory.
00531     // Hence, the Previous Vertex List does not have to be updated.
00532     //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_pos_data, size, cudaMemcpyHostToDevice ) );
00533     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetOrientationList(), host_ori_data, size, cudaMemcpyHostToDevice ) );
00534     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetForceList_1(), host_int_force_data, size, cudaMemcpyHostToDevice ) );
00535     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetForceList_2(), host_ext_force_data, size, cudaMemcpyHostToDevice ) );
00536 
00537     // Kernel invocation
00538     unsigned int numOfThreadBlocks = ( numOfNodes + numOfThreads - 1 ) / numOfThreads;
00539     dim3 Dg( numOfThreadBlocks, 1, 1 );
00540     dim3 Db( numOfThreads, 1, 1 );
00541     size_t Ns = 0;
00542     cudaStream_t S = 0;
00543 
00544     // Bind Textures
00545     DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfNodes );
00546     DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfNodes );
00547     //DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfNodes );
00548     DATA_GlobalPool.DataForVertexList[cudaID]->BindOrientationList( numOfNodes );
00549     DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevOrientationList( numOfNodes );
00550     DATA_GlobalPool.DataForVertexList[cudaID]->BindForceList_1( numOfNodes );
00551     DATA_GlobalPool.DataForVertexList[cudaID]->BindForceList_2( numOfNodes );
00552 
00553     #if ER_TIMING_ADV_SIM != 0
00554         static bool isFirstRun = true;
00555         cudaEvent_t start, stop;
00556         float time;
00557         static unsigned int counts = 0;
00558         static float totalTime = 0.0f;
00559         cudaEventCreate( &start );
00560         cudaEventCreate( &stop );
00561     #endif
00562     
00563     {
00564         //float k_mdr = material_density * radius * radius * K_PI;
00565         
00566         float k_mdr = material_density * radius * radius * 3.1415926535897932384626433832795f;
00567         
00568         //printf( "%f = %f * %f * %f * %f\n", k_mdr, material_density, radius, radius, K_PI );
00569 
00570         float subTimeStep = timeStep / numOfSubSteps;
00571         float3 Pb = make_float3( Pb_x, Pb_y, Pb_z );
00572         for ( int i = 1; i <= numOfSubSteps; ++i ) {
00573             //printf( "CUDA: numOfSubSteps %i\n", i );
00574         
00575             #if ER_TIMING_ADV_SIM != 0
00576                 cudaEventRecord( start, 0 );
00577             #endif
00578         
00579             /*
00580             // Call the CUDA kernel
00581             Global__ModelElasticRod_AdvSim_CU_Orientation<<< Dg, Db, Ns, S >>>( 
00582                 numOfNodes, numOfThreads, currentTime, subTimeStep, 
00583                 radius, length, length_ori, mass, k_mdr,
00584                 Kconstraint_3rdDirAlignTangent, Kvdamping,
00585                 Ps, Pb,
00586                 out_data_2
00587             );
00588             */
00589 
00590             /*
00591             // Call the CUDA kernel
00592             Global__ModelElasticRod_AdvSim_CU_Centerline<<< Dg, Db, Ns, S >>>( 
00593                 numOfNodes, numOfThreads, currentTime, subTimeStep, 
00594                 radius, length, length_ori, mass, k_mdr,
00595                 Kconstraint_3rdDirAlignTangent, Kvdamping,
00596                 Ps, Pb,
00597                 out_data_1
00598             );
00599             */
00600             
00601             // Call the CUDA kernel (Choice 1) -- combine centerline and orientation
00602             Global__ModelElasticRod_AdvSim_CU<<< Dg, Db, Ns, S >>>( 
00603                 numOfNodes, numOfThreads, currentTime, subTimeStep, 
00604                 radius, length, length_ori, mass, k_mdr,
00605                 Kconstraint_3rdDirAlignTangent, Kvdamping,
00606                 Ps, Pb,
00607                 out_data_1, out_data_2
00608             );
00609             
00610             //cudaThreadSynchronize();
00611             
00612             // Swap pointers for Vertex List and Previous Vertex List
00613             // So that the Previous Vertex List is remain unchanged in the CUDA memory.
00614             DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
00615 
00616             // Copy data to the device's memory for (current) Vertex List
00617             CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data_1, size, cudaMemcpyDeviceToDevice ) );
00618             CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetOrientationList(), out_data_2, size, cudaMemcpyDeviceToDevice ) );
00619 
00620             currentTime += subTimeStep;
00621             
00622             #if ER_TIMING_ADV_SIM != 0
00623                 cudaEventRecord( stop, 0 );
00624                 cudaEventSynchronize( stop ); 
00625                 cudaEventElapsedTime( &time, start, stop );
00626                 if ( !isFirstRun ) {
00627                     totalTime += time;
00628                     ++counts;
00629                 }
00630             #endif
00631         }
00632     }
00633 
00634     #if ER_TIMING_ADV_SIM != 0
00635         if ( isFirstRun )   isFirstRun = false;
00636         cudaEventDestroy( start );
00637         cudaEventDestroy( stop );
00638         printf( "CUDA Kernal TotalTime %f; Counts %i\n", totalTime, counts );
00639     #endif
00640 
00641     // Unbind Textures
00642     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
00643     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
00644     //DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();
00645     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindOrientationList();
00646     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevOrientationList();
00647     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindForceList_1();
00648     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindForceList_2();
00649 
00650     // Check if kernel execution generated and error
00651     CUT_CHECK_ERROR( "Kernel execution failed!" );
00652 
00653     // Copy output data to host data Previous Vertex List
00654     // The host will have to swap its pointers (for current and previous vertex list)
00655     CUDA_SAFE_CALL( cudaMemcpy( host_prev_pos_data, out_data_1, size, cudaMemcpyDeviceToHost ) );
00656     CUDA_SAFE_CALL( cudaMemcpy( host_prev_ori_data, out_data_2, size, cudaMemcpyDeviceToHost ) );
00657 
00658     //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data );
00659 
00660     // Free device memories
00661     CUDA_SAFE_CALL( cudaFree( out_data_1 ) );
00662     CUDA_SAFE_CALL( cudaFree( out_data_2 ) );
00663 }
00664 //-----------------------------------------------------------------------------
00665 // CUDA Wrapper Functions
00666 //=============================================================================
00667 END_NAMESPACE_TAPs__CUDA
00668 //-----------------------------------------------------------------------------
00669 //34567890123456789012345678901234567890123456789012345678901234567890123456789
00670 //--+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines