![]() |
TAPs 0.7.7.3
|
00001 /****************************************************************************** 00002 TAPsCUDA_VertexListModelElasticRod_Def.cu 00003 00004 CUDA Definition for ModelElasticRod (i.e., cpp file). 00005 00006 SUKITTI PUNAK (09/03/2009) 00007 UPDATE (09/24/2009) 00008 ******************************************************************************/ 00009 00010 #include "TAPsCUDA_VertexListModelElasticRod.cu" 00011 00012 BEGIN_NAMESPACE_TAPs__CUDA 00013 //============================================================================= 00014 // CUDA Initialization and Clear Functions 00015 //----------------------------------------------------------------------------- 00017 bool InitailizeDataForElasticRodModel ( 00018 unsigned int & cudaID, // CUDA ID for the object 00019 unsigned int numOfNodes, // number of nodes 00020 float * posList, // list of centerlines' position -- xyzw 00021 float * prevPosList, // list of centerlines' previous position -- xyzw 00022 float * oriList, // list of orientations -- xyzw 00023 float * prevOriList, // list of previous orientations -- xyzw 00024 float * intForceList, // list of (internal) forces -- xyzw 00025 float * extForceList // list of (external) forces -- xyzw 00026 ) 00027 { 00028 // Here the Elastic Rod Model uses Vertex List Data 00029 // However, it has implicit connection (each vertex connect to the previous and next vertices). 00030 // So the connection list is set to zero. 00031 00032 // Home vertex is unused for now. Plan is to use home vertex for sticking suture to a surface. 00033 00034 // Assign CUDA ID to the object 00035 // The object has to use cudaID to communicate with the CUDA. 00036 cudaID = DATA_GlobalPool.SizeOfGlobal_Pool; 00037 00038 // Allocate CUDA data for the object. 00039 DATA_Vertex_List * newData = new DATA_Vertex_List( 00040 numOfNodes, // total number of nodes 00041 false, // include home positions into the data 00042 0, // maximum vertex connection 00043 false, // include simulation flags constraint into the data 00044 true, // include orientations 00045 true, // include previous orientations 00046 true, // include forces (set 1) as for internal force 00047 true // include forces (set 1) as for external force 00048 ); 00049 AddToGlobal_Pool_Of_DATA_Vertex_List( newData ); 00050 00051 // Size of memory for (xyzw) vertices 00052 int size = numOfNodes * 4 * sizeof(float); 00053 00054 // Copy Vertex List from host data to device data 00055 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), posList, size, cudaMemcpyHostToDevice ) ); 00056 // Copy Previous Vertex List from host data to device data 00057 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevPosList, size, cudaMemcpyHostToDevice ) ); 00058 // Copy Home Vertex List from host data to device data 00059 //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) ); 00060 // Copy Vertex Connection List from host data to device data 00062 CUDA_SAFE_CALL( cudaMemcpy( newData->GetOrientationList(), oriList, size, cudaMemcpyHostToDevice ) ); 00063 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevOrientationList(), prevOriList, size, cudaMemcpyHostToDevice ) ); 00064 CUDA_SAFE_CALL( cudaMemcpy( newData->GetForceList_1(), intForceList, size, cudaMemcpyHostToDevice ) ); 00065 CUDA_SAFE_CALL( cudaMemcpy( newData->GetForceList_2(), extForceList, size, cudaMemcpyHostToDevice ) ); 00066 00067 return true; 00068 } 00069 00070 00071 // Clear CUDA Data for an Elastic Rod model (ModelElasticRod) 00072 void ClearDataForElasticRodModel ( unsigned int & cudaID ) 00073 { 00074 if ( cudaID != 0 ) { 00075 delete DATA_GlobalPool.DataForVertexList[cudaID]; 00076 cudaID = 0; 00077 } 00078 } 00079 //----------------------------------------------------------------------------- 00080 // CUDA Initialization and Clear Functions 00081 //============================================================================= 00082 00083 00116 /* 00118 __global__ 00119 void Global__ModelElasticRod_AdvSim_CU_Centerline ( 00120 unsigned int numOfNodes, // number of nodes 00121 unsigned int numOfThreads, // number of threads 00122 float currentTime, // current time 00123 float timeStep, // time step 00124 float radius, // radius 00125 float length, // centerline's rest length 00126 float length_ori, // orientation's rest length 00127 float mass, // point mass 00128 float k_mdr, // A constant value in proportion to material density and radius 00129 //float Kt, // kinetic translational constant 00130 //float3 Kr, // kinetic rotational constant -- xyz 00131 //float Dt, // translational constant 00132 //float3 Dr, // rotational dissipation constant -- xyz 00133 float Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction 00134 float Kvdamping, // centerline's velocity damper 00135 float Ps, // potential stretch constant 00136 float3 Pb, // potential bend constant -- xyz 00137 float * out_data_1 // output data for centerlines 00138 ) 00139 { 00140 // Block index 00141 int bx = blockIdx.x; 00142 // Thread index 00143 int tx = threadIdx.x; 00144 // Vertex number 00145 int vertexNo = (numOfThreads * bx + tx); 00146 if ( vertexNo >= numOfNodes ) return; 00147 // Index for output 00148 int idx = (numOfThreads * bx + tx) * 4; 00149 00150 // Fetch data from texture linear memory 00151 float4 current_pos = tex1Dfetch( CudaTexVertexList, vertexNo ); 00152 float3 force = make_float3( 0.0f, 0.0f, 0.0f ); 00153 00154 if ( current_pos.w == 1 ) { 00155 force = Device__CalForce_ModelElasticRod( 00156 vertexNo, // vertex number 00157 numOfNodes, // number of vertices 00158 radius, // radius 00159 length, // rest length 00160 mass, // point mass 00161 //Kt, // kinetic translational constant 00162 //Kr, // kinetic rotational constant -- xyz 00163 //Dt, // translational constant 00164 //Dr, // rotational dissipation constant -- xyz 00165 Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction 00166 Kvdamping, // centerline's velocity damper 00167 Ps, // potential stretch constant 00168 Pb // potential bend constant -- xyz 00169 ); 00170 00171 // Use a semi Euler integration to find the new velocity and position 00172 float3 new_vel = Device__EulerInt_NewVel_ModelElasticRod ( 00173 force, // force 00174 make_float3(0.0f, 0.0f, 0.0f), // velocity 00175 mass, // mass 00176 timeStep // time step 00177 ); 00178 float3 new_pos = Device__EulerInt_NewPos_ModelElasticRod ( 00179 new_vel, // velocity 00180 XYZ(current_pos), // position 00181 timeStep // time step 00182 ); 00183 00184 // WARNING: out data have to be set at the end, otherwise it won't work!!! ??? 00185 out_data_1[idx ] = new_pos.x; 00186 out_data_1[idx+1] = new_pos.y; 00187 out_data_1[idx+2] = new_pos.z; 00188 out_data_1[idx+3] = current_pos.w; 00189 } 00190 else { 00191 out_data_1[idx ] = current_pos.x; 00192 out_data_1[idx+1] = current_pos.y; 00193 out_data_1[idx+2] = current_pos.z; 00194 out_data_1[idx+3] = current_pos.w; 00195 } 00196 // Synchronize to make sure the data are loaded 00197 //__syncthreads(); 00198 00199 //out_data_1[idx ] = current_pos.x; 00200 //out_data_1[idx+1] = current_pos.y; 00201 //out_data_1[idx+2] = current_pos.z; 00202 //out_data_1[idx+3] = current_pos.w; 00203 } 00204 */ 00205 00206 00207 /* 00209 __global__ 00210 void Global__ModelElasticRod_AdvSim_CU_Orientation ( 00211 unsigned int numOfNodes, // number of nodes 00212 unsigned int numOfThreads, // number of threads 00213 float currentTime, // current time 00214 float timeStep, // time step 00215 float radius, // radius 00216 float length, // centerline's rest length 00217 float length_ori, // orientation's rest length 00218 float mass, // point mass 00219 float k_mdr, // A constant value in proportion to material density and radius 00220 //float Kt, // kinetic translational constant 00221 //float3 Kr, // kinetic rotational constant -- xyz 00222 //float Dt, // translational constant 00223 //float3 Dr, // rotational dissipation constant -- xyz 00224 float Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction 00225 float Kvdamping, // centerline's velocity damper 00226 float Ps, // potential stretch constant 00227 float3 Pb, // potential bend constant -- xyz 00228 float * out_data_2 // output data for centerlines 00229 ) 00230 { 00231 // Block index 00232 int bx = blockIdx.x; 00233 // Thread index 00234 int tx = threadIdx.x; 00235 // Vertex number 00236 int vertexNo = (numOfThreads * bx + tx); 00237 if ( vertexNo >= numOfNodes - 1 ) return; 00238 // Index for output 00239 int idx = (numOfThreads * bx + tx) * 4; 00240 00241 // Fetch data from texture linear memory 00242 float4 current_pos = tex1Dfetch( CudaTexVertexList, vertexNo ); 00243 float4 current_ori = tex1Dfetch( CudaTexOrientationList, vertexNo ); 00244 float3 force = make_float3( 0.0f, 0.0f, 0.0f ); 00245 float4 torque = make_float4( 0.0f, 0.0f, 0.0f, 0.0f ); 00246 00247 if ( current_pos.w == 1 ) { 00248 torque = Device__CalTorque_ModelElasticRod( 00249 vertexNo, // vertex number 00250 numOfNodes, // number of vertices 00251 radius, // radius 00252 length, // rest length 00253 mass, // point mass 00254 //Kt, // kinetic translational constant 00255 //Kr, // kinetic rotational constant -- xyz 00256 //Dt, // translational constant 00257 //Dr, // rotational dissipation constant -- xyz 00258 Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction 00259 Kvdamping, // centerline's velocity damper 00260 Ps, // potential stretch constant 00261 Pb // potential bend constant -- xyz 00262 ); 00263 00264 //torque = make_float4( 0.0f, 0.0f, 0.0f, 0.0f ); 00265 00266 // Use a semi Euler integration to find the new orientation 00267 //torque = make_float4( 0.0f, 0.0f, 0.0f, 0.0f ); // DEBUG 00268 float4 new_ori = Device__EulerInt_NewOri_ModelElasticRod ( 00269 k_mdr, // an constant value in proportion to material density and radius 00270 length_ori, // orientation's rest length 00271 current_ori, // orientation 00272 torque, // torque in 4-dimension 00273 timeStep // time step 00274 ); 00275 00276 // Normalize the new orientation 00277 //new_ori = current_ori; // DEBUG 00278 new_ori = QuaternionUnit( new_ori ); 00279 00280 // WARNING: out data have to be set at the end, otherwise it won't work!!! ??? 00281 out_data_2[idx ] = new_ori.x; 00282 out_data_2[idx+1] = new_ori.y; 00283 out_data_2[idx+2] = new_ori.z; 00284 out_data_2[idx+3] = new_ori.w; 00285 } 00286 else { 00287 out_data_2[idx ] = current_ori.x; 00288 out_data_2[idx+1] = current_ori.y; 00289 out_data_2[idx+2] = current_ori.z; 00290 out_data_2[idx+3] = current_ori.w; 00291 } 00292 // Synchronize to make sure the data are loaded 00293 //__syncthreads(); 00294 } 00295 */ 00296 00298 __global__ 00299 void Global__ModelElasticRod_AdvSim_CU ( 00300 unsigned int numOfNodes, // number of nodes 00301 unsigned int numOfThreads, // number of threads 00302 float currentTime, // current time 00303 float timeStep, // time step 00304 float radius, // radius 00305 float length, // centerline's rest length 00306 float length_ori, // orientation's rest length 00307 float mass, // point mass 00308 float k_mdr, // A constant value in proportion to material density and radius 00309 //float Kt, // kinetic translational constant 00310 //float3 Kr, // kinetic rotational constant -- xyz 00311 //float Dt, // translational constant 00312 //float3 Dr, // rotational dissipation constant -- xyz 00313 float Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction 00314 float Kvdamping, // centerline's velocity damper 00315 float Ps, // potential stretch constant 00316 float3 Pb, // potential bend constant -- xyz 00317 float * out_data_1, // output data for centerlines 00318 float * out_data_2 // output data for orientations 00319 ) 00320 { 00321 // Block index 00322 int bx = blockIdx.x; 00323 // Thread index 00324 int tx = threadIdx.x; 00325 // Vertex number 00326 int vertexNo = (numOfThreads * bx + tx); 00327 00328 #ifdef __DEVICE_EMULATION__ 00329 printf( "(vertexNo: %i\n", vertexNo ); 00330 #endif//__DEVICE_EMULATION__ 00331 00332 // DEBUG -- vertex#0 is set somewhere else or fixed 00333 //if ( vertexNo == 0 ) return; 00334 00335 if ( vertexNo >= numOfNodes ) return; 00336 // Index for output 00337 int idx = vertexNo * 4; 00338 00339 // Fetch data from texture linear memory 00340 float4 curr_pos = tex1Dfetch( CudaTexVertexList, vertexNo ); 00341 float4 curr_ori = tex1Dfetch( CudaTexOrientationList, vertexNo ); 00342 float4 int_force = tex1Dfetch( CudaTexForceList_1, vertexNo ); 00343 float4 ext_force = tex1Dfetch( CudaTexForceList_2, vertexNo ); 00344 float3 force = make_float3( 0.0f, 0.0f, 0.0f ); 00345 float4 torque = make_float4( 0.0f, 0.0f, 0.0f, 0.0f ); 00346 00347 float4 prev_pos = tex1Dfetch( CudaTexVertexList, vertexNo-1 ); 00348 float4 next_pos = tex1Dfetch( CudaTexVertexList, vertexNo+1 ); 00349 float4 prev_ori = tex1Dfetch( CudaTexOrientationList, vertexNo-1 ); 00350 float4 next_ori = tex1Dfetch( CudaTexOrientationList, vertexNo+1 ); 00351 00352 #ifdef __DEVICE_EMULATION__ 00353 printf( "prev_pos: %f %f %f\n", prev_pos.x, prev_pos.y, prev_pos.z ); 00354 printf( "prev_ori: %f %f %f %f\n", prev_ori.x, prev_ori.y, prev_ori.z, prev_ori.w ); 00355 printf( "curr_pos: %f %f %f\n", curr_pos.x, curr_pos.y, curr_pos.z ); 00356 printf( "curr_ori: %f %f %f %f\n", curr_ori.x, curr_ori.y, curr_ori.z, curr_ori.w ); 00357 printf( "next_pos: %f %f %f\n", next_pos.x, next_pos.y, next_pos.z ); 00358 printf( "next_ori: %f %f %f %f\n", next_ori.x, next_ori.y, next_ori.z, next_ori.w ); 00359 printf( "int_force: %f %f %f %f\n", int_force.x, int_force.y, int_force.z, int_force.w ); 00360 printf( "ext_force: %f %f %f %f\n", ext_force.x, ext_force.y, ext_force.z, ext_force.w ); 00361 #endif 00362 00363 if ( curr_pos.w == 1 ) { 00364 force = Device__CalForce_ModelElasticRod( 00365 vertexNo, // vertex number 00366 numOfNodes, // number of vertices 00367 prev_pos, // previous position 00368 curr_pos, // current position 00369 next_pos, // next position 00370 prev_ori, // previous orientation 00371 curr_ori, // current orientation 00372 radius, // radius 00373 length, // rest length 00374 mass, // point mass 00375 //Kt, // kinetic translational constant 00376 //Kr, // kinetic rotational constant -- xyz 00377 //Dt, // translational constant 00378 //Dr, // rotational dissipation constant -- xyz 00379 Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction 00380 Kvdamping, // centerline's velocity damper 00381 Ps, // potential stretch constant 00382 Pb // potential bend constant -- xyz 00383 ); 00384 00385 torque = Device__CalTorque_ModelElasticRod( 00386 vertexNo, // vertex number 00387 numOfNodes, // number of vertices 00388 curr_pos, // current position 00389 next_pos, // next position 00390 prev_ori, // previous orientation 00391 curr_ori, // current orientation 00392 next_ori, // next orientation 00393 radius, // radius 00394 length, // rest length 00395 mass, // point mass 00396 //Kt, // kinetic translational constant 00397 //Kr, // kinetic rotational constant -- xyz 00398 //Dt, // translational constant 00399 //Dr, // rotational dissipation constant -- xyz 00400 Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction 00401 Kvdamping, // centerline's velocity damper 00402 Ps, // potential stretch constant 00403 Pb // potential bend constant -- xyz 00404 ); 00405 00406 force.x += ext_force.x; 00407 force.y += ext_force.y; 00408 force.z += ext_force.z; 00409 00410 force.x += int_force.x; 00411 force.y += int_force.y; 00412 force.z += int_force.z; 00413 00414 // Use a semi Euler integration to find the new velocity and position 00415 float3 new_vel = Device__EulerInt_NewVel_ModelElasticRod ( 00416 force, // force 00417 make_float3(0.0f, 0.0f, 0.0f), // velocity 00418 mass, // mass 00419 timeStep // time step 00420 ); 00421 float3 new_pos = Device__EulerInt_NewPos_ModelElasticRod ( 00422 new_vel, // velocity 00423 XYZ(curr_pos), // position 00424 timeStep // time step 00425 ); 00426 00427 #ifdef __DEVICE_EMULATION__ 00428 printf( "mass: %f\n", mass ); 00429 printf( "timeStep: %f\n", timeStep ); 00430 printf( "Force: %f %f %f\n", force.x, force.y, force.z ); 00431 printf( "New Vel: %f %f %f\n", new_vel.x, new_vel.y, new_vel.z ); 00432 printf( "New Pos: %f %f %f\n", new_pos.x, new_pos.y, new_pos.z ); 00433 #endif 00434 00435 // Use a semi Euler integration to find the new orientation 00436 float4 new_ori = Device__EulerInt_NewOri_ModelElasticRod ( 00437 k_mdr, // an constant value in proportion to material density and radius 00438 length_ori, // orientation's rest length 00439 curr_ori, // orientation 00440 torque, // torque in 4-dimension 00441 timeStep // time step 00442 ); 00443 00444 // Normalize the new orientation 00445 new_ori = QuaternionUnit( new_ori ); 00446 00447 // WARNING: out data have to be set at the end, otherwise it won't work!!! 00448 out_data_1[idx ] = new_pos.x; 00449 out_data_1[idx+1] = new_pos.y; 00450 out_data_1[idx+2] = new_pos.z; 00451 out_data_1[idx+3] = curr_pos.w; 00452 00453 out_data_2[idx ] = new_ori.x; 00454 out_data_2[idx+1] = new_ori.y; 00455 out_data_2[idx+2] = new_ori.z; 00456 out_data_2[idx+3] = new_ori.w; 00457 } 00458 else { 00459 out_data_1[idx ] = curr_pos.x; 00460 out_data_1[idx+1] = curr_pos.y; 00461 out_data_1[idx+2] = curr_pos.z; 00462 out_data_1[idx+3] = curr_pos.w; 00463 00464 out_data_2[idx ] = curr_ori.x; 00465 out_data_2[idx+1] = curr_ori.y; 00466 out_data_2[idx+2] = curr_ori.z; 00467 out_data_2[idx+3] = curr_ori.w; 00468 } 00469 } 00470 //----------------------------------------------------------------------------- 00471 // CUDA Device Functions 00472 //============================================================================= 00473 00474 00475 //============================================================================= 00476 // CUDA Wrapper Functions 00477 //----------------------------------------------------------------------------- 00478 void Global__ModelElasticRod_AdvSim ( 00479 unsigned int cudaID, // CUDA ID 00480 unsigned int numOfNodes, // number of nodes 00481 unsigned int numOfThreads, // number of threads per CUDA thread block 00482 float currentTime, // current time 00483 float timeStep, // time step 00484 int numOfSubSteps, // number of sub-steps 00485 float radius, // radius 00486 float length, // rest length 00487 float length_ori, // orientation's rest length 00488 float mass, // point mass 00489 float material_density, // material density 00490 //float Kt, // kinetic translational constant 00491 //float Kr_x, // kinetic rotational constant -- x 00492 //float Kr_y, // kinetic rotational constant -- y 00493 //float Kr_z, // kinetic rotational constant -- z 00494 //float Dt, // translational constant 00495 //float Dr_x, // rotational dissipation constant -- x 00496 //float Dr_y, // rotational dissipation constant -- y 00497 //float Dr_z, // rotational dissipation constant -- z 00498 float Kconstraint_3rdDirAlignTangent, // Kconstraint for aligning centerline's tangent with orientation's 3rd direction 00499 float Kvdamping, // centerline's velocity damper 00500 float Ps, // potential stretch constant 00501 float Pb_x, // potential bend constant -- x 00502 float Pb_y, // potential bend constant -- y 00503 float Pb_z, // potential bend constant -- z 00504 float * host_pos_data, // host's position data 00505 float * host_prev_pos_data, // host's previous position data 00506 float * host_ori_data, // host's orientation data 00507 float * host_prev_ori_data, // host's previous orientation data 00508 float * host_int_force_data, // host's (internal) force data -- xyzw 00509 float * host_ext_force_data // host's (external) force data -- xyzw 00510 ) 00511 { 00512 //printf( "FILE:%s LINE:%d Global__ModelElasticRod_AdvSim\n", __FILE__, __LINE__ ); 00513 //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep ); 00514 //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd ); 00515 //printf( "Number of vertices: %d\n", numOfVertices ); 00516 //printf( "Number of threads: %d\n", numOfThreads ); 00517 //printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads ); 00518 00519 // Allocate device memories for out data 00520 unsigned int size = sizeof(float) * numOfNodes * 4; 00521 float * out_data_1; // for centerlines' position 00522 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_1, size ) ); 00523 float * out_data_2; // for orientations 00524 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_2, size ) ); 00525 00526 // Copy Vertex List (and Previous Vertex List) from host data to device data, 00527 // since vertices are dynamically changed. 00528 // While home vertices and connection list are unchanged. 00529 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_pos_data, size, cudaMemcpyHostToDevice ) ); 00530 // The Previous Vertex List can be saved in CUDA memory. 00531 // Hence, the Previous Vertex List does not have to be updated. 00532 //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_pos_data, size, cudaMemcpyHostToDevice ) ); 00533 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetOrientationList(), host_ori_data, size, cudaMemcpyHostToDevice ) ); 00534 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetForceList_1(), host_int_force_data, size, cudaMemcpyHostToDevice ) ); 00535 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetForceList_2(), host_ext_force_data, size, cudaMemcpyHostToDevice ) ); 00536 00537 // Kernel invocation 00538 unsigned int numOfThreadBlocks = ( numOfNodes + numOfThreads - 1 ) / numOfThreads; 00539 dim3 Dg( numOfThreadBlocks, 1, 1 ); 00540 dim3 Db( numOfThreads, 1, 1 ); 00541 size_t Ns = 0; 00542 cudaStream_t S = 0; 00543 00544 // Bind Textures 00545 DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfNodes ); 00546 DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfNodes ); 00547 //DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfNodes ); 00548 DATA_GlobalPool.DataForVertexList[cudaID]->BindOrientationList( numOfNodes ); 00549 DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevOrientationList( numOfNodes ); 00550 DATA_GlobalPool.DataForVertexList[cudaID]->BindForceList_1( numOfNodes ); 00551 DATA_GlobalPool.DataForVertexList[cudaID]->BindForceList_2( numOfNodes ); 00552 00553 #if ER_TIMING_ADV_SIM != 0 00554 static bool isFirstRun = true; 00555 cudaEvent_t start, stop; 00556 float time; 00557 static unsigned int counts = 0; 00558 static float totalTime = 0.0f; 00559 cudaEventCreate( &start ); 00560 cudaEventCreate( &stop ); 00561 #endif 00562 00563 { 00564 //float k_mdr = material_density * radius * radius * K_PI; 00565 00566 float k_mdr = material_density * radius * radius * 3.1415926535897932384626433832795f; 00567 00568 //printf( "%f = %f * %f * %f * %f\n", k_mdr, material_density, radius, radius, K_PI ); 00569 00570 float subTimeStep = timeStep / numOfSubSteps; 00571 float3 Pb = make_float3( Pb_x, Pb_y, Pb_z ); 00572 for ( int i = 1; i <= numOfSubSteps; ++i ) { 00573 //printf( "CUDA: numOfSubSteps %i\n", i ); 00574 00575 #if ER_TIMING_ADV_SIM != 0 00576 cudaEventRecord( start, 0 ); 00577 #endif 00578 00579 /* 00580 // Call the CUDA kernel 00581 Global__ModelElasticRod_AdvSim_CU_Orientation<<< Dg, Db, Ns, S >>>( 00582 numOfNodes, numOfThreads, currentTime, subTimeStep, 00583 radius, length, length_ori, mass, k_mdr, 00584 Kconstraint_3rdDirAlignTangent, Kvdamping, 00585 Ps, Pb, 00586 out_data_2 00587 ); 00588 */ 00589 00590 /* 00591 // Call the CUDA kernel 00592 Global__ModelElasticRod_AdvSim_CU_Centerline<<< Dg, Db, Ns, S >>>( 00593 numOfNodes, numOfThreads, currentTime, subTimeStep, 00594 radius, length, length_ori, mass, k_mdr, 00595 Kconstraint_3rdDirAlignTangent, Kvdamping, 00596 Ps, Pb, 00597 out_data_1 00598 ); 00599 */ 00600 00601 // Call the CUDA kernel (Choice 1) -- combine centerline and orientation 00602 Global__ModelElasticRod_AdvSim_CU<<< Dg, Db, Ns, S >>>( 00603 numOfNodes, numOfThreads, currentTime, subTimeStep, 00604 radius, length, length_ori, mass, k_mdr, 00605 Kconstraint_3rdDirAlignTangent, Kvdamping, 00606 Ps, Pb, 00607 out_data_1, out_data_2 00608 ); 00609 00610 //cudaThreadSynchronize(); 00611 00612 // Swap pointers for Vertex List and Previous Vertex List 00613 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 00614 DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 00615 00616 // Copy data to the device's memory for (current) Vertex List 00617 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data_1, size, cudaMemcpyDeviceToDevice ) ); 00618 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetOrientationList(), out_data_2, size, cudaMemcpyDeviceToDevice ) ); 00619 00620 currentTime += subTimeStep; 00621 00622 #if ER_TIMING_ADV_SIM != 0 00623 cudaEventRecord( stop, 0 ); 00624 cudaEventSynchronize( stop ); 00625 cudaEventElapsedTime( &time, start, stop ); 00626 if ( !isFirstRun ) { 00627 totalTime += time; 00628 ++counts; 00629 } 00630 #endif 00631 } 00632 } 00633 00634 #if ER_TIMING_ADV_SIM != 0 00635 if ( isFirstRun ) isFirstRun = false; 00636 cudaEventDestroy( start ); 00637 cudaEventDestroy( stop ); 00638 printf( "CUDA Kernal TotalTime %f; Counts %i\n", totalTime, counts ); 00639 #endif 00640 00641 // Unbind Textures 00642 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList(); 00643 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList(); 00644 //DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList(); 00645 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindOrientationList(); 00646 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevOrientationList(); 00647 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindForceList_1(); 00648 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindForceList_2(); 00649 00650 // Check if kernel execution generated and error 00651 CUT_CHECK_ERROR( "Kernel execution failed!" ); 00652 00653 // Copy output data to host data Previous Vertex List 00654 // The host will have to swap its pointers (for current and previous vertex list) 00655 CUDA_SAFE_CALL( cudaMemcpy( host_prev_pos_data, out_data_1, size, cudaMemcpyDeviceToHost ) ); 00656 CUDA_SAFE_CALL( cudaMemcpy( host_prev_ori_data, out_data_2, size, cudaMemcpyDeviceToHost ) ); 00657 00658 //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data ); 00659 00660 // Free device memories 00661 CUDA_SAFE_CALL( cudaFree( out_data_1 ) ); 00662 CUDA_SAFE_CALL( cudaFree( out_data_2 ) ); 00663 } 00664 //----------------------------------------------------------------------------- 00665 // CUDA Wrapper Functions 00666 //============================================================================= 00667 END_NAMESPACE_TAPs__CUDA 00668 //----------------------------------------------------------------------------- 00669 //34567890123456789012345678901234567890123456789012345678901234567890123456789 00670 //--+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----