![]() |
TAPs 0.7.7.3
|
00001 /****************************************************************************** 00002 TAPsCUDA_VertexListMSM_Def.cu 00003 00004 CUDA Definition (i.e., cpp file). 00005 00006 SUKITTI PUNAK (08/27/2008) 00007 UPDATE (09/21/2009) 00008 ******************************************************************************/ 00009 00010 #include "TAPsCUDA_VertexListMSM.cu" 00011 00012 BEGIN_NAMESPACE_TAPs__CUDA 00013 //============================================================================= 00014 //----------------------------------------------------------------------------- 00015 00016 //============================================================================= 00017 // Interface Functions 00018 //----------------------------------------------------------------------------- 00019 00020 // Initialize CUDA Data for a Mesh Model by Using Vertex List 00021 bool InitailizeDataForVertexList ( 00022 unsigned int & cudaID, 00023 unsigned int numOfVertices, 00024 unsigned int max_connection_size, 00025 float * vertexList, 00026 float * prevVertexList, 00027 float * homeVertexList, 00028 int * vertexConnectionList 00029 ) 00030 { 00031 //printf( "InitailizeDataForVertexList\n" ); 00032 //fflush( stdout ); 00033 00034 // Assign CUDA ID to the object 00035 // The object has to use cudaID to communicate with the CUDA. 00036 cudaID = DATA_GlobalPool.SizeOfGlobal_Pool; 00037 00038 // Allocate CUDA data for the object. 00039 DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, true, max_connection_size, false ); 00040 AddToGlobal_Pool_Of_DATA_Vertex_List( newData ); 00041 00042 // Size of memory for (xyzw) vertices 00043 int size = numOfVertices * 4 * sizeof(float); 00044 00045 // Copy Vertex List from host data to device data 00046 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) ); 00047 // Copy Previous Vertex List from host data to device data 00048 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) ); 00049 // Copy Home Vertex List from host data to device data 00050 CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) ); 00051 // Copy Vertex Connection List from host data to device data 00052 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) ); 00053 00054 return true; 00055 } 00056 00057 00058 //--- Suture & Strand Model --------------------------------------------------- 00059 // Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture) 00060 bool InitailizeDataForSutureModel ( 00061 unsigned int & cudaID, 00062 unsigned int numOfVertices, 00063 float * vertexList, 00064 float * prevVertexList 00065 ) 00066 { 00067 // Here Suture Model uses Vertex List Data 00068 // However, it has implicit connection (each vertex connect to the previous and next vertices). 00069 // So the connection list is set to zero. 00070 00071 // Home vertex is unused for now. Plan is to use home vertex for sticking suture to a surface. 00072 00073 // Assign CUDA ID to the object 00074 // The object has to use cudaID to communicate with the CUDA. 00075 cudaID = DATA_GlobalPool.SizeOfGlobal_Pool; 00076 00077 // Allocate CUDA data for the object. 00078 DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, false, 0, true ); 00079 AddToGlobal_Pool_Of_DATA_Vertex_List( newData ); 00080 00081 // Size of memory for (xyzw) vertices 00082 int size = numOfVertices * 4 * sizeof(float); 00083 00084 // Copy Vertex List from host data to device data 00085 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) ); 00086 // Copy Previous Vertex List from host data to device data 00087 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) ); 00088 // Copy Home Vertex List from host data to device data 00089 //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) ); 00090 // Copy Vertex Connection List from host data to device data 00092 00093 return true; 00094 } 00095 00096 00097 //--- Suture & Strand Model --------------------------------------------------- 00098 // Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture) 00099 bool InitailizeDataForSutureModel_ADVSIM ( 00100 unsigned int & cudaID, 00101 unsigned int numOfVertices, 00102 float * vertexList, 00103 float * prevVertexList, 00104 unsigned int * simFlagsList, 00105 float * posConstraintList 00106 ) 00107 { 00108 // Here Suture Model uses Vertex List Data 00109 // However, it has implicit connection (each vertex connect to the previous and next vertices). 00110 // So the connection list is set to zero. 00111 00112 // Home vertex is unused for now. Plan is to use home vertex for sticking suture to a surface. 00113 00114 // Assign CUDA ID to the object 00115 // The object has to use cudaID to communicate with the CUDA. 00116 cudaID = DATA_GlobalPool.SizeOfGlobal_Pool; 00117 00118 // Allocate CUDA data for the object. 00119 DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, false, 0, true ); 00120 AddToGlobal_Pool_Of_DATA_Vertex_List( newData ); 00121 00122 // Size of memory for (xyzw) vertices 00123 int size = numOfVertices * 4 * sizeof(float); 00124 00125 // Copy Vertex List from host data to device data 00126 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) ); 00127 // Copy Previous Vertex List from host data to device data 00128 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) ); 00129 // Copy Home Vertex List from host data to device data 00130 //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) ); 00131 // Copy Vertex Connection List from host data to device data 00132 //CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) ); 00133 // Copy Simulation Flags List from host data to device data 00134 CUDA_SAFE_CALL( cudaMemcpy( newData->GetSimFlagsList(), simFlagsList, numOfVertices*sizeof(unsigned int), cudaMemcpyHostToDevice ) ); 00135 // Copy Position Constraint List from host data to device data 00136 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPosConstraintList(), posConstraintList, size, cudaMemcpyHostToDevice ) ); 00137 00138 return true; 00139 } 00140 00141 00142 00143 //----------------------------------------------------------------------------- 00144 // Interface Functions 00145 //============================================================================= 00146 00147 00148 00149 00150 //============================================================================= 00151 // Device Functions 00152 //----------------------------------------------------------------------------- 00153 00154 // Calculate a spring force 00155 __device__ 00156 float3 Device__CalSpringForce ( 00157 float Ks, 00158 float Kd, 00159 float rest, 00160 float4 X1, 00161 float4 X2, 00162 float4 V1, 00163 float4 V2 00164 ) 00165 { 00166 // Formula from Physically Based Modeling (Siggraph 2001 Course Note) 00167 // F1 = -{Ks(l - r) + Kd[(V1-V2)*L]/l}*L/l 00168 // F2 = -F1 00169 // where Ks = spring stiffness, Kd = spring damping, 00170 // V1 and V2 are velocity of particles linked by the spring 00171 // r = spring rest length 00172 // L = vector of difference of positions of particle#1 and #2 00173 // l = magnitude of L 00174 //return ( ( m_tK * (scl - m_tL) ) + ( m_tD * V * cL ) ) * cL; 00175 00176 float3 Xdif = make_float3( X1.x-X2.x, X1.y-X2.y, X1.z-X2.z ); 00177 float3 Vdif = make_float3( V1.x-V2.x, V1.y-V2.y, V1.z-V2.z ); 00178 00179 //float len = length( Diff ); // length is an inline function in CUDA (cutil_math.h) 00180 float len = sqrtf( Xdif.x*Xdif.x + Xdif.y*Xdif.y + Xdif.z*Xdif.z ); 00181 if ( len < 1.0E-16f ) { 00182 return make_float3( 0.0f, 0.0f, 0.0f ); 00183 } 00184 float invLen = 1.0f / len; 00185 float3 Dir = make_float3( Xdif.x*invLen, Xdif.y*invLen, Xdif.z*invLen ); 00186 00187 float Vdif_Dir = Vdif.x*Dir.x + Vdif.y*Dir.y + Vdif.z*Dir.z; 00188 00189 float fmag = -( Ks*(len-rest) + Kd*Vdif_Dir ); 00190 float3 force = make_float3( fmag*Dir.x, fmag*Dir.y, fmag*Dir.z ); 00191 00192 return force; 00193 } 00194 00195 00197 __device__ 00198 float3 Device__VerletIntegration ( 00199 float4 A1, 00200 float4 X1, 00201 float4 X0, 00202 float dt_sqrt, 00203 float damper 00204 ) 00205 { 00206 // F = ma; a = F/m; 00207 // x(t+dt) = 2x(t) - x(t-dt) + a(t)*{dt}^2 + O({dt}^4) 00208 float3 new_pos; 00209 new_pos.x = 2.0f*X1.x - X0.x + A1.x*dt_sqrt - damper*(X1.x - X0.x); 00210 new_pos.y = 2.0f*X1.y - X0.y + A1.y*dt_sqrt - damper*(X1.y - X0.y); 00211 new_pos.z = 2.0f*X1.z - X0.z + A1.z*dt_sqrt - damper*(X1.z - X0.z); 00212 return new_pos; 00213 } 00214 00215 00216 // CUDA Wrapper Function -- For ModelStrand AdvSim Function 00217 // with TAPs_ADVANCED_SIMULATION 00218 // return enforced position 00219 __device__ 00220 float3 Device__EnforceConstraints_ADVSIM ( 00221 float3 vertexA, 00222 float4 vertexB 00223 ) 00224 { 00225 float one_ratio = 1.0f - vertexB.w; 00226 float3 enforced_pos; 00227 enforced_pos.x = vertexB.w*vertexA.x + one_ratio*vertexB.x; 00228 enforced_pos.y = vertexB.w*vertexA.y + one_ratio*vertexB.y; 00229 enforced_pos.z = vertexB.w*vertexA.z + one_ratio*vertexB.z; 00230 return enforced_pos; 00231 } 00232 00233 00234 //----------------------------------------------------------------------------- 00235 // Device Functions 00236 //============================================================================= 00237 00238 00239 00240 00241 //============================================================================= 00242 // For Model Suture 00243 //----------------------------------------------------------------------------- 00244 00245 // CUDA Kernel -- For ModelStrand AdvSim Function 00246 __device__ 00247 float3 Device__CalSpringForce_ModelStrand ( 00248 float Ks, 00249 float Kd, 00250 float Lrest, 00251 int vertexNo, 00252 unsigned int numOfVertices 00253 ) 00254 { 00255 float3 total_force = make_float3( 0.0f, 0.0f, 0.0f ); 00256 //float4 HX1 = tex1Dfetch( CudaTexHomeVertexList, vertexNo ); 00257 float4 X1 = tex1Dfetch( CudaTexVertexList, vertexNo ); 00258 float4 V1 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f ); 00259 //float4 HX2; 00260 float4 X2; 00261 float4 V2 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f ); 00262 float3 force; 00263 00264 // The recommended way to round a single-precision floating-point operand to an 00265 // integer, with the result being a single-precision floating-point number is rintf(), 00266 // not roundf(). 00267 00268 // Cal force due to connection with the previous vertex 00269 if ( vertexNo > 0 ) { 00270 int prevVertex = vertexNo-1; 00271 //HX2 = tex1Dfetch( CudaTexHomeVertexList, prevVertex ); 00272 X2 = tex1Dfetch( CudaTexVertexList, prevVertex ); 00273 00274 //float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z ); 00275 //rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z ); 00276 00277 force = Device__CalSpringForce( 00278 Ks, 00279 Kd, 00280 Lrest, 00281 X1, 00282 X2, 00283 V1, 00284 V2 00285 ); 00286 total_force.x += force.x; 00287 total_force.y += force.y; 00288 total_force.z += force.z; 00289 } 00290 00291 // Cal force due to connection with the next vertex 00292 if ( vertexNo < numOfVertices - 1 ) { 00293 int nextVertex = vertexNo+1; 00294 //HX2 = tex1Dfetch( CudaTexHomeVertexList, prevVertex ); 00295 X2 = tex1Dfetch( CudaTexVertexList, nextVertex ); 00296 00297 //float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z ); 00298 //rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z ); 00299 00300 force = Device__CalSpringForce( 00301 Ks, 00302 Kd, 00303 Lrest, 00304 X1, 00305 X2, 00306 V1, 00307 V2 00308 ); 00309 total_force.x += force.x; 00310 total_force.y += force.y; 00311 total_force.z += force.z; 00312 } 00313 00314 /* 00315 // Add force from Home Spring 00316 force = Device__CalSpringForce( 00317 Ks, //!< spring stiffness 00318 Kd, //!< spring damper 00319 0.0f, //!< spring rest length 00320 X1, //!< position of particle one 00321 HX1, //!< position of particle two 00322 V1, //!< velocity of particle one 00323 V2 //!< velocity of particle two 00324 ); 00325 total_force.x += force.x; 00326 total_force.y += force.y; 00327 total_force.z += force.z; 00328 */ 00329 00330 return total_force; 00331 } 00332 00333 // CUDA Kernel -- For ModelStrand AdvSim Function 00334 template <int COMPUTATION_CHOICE> 00335 __global__ 00336 void Global__ModelStrand_AdvSim_CU ( 00337 unsigned int numOfVertices, 00338 unsigned int numOfThreads, 00339 float currentTime, 00340 float timeStep, 00341 float ptMass, 00342 float Ks, 00343 float Kd, 00344 float Lrest, 00345 float * out_data 00346 ) 00347 { 00348 switch ( COMPUTATION_CHOICE ) { 00349 00350 //------------------------------------------------- 00351 // Emulate Viscoelastic by mass spring connections 00352 // with TAPs_ADVANCED_SIMULATION 00353 case 101: 00354 { 00355 // Block index 00356 int bx = blockIdx.x; 00357 00358 // Thread index 00359 int tx = threadIdx.x; 00360 00361 // Vertex number 00362 int vertexNo = (numOfThreads * bx + tx); 00363 if ( vertexNo >= numOfVertices ) return; 00364 00365 // Index for output 00366 int idx = (numOfThreads * bx + tx) * 4; 00367 00368 // Fetch data from texture linear memory 00369 float4 current_pos = tex1Dfetch( CudaTexVertexList, vertexNo ); 00370 00371 // If the suture vertex is not fixed 00372 if ( current_pos.w == 1 ) { 00373 float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo ); 00374 float3 force = Device__CalSpringForce_ModelStrand( 00375 Ks, 00376 Kd, 00377 Lrest, 00378 vertexNo, 00379 numOfVertices 00380 ); 00381 // Convert force to acceleration 00382 float invMass = 1.0f / ptMass; 00383 float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f ); 00384 float dt_sqrt = timeStep*timeStep; 00385 float3 new_pos = Device__VerletIntegration( 00386 acceleration, 00387 current_pos, 00388 previous_pos, 00389 dt_sqrt, 00390 Kd 00391 ); 00392 00393 // Enforce constraints with TAPs_ADVANCED_SIMULATION 00394 uint1 simFlags = tex1Dfetch( CudaTexSimFlagsList, vertexNo ); 00395 if ( simFlags.x > 0 ) { 00396 float4 constraint_pos = tex1Dfetch( CudaTexPosConstraintList, vertexNo ); 00397 new_pos = Device__EnforceConstraints_ADVSIM( new_pos, constraint_pos ); 00398 00399 //new_pos.x = constraint_pos.x; 00400 //new_pos.y = constraint_pos.y; 00401 //new_pos.z = constraint_pos.z; 00402 } 00403 00404 // WARNING: out data have to be set at the end, otherwise it won't work!!! ??? 00405 out_data[idx ] = new_pos.x; 00406 out_data[idx+1] = new_pos.y; 00407 out_data[idx+2] = new_pos.z; 00408 out_data[idx+3] = current_pos.w; 00409 } 00410 // If the suture vertex is fixed 00411 else { 00412 out_data[idx ] = current_pos.x; 00413 out_data[idx+1] = current_pos.y; 00414 out_data[idx+2] = current_pos.z; 00415 out_data[idx+3] = current_pos.w; 00416 } 00417 } 00418 // Synchronize to make sure the data are loaded 00419 //__syncthreads(); 00420 break; 00421 00422 //------------------------------------------------- 00423 // Emulate Viscoelastic by mass spring connections 00424 case 1: 00425 { 00426 // Block index 00427 int bx = blockIdx.x; 00428 // Thread index 00429 int tx = threadIdx.x; 00430 // Vertex number 00431 int vertexNo = (numOfThreads * bx + tx); 00432 if ( vertexNo >= numOfVertices ) return; 00433 // Index for output 00434 int idx = (numOfThreads * bx + tx) * 4; 00435 00436 // Fetch data from texture linear memory 00437 float4 current_pos = tex1Dfetch( CudaTexVertexList, vertexNo ); 00438 if ( current_pos.w == 1 ) { 00439 float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo ); 00440 float3 force = Device__CalSpringForce_ModelStrand( 00441 Ks, 00442 Kd, 00443 Lrest, 00444 vertexNo, 00445 numOfVertices 00446 ); 00447 // Convert force to acceleration 00448 float invMass = 1.0f / ptMass; 00449 float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f ); 00450 float dt_sqrt = timeStep*timeStep; 00451 // Calculate the new position 00452 float3 new_pos = Device__VerletIntegration( 00453 acceleration, 00454 current_pos, 00455 previous_pos, 00456 dt_sqrt, 00457 Kd 00458 ); 00459 00460 // WARNING: out data have to be set at the end, otherwise it won't work!!! ??? 00461 out_data[idx ] = new_pos.x; 00462 out_data[idx+1] = new_pos.y; 00463 out_data[idx+2] = new_pos.z; 00464 out_data[idx+3] = current_pos.w; 00465 } 00466 else { 00467 out_data[idx ] = current_pos.x; 00468 out_data[idx+1] = current_pos.y; 00469 out_data[idx+2] = current_pos.z; 00470 out_data[idx+3] = current_pos.w; 00471 } 00472 } 00473 // Synchronize to make sure the data are loaded 00474 //__syncthreads(); 00475 break; 00476 } 00477 } 00478 00479 00480 // CUDA Kernel -- For ModelStrand AdvSim Function -- Enforce Constraint 00481 template <int COMPUTATION_CHOICE> 00482 __global__ 00483 void Global__ModelStrand_AdvSim_Enforce_Constraint_CU ( 00484 unsigned int numOfVertices, 00485 unsigned int numOfThreads, 00486 float currentTime, 00487 float timeStep, 00488 float ptMass, 00489 float Ks, 00490 float Kd, 00491 float Lrest, 00492 unsigned int * out_data_SimFlagsList, 00493 float * out_data_PosConstraintList 00494 ) 00495 { 00496 switch ( COMPUTATION_CHOICE ) { 00497 00498 //------------------------------------------------- 00499 // Strand's vertex is slidable 00500 // with TAPs_ADVANCED_SIMULATION 00501 case 101: 00502 { 00503 // Block index 00504 int bx = blockIdx.x; 00505 // Thread index 00506 int tx = threadIdx.x; 00507 // Vertex number 00508 int vertexNo = (numOfThreads * bx + tx); 00509 if ( vertexNo >= numOfVertices ) return; 00510 // Index for output 00511 int idx = vertexNo * 4; 00512 00513 // Fetch data from texture linear memories 00514 float4 pos_j = tex1Dfetch( CudaTexVertexList, vertexNo ); 00515 uint1 simFlags = tex1Dfetch( CudaTexSimFlagsList, vertexNo ); 00516 float4 posConstraint = tex1Dfetch( CudaTexPosConstraintList, vertexNo ); 00517 00518 bool bDefault = false; 00519 00520 // If the suture vertex is not fixed 00521 if ( pos_j.w == 1.0f ) { 00522 00523 // Values of simulation flags (declared in TAPsEnumList.hpp): 00524 // SIMULATION CONSTRAINTS 00525 // enum SimConstraints { 00526 // CLEARED = 0, 00527 // FIXED = 1, 00528 // ATTACHED = 1 << 1, 00529 // PUNCTURED = 1 << 2, 00530 // SLIDABLE = 1 << 3, 00531 // DUMMY 00532 // }; 00533 00534 // CLEARED 00535 if ( simFlags.x == 0 ) { 00536 bDefault = true; 00537 } 00538 // FIXED 00539 else if ( simFlags.x == 1 ) { 00540 bDefault = true; 00541 } 00542 // ATTACHED 00543 else if ( simFlags.x == 2 ) { 00544 bDefault = true; 00545 } 00546 // PUNCTURED (n/a) 00547 //else if ( simFlags.x == 4 ) { 00548 //} 00549 // SLIDABLE 00550 else if ( simFlags.x == 8 ) { 00551 //else if ( false ) { 00552 if ( 0 < vertexNo && vertexNo < numOfVertices-2 ) { 00553 int i = vertexNo-1; 00554 int k = vertexNo+1; 00555 float4 pos_i = tex1Dfetch( CudaTexVertexList, i ); 00556 float4 pos_k = tex1Dfetch( CudaTexVertexList, k ); 00557 float3 linkA = make_float3( pos_i.x - pos_j.x, pos_i.y - pos_j.y, pos_i.z - pos_j.z ); 00558 float3 linkB = make_float3( pos_k.x - pos_j.x, pos_k.y - pos_j.y, pos_k.z - pos_j.z ); 00559 float linkAlen = sqrt( linkA.x*linkA.x + linkA.y*linkA.y + linkA.z*linkA.z ); 00560 float linkBlen = sqrt( linkB.x*linkB.x + linkB.y*linkB.y + linkB.z*linkB.z ); 00561 00562 float threshold = 1.025f; 00563 //float threshold = 2.0f; 00564 00565 // Shift up 00566 if ( linkAlen > linkBlen*threshold ) { 00567 //uint1 simFlags_up = tex1Dfetch( CudaTexSimFlagsList, vertexNo+1 ); 00568 // if simFlags of the right vertex is cleared 00569 //if ( simFlags_up.x == 0 ) { 00570 out_data_SimFlagsList[vertexNo] = simFlags.x; 00571 out_data_PosConstraintList[idx ] = posConstraint.x; 00572 out_data_PosConstraintList[idx+1] = posConstraint.y; 00573 out_data_PosConstraintList[idx+2] = posConstraint.z; 00574 out_data_PosConstraintList[idx+3] = posConstraint.w + TAPs_Const_Suture_ShiftUp; // +20 to signify that the strand vertex must be shifted up 00575 //} 00576 //else { 00577 // bDefault = true; 00578 //} 00579 } 00580 // Shift down 00581 else if ( linkBlen > linkAlen*threshold ) { 00582 //uint1 simFlags_down = tex1Dfetch( CudaTexSimFlagsList, vertexNo-1 ); 00583 // if simFlags of the left vertex is cleared 00584 //if ( simFlags_down.x == 0 ) { 00585 out_data_SimFlagsList[vertexNo] = simFlags.x; 00586 out_data_PosConstraintList[idx ] = posConstraint.x; 00587 out_data_PosConstraintList[idx+1] = posConstraint.y; 00588 out_data_PosConstraintList[idx+2] = posConstraint.z; 00589 out_data_PosConstraintList[idx+3] = posConstraint.w + TAPs_Const_Suture_ShiftDown; // +10 to signify that the strand vertex must be shifted down 00590 //} 00591 //else { 00592 // bDefault = true; 00593 //} 00594 } 00595 else { 00596 bDefault = true; 00597 } 00598 } 00599 else { 00600 bDefault = true; 00601 } 00602 } 00603 // DEFAULT CASE 00604 else { 00605 bDefault = true; 00606 } 00607 } 00608 00609 // If the suture vertex is fixed 00610 else { 00611 bDefault = true; 00612 } 00613 00614 if ( bDefault ) { 00615 out_data_SimFlagsList[vertexNo] = simFlags.x; 00616 out_data_PosConstraintList[idx ] = posConstraint.x; 00617 out_data_PosConstraintList[idx+1] = posConstraint.y; 00618 out_data_PosConstraintList[idx+2] = posConstraint.z; 00619 out_data_PosConstraintList[idx+3] = posConstraint.w; 00620 } 00621 } 00622 // Synchronize to make sure the data are loaded 00623 //__syncthreads(); 00624 break; 00625 } 00626 } 00627 00628 00629 // CUDA Wrapper Function -- For ModelStrand AdvSim Function 00630 void Global__ModelStrand_AdvSim ( 00631 unsigned int cudaID, 00632 unsigned int numOfVertices, 00633 unsigned int numOfThreads, 00634 float currentTime, 00635 float timeStep, 00636 int numOfSubSteps, 00637 float ptMass, 00638 float Ks, 00639 float Kd, 00640 float Lrest, 00641 float * host_vertex_data, 00642 float * host_prev_vertex_data 00643 //float * host_home_vertex_data, //!< host's home vertex data 00644 ) 00645 { 00646 //printf( "FILE:%s LINE:%d Global__ModelStrand_AdvSim\n", __FILE__, __LINE__ ); 00647 //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep ); 00648 //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd ); 00649 //printf( "Number of vertices: %d\n", numOfVertices ); 00650 //printf( "Number of threads: %d\n", numOfThreads ); 00651 //printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads ); 00652 00653 // Allocate device memory for out data 00654 unsigned int size = sizeof(float) * numOfVertices * 4; 00655 float * out_data; 00656 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) ); 00657 00658 // Copy Vertex List (and Previous Vertex List) from host data to device data, 00659 // since vertices are dynamically changed. 00660 // While home vertices and connection list are unchanged. 00661 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) ); 00662 // The Previous Vertex List can be saved in CUDA memory. 00663 // Hence, the Previous Vertex List does not have to be updated. 00664 //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) ); 00665 00666 // Kernel invocation 00667 unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads; 00668 dim3 Dg( numOfThreadBlocks, 1, 1 ); 00669 dim3 Db( numOfThreads, 1, 1 ); 00670 size_t Ns = 0; 00671 cudaStream_t S = 0; 00672 00673 // Bind Textures 00674 DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices ); 00675 DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices ); 00676 //DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices ); 00677 00678 //* 00679 int CHOICE = 1; 00680 00681 // MASS SPRING SYSTEM 00682 if ( CHOICE == 1 ) { 00683 float subTimeStep = timeStep / numOfSubSteps; 00684 for ( int i = 0; i < numOfSubSteps; ++i ) { 00685 // Call the CUDA kernel (Choice 1 -- Mass Spring System) 00686 Global__ModelStrand_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 00687 numOfVertices, numOfThreads, 00688 currentTime, subTimeStep, 00689 ptMass, Ks, Kd, Lrest, 00690 out_data 00691 ); 00692 // Swap pointers for Vertex List and Previous Vertex List 00693 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 00694 DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 00695 00696 // Copy data to the device's memory for (current) Vertex List 00697 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) ); 00698 currentTime += subTimeStep; 00699 } 00700 } 00701 //*/ 00702 00703 // Unbind Textures 00704 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList(); 00705 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList(); 00706 //DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList(); 00707 00708 // Check if kernel execution generated and error 00709 CUT_CHECK_ERROR( "Kernel execution failed!" ); 00710 00711 // Copy output data to host data Previous Vertex List 00712 // The host will have to swap its pointers (for current and previous vertex list) 00713 if ( CHOICE > 0 ) { 00714 CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) ); 00715 } 00716 00717 //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data ); 00718 00719 // Free device memory 00720 CUDA_SAFE_CALL( cudaFree( out_data ) ); 00721 } 00722 00723 00724 // CUDA Wrapper Function -- For ModelStrand AdvSim Function 00725 // with TAPs_ADVANCED_SIMULATION 00726 void Global__ModelStrand_AdvSim_ADVSIM ( 00727 unsigned int cudaID, 00728 unsigned int numOfVertices, 00729 unsigned int numOfThreads, 00730 float currentTime, 00731 float timeStep, 00732 int numOfSubSteps, 00733 float ptMass, 00734 float Ks, 00735 float Kd, 00736 float Lrest, 00737 float * host_vertex_data, 00738 float * host_prev_vertex_data, 00739 unsigned int * host_sim_flags_data, 00740 float * host_pos_constraint_data 00741 ) 00742 { 00743 printf( "FILE:%s LINE:%d Global__ModelStrand_AdvSim_ADVSIM\n", __FILE__, __LINE__ ); 00744 printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep ); 00745 printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd ); 00746 printf( "Number of vertices: %d\n", numOfVertices ); 00747 printf( "Number of threads: %d\n", numOfThreads ); 00748 printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads ); 00749 00750 // Allocate device memory for out data 00751 unsigned int size = sizeof(float) * numOfVertices * 4; 00752 float * out_data; 00753 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) ); 00754 00755 // Allocate device memory for out data for changing suture's simulation flags list 00756 unsigned int size_SimFlagsList = sizeof(unsigned int) * numOfVertices; 00757 unsigned int * out_data_SimFlagsList; 00758 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_SimFlagsList, size_SimFlagsList ) ); 00759 00760 // Allocate device memory for out data for changing suture's position constraint list 00761 float * out_data_PosConstraintList; 00762 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_PosConstraintList, size ) ); 00763 00764 // Copy Vertex List (and Previous Vertex List) from host data to device data, 00765 // since vertices are dynamically changed. 00766 // While home vertices and connection list are unchanged. 00767 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) ); 00768 // The Previous Vertex List can be saved in CUDA memory. 00769 // Hence, the Previous Vertex List does not have to be updated. 00770 //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) ); 00771 00772 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetSimFlagsList(), host_sim_flags_data, sizeof(unsigned int)*numOfVertices, cudaMemcpyHostToDevice ) ); 00773 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPosConstraintList(), host_pos_constraint_data, size, cudaMemcpyHostToDevice ) ); 00774 00775 // Kernel invocation 00776 unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads; 00777 dim3 Dg( numOfThreadBlocks, 1, 1 ); 00778 dim3 Db( numOfThreads, 1, 1 ); 00779 size_t Ns = 0; 00780 cudaStream_t S = 0; 00781 00782 // Bind Textures 00783 DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices ); 00784 DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices ); 00785 DATA_GlobalPool.DataForVertexList[cudaID]->BindSimFlagsList( numOfVertices ); 00786 DATA_GlobalPool.DataForVertexList[cudaID]->BindPosConstraintList( numOfVertices ); 00787 00788 //* 00789 // Add 10 for TAPs_ADVANCED_SIMULATION 00790 int CHOICE = 100 + 1; 00791 00792 // MASS SPRING SYSTEM 00793 if ( CHOICE == 101 ) { 00794 float subTimeStep = timeStep / numOfSubSteps; 00795 for ( int i = 0; i < numOfSubSteps; ++i ) { 00796 //printf( "delta time: %g\n", timeStep/numOfSubSteps ); 00797 00798 // Call the CUDA kernel (Choice 100+1 -- Mass Spring System) 00799 // with TAPs_ADVANCED_SIMULATION 00800 Global__ModelStrand_AdvSim_CU<101><<< Dg, Db, Ns, S >>>( 00801 numOfVertices, numOfThreads, 00802 currentTime, subTimeStep, 00803 ptMass, Ks, Kd, Lrest, 00804 out_data 00805 ); 00806 // Swap pointers for Vertex List and Previous Vertex List 00807 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 00808 DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 00809 00810 // Copy data to the device's memory for (current) Vertex List 00811 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) ); 00812 currentTime += subTimeStep; 00813 } 00814 00815 // Call the CUDA kernel to enforce constraints 00816 // with TAPs_ADVANCED_SIMULATION 00817 Global__ModelStrand_AdvSim_Enforce_Constraint_CU<101><<< Dg, Db, Ns, S >>>( 00818 numOfVertices, numOfThreads, 00819 currentTime, subTimeStep, 00820 ptMass, Ks, Kd, Lrest, 00821 out_data_SimFlagsList, 00822 out_data_PosConstraintList 00823 ); 00824 } 00825 //*/ 00826 00827 // Unbind Textures 00828 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList(); 00829 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList(); 00830 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindSimFlagsList(); 00831 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPosConstraintList(); 00832 00833 // Check if kernel execution generated and error 00834 CUT_CHECK_ERROR( "Kernel execution failed!" ); 00835 00836 // Copy output data to host data Previous Vertex List 00837 // The host will have to swap its pointers (for current and previous vertex list) 00838 if ( CHOICE > 0 ) { 00839 CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) ); 00840 CUDA_SAFE_CALL( cudaMemcpy( host_sim_flags_data, out_data_SimFlagsList, size_SimFlagsList, cudaMemcpyDeviceToHost ) ); 00841 CUDA_SAFE_CALL( cudaMemcpy( host_pos_constraint_data, out_data_PosConstraintList, size, cudaMemcpyDeviceToHost ) ); 00842 } 00843 00844 /* 00845 // DEBUG 00846 { 00847 float * data = (float *)malloc( size ); 00848 CUDA_SAFE_CALL( cudaMemcpy( data, out_data, size, cudaMemcpyDeviceToHost ) ); 00849 for ( int i = 0, n = 0; i < 2; ++i, n+=4 ) { 00850 printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] ); 00851 } 00852 for ( int i = numOfVertices-2, n = i*4; i < numOfVertices; ++i, n+=4 ) { 00853 printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] ); 00854 } 00855 free( data ); 00856 } 00857 */ 00858 00859 DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, NULL, NULL, host_sim_flags_data, host_pos_constraint_data ); 00860 00861 // Free device memory 00862 CUDA_SAFE_CALL( cudaFree( out_data ) ); 00863 CUDA_SAFE_CALL( cudaFree( out_data_SimFlagsList ) ); 00864 CUDA_SAFE_CALL( cudaFree( out_data_PosConstraintList ) ); 00865 } 00866 00867 //----------------------------------------------------------------------------- 00868 // For Model Suture 00869 //============================================================================= 00870 00871 00872 //============================================================================= 00873 // For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function 00874 //----------------------------------------------------------------------------- 00875 00876 // CUDA Kernel -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function 00877 template <int COMPUTATION_CHOICE> 00878 __global__ 00879 void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU ( 00880 int numOfVertices, 00881 int numOfThreads, 00882 float currentTime, 00883 float timeStep, 00884 float * device_data, 00885 float * out_data 00886 ) 00887 { 00888 // Block index 00889 int bx = blockIdx.x; 00890 00891 // Thread index 00892 int tx = threadIdx.x; 00893 00894 // Index 00895 int idx = (numOfThreads * bx + tx) * 3; 00896 00897 // Use device shared memory to load the vertex position from device global memory 00898 /*__shared__*/ float vertex_pos[3]; 00899 //vertex_pos = device_data[idx]; 00900 vertex_pos[0] = device_data[idx ]; 00901 vertex_pos[1] = device_data[idx+1]; 00902 vertex_pos[2] = device_data[idx+2]; 00903 00904 // Synchronize to make sure the data are loaded 00905 __syncthreads(); 00906 00907 //out_data[idx ] = vertex_pos[idx ]; 00908 //out_data[idx+1] = vertex_pos[idx+1]; 00909 //out_data[idx+2] = vertex_pos[idx+2]; 00910 //out_data[idx ] = 0.0f; 00911 //out_data[idx+1] = 0.0f; 00912 //out_data[idx+2] = 0.0f; 00913 00914 float new_pos[3]; 00915 //new_pos = vertex_pos + 0.0001f; 00916 new_pos[0] = vertex_pos[0] + 0.0001f; 00917 new_pos[1] = vertex_pos[1]; 00918 new_pos[2] = vertex_pos[2]; 00919 00920 //__syncthreads(); 00921 00922 //out_data[idx] = new_pos; 00923 out_data[idx ] = new_pos[0]; 00924 out_data[idx+1] = new_pos[1]; 00925 out_data[idx+2] = new_pos[2]; 00926 00927 //out_data[idx ] = 0.0f; 00928 //out_data[idx+1] = 0.0f; 00929 //out_data[idx+2] = 0.0f; 00930 00931 //device_data[idx ] = device_data[idx ] + 0.0001f; 00932 //device_data[idx+1] = device_data[idx+1]; 00933 //device_data[idx+2] = device_data[idx+2]; 00934 00935 } 00936 00937 00938 // CUDA Wrapper Function -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function 00939 void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim ( 00940 int numOfVertices, 00941 int numOfThreads, 00942 float currentTime, 00943 float timeStep, 00944 float * host_data 00945 ) 00946 { 00947 /* 00948 // CUDA device properties 00949 int dev_no; 00950 CUDA_SAFE_CALL( cudaGetDevice( &dev_no ) ); 00951 cudaDeviceProp * prop; 00952 CUDA_SAFE_CALL( cudaGetDeviceProperties( prop, dev_no ) ); 00953 //printf( "maxThreadsPerBlock: %d\n", prop->maxThreadsPerBlock ); 00954 */ 00955 00956 // Kernel invocation 00957 unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads; 00958 dim3 Dg( numOfThreadBlocks, 1, 1 ); 00959 dim3 Db( numOfThreads, 1, 1 ); 00960 size_t Ns = 0; 00961 cudaStream_t S = 0; 00962 00963 printf( "Number of vertices: %d\n", numOfVertices ); 00964 printf( "Number of threads: %d\n", numOfThreads ); 00965 printf( "Number of grids: %d\n", numOfVertices/numOfThreads ); 00966 00967 // Allocate device memory 00968 unsigned int size = sizeof(float) * numOfVertices * 3; 00969 float * device_data; 00970 CUDA_SAFE_CALL( cudaMalloc( (void **)&device_data, size ) ); 00971 float * out_data; 00972 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) ); 00973 00974 // Copy host data to device data 00975 CUDA_SAFE_CALL( cudaMemcpy( device_data, host_data, size, cudaMemcpyHostToDevice ) ); 00976 00977 // Call the CUDA kernel 00978 Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU<1><<< Dg, Db, Ns, S >>> 00979 ( numOfVertices, numOfThreads, currentTime, timeStep, device_data, out_data ); 00980 00981 // Check if kernel execution generated and error 00982 CUT_CHECK_ERROR( "Kernel execution failed" ); 00983 00984 // Copy output data to host data 00985 CUDA_SAFE_CALL( cudaMemcpy( host_data, out_data, size, cudaMemcpyDeviceToHost ) ); 00986 00987 00988 // Free device memory 00989 CUDA_SAFE_CALL( cudaFree( device_data ) ); 00990 CUDA_SAFE_CALL( cudaFree( out_data ) ); 00991 } 00992 00993 //----------------------------------------------------------------------------- 00994 // For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function 00995 //============================================================================= 00996 00997 00998 //============================================================================= 00999 // For HETriMeshOneModelMultiParts AdvSim Function 01000 //----------------------------------------------------------------------------- 01001 01002 // CUDA Kernel -- For HETriMeshOneModelMultiParts AdvSim Function 01003 __device__ 01004 float3 Device__CalSpringForce_VertexList ( 01005 float Ks, 01006 float Kd, 01007 float HKs, 01008 float HKd, 01009 int vertexNo, 01010 unsigned int max_connection_size 01011 ) 01012 { 01013 float3 total_force = make_float3( 0.0f, 0.0f, 0.0f ); 01014 float4 HX1 = tex1Dfetch( CudaTexHomeVertexList, vertexNo ); 01015 float4 X1 = tex1Dfetch( CudaTexVertexList, vertexNo ); 01016 float4 V1 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f ); 01017 float4 HX2; 01018 float4 X2; 01019 float4 V2 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f ); 01020 float3 force; 01021 float rest; 01022 int1 connectedVertexNo; 01023 int idx = vertexNo * max_connection_size; 01024 01025 // The recommended way to round a single-precision floating-point operand to an 01026 // integer, with the result being a single-precision floating-point number is rintf(), 01027 // not roundf(). 01028 01029 //* 01030 for ( int i = 0; i < max_connection_size; ++i, ++idx ) { 01031 connectedVertexNo = tex1Dfetch( CudaTexVertexConnectionList, idx ); 01032 if ( connectedVertexNo.x >= 0 ) { 01033 HX2 = tex1Dfetch( CudaTexHomeVertexList, connectedVertexNo.x ); 01034 X2 = tex1Dfetch( CudaTexVertexList, connectedVertexNo.x ); 01035 01036 float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z ); 01037 rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z ); 01038 01039 force = Device__CalSpringForce( 01040 Ks, 01041 Kd, 01042 rest, 01043 X1, 01044 X2, 01045 V1, 01046 V2 01047 ); 01048 total_force.x += force.x; 01049 total_force.y += force.y; 01050 total_force.z += force.z; 01051 } 01052 } 01053 //*/ 01054 01055 //* 01056 // Add force from Home Spring 01057 force = Device__CalSpringForce( 01058 HKs, 01059 HKd, 01060 0.0f, 01061 X1, 01062 HX1, 01063 V1, 01064 V2 01065 ); 01066 total_force.x += force.x; 01067 total_force.y += force.y; 01068 total_force.z += force.z; 01069 //*/ 01070 01071 return total_force; 01072 } 01073 01074 01075 // CUDA Kernel -- For HETriMeshOneModelMultiParts AdvSim Function 01076 template <int COMPUTATION_CHOICE> 01077 __global__ 01078 void Global__HETriMeshOneModelMultiParts_AdvSim_CU ( 01079 unsigned int numOfVertices, 01080 unsigned int numOfThreads, 01081 float currentTime, 01082 float timeStep, 01083 float ptMass, 01084 float Ks, 01085 float Kd, 01086 float HKs, 01087 float HKd, 01088 unsigned int max_connection_size, 01089 float * out_data 01090 ) 01091 { 01092 01093 switch ( COMPUTATION_CHOICE ) { 01094 01095 // Emulate Viscoelastic by home vertex positions 01096 case 1: 01097 { 01098 // Block index 01099 int bx = blockIdx.x; 01100 01101 // Thread index 01102 int tx = threadIdx.x; 01103 01104 // Indices 01105 int idx = (numOfThreads * bx + tx) * 4; 01106 int vertexNo = (numOfThreads * bx + tx); 01107 01108 // Fetch data from texture linear memory 01109 float4 homeVertexPos = tex1Dfetch( CudaTexHomeVertexList, vertexNo ); 01110 float4 vertex_pos = tex1Dfetch( CudaTexVertexList, vertexNo ); 01111 01112 float3 len; 01113 len.x = homeVertexPos.x - vertex_pos.x; 01114 len.y = homeVertexPos.y - vertex_pos.y; 01115 len.z = homeVertexPos.z - vertex_pos.z; 01116 01117 len.x *= timeStep; 01118 len.y *= timeStep; 01119 len.z *= timeStep; 01120 01121 float3 new_pos; 01122 new_pos.x = vertex_pos.x + len.x; 01123 new_pos.y = vertex_pos.y + len.y; 01124 new_pos.z = vertex_pos.z + len.z; 01125 01126 //out_data[idx] = new_pos; 01127 out_data[idx ] = new_pos.x; 01128 out_data[idx+1] = new_pos.y; 01129 out_data[idx+2] = new_pos.z; 01130 out_data[idx+3] = vertex_pos.w; 01131 } 01132 break; 01133 01134 // Emulate Viscoelastic by mass spring connections 01135 case 2: 01136 { 01137 // Block index 01138 int bx = blockIdx.x; 01139 01140 // Thread index 01141 int tx = threadIdx.x; 01142 01143 // Vertex number 01144 int vertexNo = (numOfThreads * bx + tx); 01145 if ( vertexNo >= numOfVertices ) return; 01146 01147 // Indix for output 01148 int idx = (numOfThreads * bx + tx) * 4; 01149 01150 // Fetch data from texture linear memory 01151 float4 current_pos = tex1Dfetch( CudaTexVertexList, vertexNo ); 01152 float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo ); 01153 01154 // Synchronize to make sure the data are loaded 01155 //__syncthreads(); 01156 01157 float3 force = Device__CalSpringForce_VertexList( 01158 Ks, 01159 Kd, 01160 HKs, 01161 HKd, 01162 vertexNo, 01163 max_connection_size 01164 ); 01165 01166 // Convert force to acceleration 01167 float invMass = 1.0f / ptMass; 01168 float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f ); 01169 float dt_sqrt = timeStep*timeStep; 01170 01171 float3 new_pos = Device__VerletIntegration( 01172 acceleration, 01173 current_pos, 01174 previous_pos, 01175 dt_sqrt, 01176 Kd 01177 ); 01178 01179 // WARNING: out data have to be set at the end, otherwise it won't work!!! ??? 01180 out_data[idx ] = new_pos.x; 01181 out_data[idx+1] = new_pos.y; 01182 out_data[idx+2] = new_pos.z; 01183 out_data[idx+3] = current_pos.w; 01184 } 01185 break; 01186 } 01187 } 01188 01189 01190 // CUDA Wrapper Function -- For HETriMeshOneModelMultiParts AdvSim Function 01191 void Global__HETriMeshOneModelMultiParts_AdvSim ( 01192 unsigned int cudaID, 01193 unsigned int numOfVertices, 01194 unsigned int numOfThreads, 01195 float currentTime, 01196 float timeStep, 01197 int numOfSubSteps, 01198 float ptMass, 01199 float Ks, 01200 float Kd, 01201 float HKs, 01202 float HKd, 01203 float * host_vertex_data, 01204 float * host_prev_vertex_data, 01205 float * host_home_vertex_data, 01206 int * host_connection_list, 01207 unsigned int max_connection_size 01208 ) 01209 { 01210 //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep ); 01211 //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd ); 01212 01213 //printf( "Number of vertices: %d\n", numOfVertices ); 01214 //printf( "Number of threads: %d\n", numOfThreads ); 01215 //printf( "Number of grids: %d\n", numOfVertices/numOfThreads ); 01216 01217 // Allocate device memory for out data 01218 unsigned int size = sizeof(float) * numOfVertices * 4; 01219 float * out_data; 01220 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) ); 01221 01222 // Copy Vertex List (and Previous Vertex List) from host data to device data, 01223 // since vertices are dynamically changed. 01224 // While home vertices and connection list are unchanged. 01225 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) ); 01226 // The Previous Vertex List can be saved in CUDA memory. 01227 // Hence, the Previous Vertex List does not have to be updated. 01228 //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) ); 01229 01230 // Kernel invocation 01231 unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads; 01232 dim3 Dg( numOfThreadBlocks, 1, 1 ); 01233 dim3 Db( numOfThreads, 1, 1 ); 01234 size_t Ns = 0; 01235 cudaStream_t S = 0; 01236 01237 // Bind Textures 01238 DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices ); 01239 DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices ); 01240 DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices ); 01241 DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexConnectionList( numOfVertices, max_connection_size ); 01242 01243 int CHOICE = 2; 01244 01245 // HOME VERTEX 01246 if ( CHOICE == 1 ) { 01247 // Call the CUDA kernel (Choice 1 -- Home Vertex Position Only) 01248 Global__HETriMeshOneModelMultiParts_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 01249 numOfVertices, numOfThreads, 01250 currentTime, timeStep, 01251 ptMass, Ks, Kd, HKs, HKd, 01252 max_connection_size, 01253 out_data 01254 ); 01255 // Swap pointers for Vertex List and Previous Vertex List 01256 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 01257 DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 01258 } 01259 01260 // MASS SPRING SYSTEM 01261 else if ( CHOICE == 2 ) { 01262 float subTimeStep = timeStep / numOfSubSteps; 01263 for ( int i = 0; i < numOfSubSteps; ++i ) { 01264 //printf( "delta time: %g\n", timeStep/numOfSubSteps ); 01265 01266 // Call the CUDA kernel (Choice 2 -- Mass Spring System) 01267 Global__HETriMeshOneModelMultiParts_AdvSim_CU<2><<< Dg, Db, Ns, S >>>( 01268 numOfVertices, numOfThreads, 01269 currentTime, subTimeStep, 01270 ptMass, Ks, Kd, HKs, HKd, 01271 max_connection_size, 01272 out_data 01273 ); 01274 // Swap pointers for Vertex List and Previous Vertex List 01275 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 01276 DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 01277 01278 // Copy data to the device's memory for (current) Vertex List 01279 CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) ); 01280 currentTime += subTimeStep; 01281 } 01282 } 01283 01284 // Unbind Textures 01285 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList(); 01286 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList(); 01287 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList(); 01288 DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexConnectionList(); 01289 01290 // Check if kernel execution generated and error 01291 CUT_CHECK_ERROR( "Kernel execution failed!" ); 01292 01293 // Copy output data to host data Previous Vertex List 01294 // The host will have to swap its pointers (for current and previous vertex list) 01295 if ( CHOICE > 0 ) { 01296 CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) ); 01297 } 01298 01299 //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, host_prev_vertex_data, host_home_vertex_data ); 01300 //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug_ConnectionList( numOfVertices, max_connection_size, host_connection_list ); 01301 01302 // Free device memory 01303 CUDA_SAFE_CALL( cudaFree( out_data ) ); 01304 } 01305 01306 //----------------------------------------------------------------------------- 01307 // For HETriMeshOneModelMultiParts AdvSim Function 01308 //============================================================================= 01309 01310 //----------------------------------------------------------------------------- 01311 //============================================================================= 01312 END_NAMESPACE_TAPs__CUDA 01313 //----------------------------------------------------------------------------- 01314 //34567890123456789012345678901234567890123456789012345678901234567890123456789 01315 //--+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----