TAPs 0.7.7.3
TAPsCUDA_VertexListMSM_Def.cu
Go to the documentation of this file.
00001 /******************************************************************************
00002 TAPsCUDA_VertexListMSM_Def.cu
00003 
00004 CUDA Definition (i.e., cpp file).
00005 
00006 SUKITTI PUNAK   (08/27/2008)
00007 UPDATE          (09/21/2009)
00008 ******************************************************************************/
00009 
00010 #include "TAPsCUDA_VertexListMSM.cu"
00011 
00012 BEGIN_NAMESPACE_TAPs__CUDA
00013 //=============================================================================
00014 //-----------------------------------------------------------------------------
00015 
00016 //=============================================================================
00017 // Interface Functions
00018 //-----------------------------------------------------------------------------
00019 
00020 // Initialize CUDA Data for a Mesh Model by Using Vertex List
00021 bool InitailizeDataForVertexList ( 
00022     unsigned int & cudaID,              
00023     unsigned int numOfVertices,         
00024     unsigned int max_connection_size,   
00025     float * vertexList,                 
00026     float * prevVertexList,             
00027     float * homeVertexList,             
00028     int *   vertexConnectionList        
00029 )
00030 {
00031     //printf( "InitailizeDataForVertexList\n" );
00032     //fflush( stdout );
00033 
00034     // Assign CUDA ID to the object
00035     // The object has to use cudaID to communicate with the CUDA.
00036     cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;
00037 
00038     // Allocate CUDA data for the object.
00039     DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, true, max_connection_size, false );
00040     AddToGlobal_Pool_Of_DATA_Vertex_List( newData );
00041 
00042     // Size of memory for (xyzw) vertices
00043     int size = numOfVertices * 4 * sizeof(float);
00044 
00045     // Copy Vertex List from host data to device data
00046     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
00047     // Copy Previous Vertex List from host data to device data
00048     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
00049     // Copy Home Vertex List from host data to device data
00050     CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
00051     // Copy Vertex Connection List from host data to device data
00052     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) );
00053 
00054     return true;
00055 }
00056 
00057 
00058 //--- Suture & Strand Model ---------------------------------------------------
00059 // Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture)
00060 bool InitailizeDataForSutureModel ( 
00061     unsigned int & cudaID,          
00062     unsigned int numOfVertices,     
00063     float * vertexList,             
00064     float * prevVertexList          
00065 )
00066 {
00067     // Here Suture Model uses Vertex List Data
00068     // However, it has implicit connection (each vertex connect to the previous and next vertices).
00069     // So the connection list is set to zero.
00070 
00071     // Home vertex is unused for now.  Plan is to use home vertex for sticking suture to a surface.
00072 
00073     // Assign CUDA ID to the object
00074     // The object has to use cudaID to communicate with the CUDA.
00075     cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;
00076 
00077     // Allocate CUDA data for the object.
00078     DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, false, 0, true );
00079     AddToGlobal_Pool_Of_DATA_Vertex_List( newData );
00080 
00081     // Size of memory for (xyzw) vertices
00082     int size = numOfVertices * 4 * sizeof(float);
00083 
00084     // Copy Vertex List from host data to device data
00085     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
00086     // Copy Previous Vertex List from host data to device data
00087     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
00088     // Copy Home Vertex List from host data to device data
00089     //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
00090     // Copy Vertex Connection List from host data to device data
00092 
00093     return true;
00094 }
00095 
00096 
00097 //--- Suture & Strand Model ---------------------------------------------------
00098 // Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture)
00099 bool InitailizeDataForSutureModel_ADVSIM ( 
00100     unsigned int & cudaID,          
00101     unsigned int numOfVertices,     
00102     float * vertexList,             
00103     float * prevVertexList,         
00104     unsigned int * simFlagsList,    
00105     float * posConstraintList       
00106 )
00107 {
00108     // Here Suture Model uses Vertex List Data
00109     // However, it has implicit connection (each vertex connect to the previous and next vertices).
00110     // So the connection list is set to zero.
00111 
00112     // Home vertex is unused for now.  Plan is to use home vertex for sticking suture to a surface.
00113 
00114     // Assign CUDA ID to the object
00115     // The object has to use cudaID to communicate with the CUDA.
00116     cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;
00117 
00118     // Allocate CUDA data for the object.
00119     DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, false, 0, true );
00120     AddToGlobal_Pool_Of_DATA_Vertex_List( newData );
00121 
00122     // Size of memory for (xyzw) vertices
00123     int size = numOfVertices * 4 * sizeof(float);
00124 
00125     // Copy Vertex List from host data to device data
00126     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
00127     // Copy Previous Vertex List from host data to device data
00128     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
00129     // Copy Home Vertex List from host data to device data
00130     //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
00131     // Copy Vertex Connection List from host data to device data
00132     //CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) );
00133     // Copy Simulation Flags List from host data to device data
00134     CUDA_SAFE_CALL( cudaMemcpy( newData->GetSimFlagsList(), simFlagsList, numOfVertices*sizeof(unsigned int), cudaMemcpyHostToDevice ) );
00135     // Copy Position Constraint List from host data to device data
00136     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPosConstraintList(), posConstraintList, size, cudaMemcpyHostToDevice ) );
00137 
00138     return true;
00139 }
00140 
00141 
00142 
00143 //-----------------------------------------------------------------------------
00144 // Interface Functions
00145 //=============================================================================
00146 
00147 
00148 
00149 
00150 //=============================================================================
00151 // Device Functions
00152 //-----------------------------------------------------------------------------
00153 
00154 // Calculate a spring force
00155 __device__
00156 float3 Device__CalSpringForce ( 
00157     float Ks,       
00158     float Kd,       
00159     float rest,     
00160     float4 X1,      
00161     float4 X2,      
00162     float4 V1,      
00163     float4 V2       
00164 )
00165 {
00166     // Formula from Physically Based Modeling (Siggraph 2001 Course Note)
00167     // F1 = -{Ks(l - r) + Kd[(V1-V2)*L]/l}*L/l
00168     // F2 = -F1
00169     //   where Ks = spring stiffness, Kd = spring damping,
00170     //     V1 and V2 are velocity of particles linked by the spring
00171     //     r = spring rest length
00172     //     L = vector of difference of positions of particle#1 and #2
00173     //     l = magnitude of L
00174     //return ( ( m_tK * (scl - m_tL) ) + ( m_tD * V * cL ) ) * cL;
00175 
00176     float3 Xdif = make_float3( X1.x-X2.x, X1.y-X2.y, X1.z-X2.z );
00177     float3 Vdif = make_float3( V1.x-V2.x, V1.y-V2.y, V1.z-V2.z );
00178 
00179     //float len = length( Diff );   // length is an inline function in CUDA (cutil_math.h)
00180     float len = sqrtf( Xdif.x*Xdif.x + Xdif.y*Xdif.y + Xdif.z*Xdif.z );
00181     if ( len < 1.0E-16f ) {
00182         return make_float3( 0.0f, 0.0f, 0.0f );
00183     }
00184     float invLen = 1.0f / len;
00185     float3 Dir = make_float3( Xdif.x*invLen, Xdif.y*invLen, Xdif.z*invLen );
00186 
00187     float Vdif_Dir = Vdif.x*Dir.x + Vdif.y*Dir.y + Vdif.z*Dir.z;
00188 
00189     float fmag = -( Ks*(len-rest) + Kd*Vdif_Dir );
00190     float3 force = make_float3( fmag*Dir.x, fmag*Dir.y, fmag*Dir.z );
00191 
00192     return force;
00193 }
00194 
00195 
00197 __device__
00198 float3 Device__VerletIntegration ( 
00199     float4 A1,      
00200     float4 X1,      
00201     float4 X0,      
00202     float  dt_sqrt, 
00203     float damper    
00204 )
00205 {
00206     // F = ma; a = F/m;
00207     // x(t+dt) = 2x(t) - x(t-dt) + a(t)*{dt}^2 + O({dt}^4)
00208     float3 new_pos;
00209     new_pos.x = 2.0f*X1.x - X0.x + A1.x*dt_sqrt - damper*(X1.x - X0.x);
00210     new_pos.y = 2.0f*X1.y - X0.y + A1.y*dt_sqrt - damper*(X1.y - X0.y);
00211     new_pos.z = 2.0f*X1.z - X0.z + A1.z*dt_sqrt - damper*(X1.z - X0.z);
00212     return new_pos;
00213 }
00214 
00215 
00216 // CUDA Wrapper Function -- For ModelStrand AdvSim Function
00217 // with TAPs_ADVANCED_SIMULATION
00218 // return enforced position
00219 __device__
00220 float3 Device__EnforceConstraints_ADVSIM ( 
00221     float3  vertexA,    
00222     float4  vertexB     
00223 )
00224 {
00225     float one_ratio = 1.0f - vertexB.w;
00226     float3 enforced_pos;
00227     enforced_pos.x = vertexB.w*vertexA.x + one_ratio*vertexB.x;
00228     enforced_pos.y = vertexB.w*vertexA.y + one_ratio*vertexB.y;
00229     enforced_pos.z = vertexB.w*vertexA.z + one_ratio*vertexB.z;
00230     return enforced_pos;
00231 }
00232 
00233 
00234 //-----------------------------------------------------------------------------
00235 // Device Functions
00236 //=============================================================================
00237 
00238 
00239 
00240 
00241 //=============================================================================
00242 // For Model Suture
00243 //-----------------------------------------------------------------------------
00244 
00245 // CUDA Kernel -- For ModelStrand AdvSim Function
00246 __device__
00247 float3 Device__CalSpringForce_ModelStrand ( 
00248     float Ks,           
00249     float Kd,           
00250     float Lrest,        
00251     int vertexNo,       
00252     unsigned int numOfVertices  
00253 )
00254 {
00255     float3 total_force = make_float3( 0.0f, 0.0f, 0.0f );
00256     //float4 HX1 = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
00257     float4 X1 = tex1Dfetch( CudaTexVertexList, vertexNo );
00258     float4 V1 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
00259     //float4 HX2;
00260     float4 X2;
00261     float4 V2 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
00262     float3 force;
00263 
00264     // The recommended way to round a single-precision floating-point operand to an
00265     // integer, with the result being a single-precision floating-point number is rintf(),
00266     // not roundf().
00267 
00268     // Cal force due to connection with the previous vertex
00269     if ( vertexNo > 0 ) {
00270         int prevVertex = vertexNo-1;
00271         //HX2 = tex1Dfetch( CudaTexHomeVertexList, prevVertex );
00272         X2 = tex1Dfetch( CudaTexVertexList, prevVertex );
00273 
00274         //float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
00275         //rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );
00276 
00277         force = Device__CalSpringForce( 
00278             Ks,     
00279             Kd,     
00280             Lrest,  
00281             X1,     
00282             X2,     
00283             V1,     
00284             V2      
00285         );
00286         total_force.x += force.x;
00287         total_force.y += force.y;
00288         total_force.z += force.z;
00289     }
00290 
00291     // Cal force due to connection with the next vertex
00292     if ( vertexNo < numOfVertices - 1 ) {
00293         int nextVertex = vertexNo+1;
00294         //HX2 = tex1Dfetch( CudaTexHomeVertexList, prevVertex );
00295         X2 = tex1Dfetch( CudaTexVertexList, nextVertex );
00296 
00297         //float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
00298         //rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );
00299 
00300         force = Device__CalSpringForce( 
00301             Ks,     
00302             Kd,     
00303             Lrest,  
00304             X1,     
00305             X2,     
00306             V1,     
00307             V2      
00308         );
00309         total_force.x += force.x;
00310         total_force.y += force.y;
00311         total_force.z += force.z;
00312     }
00313 
00314     /*
00315     // Add force from Home Spring
00316     force = Device__CalSpringForce( 
00317         Ks,     //!< spring stiffness
00318         Kd,     //!< spring damper
00319         0.0f,   //!< spring rest length
00320         X1,     //!< position of particle one
00321         HX1,    //!< position of particle two
00322         V1,     //!< velocity of particle one
00323         V2      //!< velocity of particle two
00324     );
00325     total_force.x += force.x;
00326     total_force.y += force.y;
00327     total_force.z += force.z;
00328     */
00329 
00330     return total_force;
00331 }
00332 
00333 // CUDA Kernel -- For ModelStrand AdvSim Function
00334 template <int COMPUTATION_CHOICE>
00335 __global__
00336 void Global__ModelStrand_AdvSim_CU ( 
00337     unsigned int numOfVertices,     
00338     unsigned int numOfThreads,      
00339     float currentTime,          
00340     float timeStep,             
00341     float ptMass,               
00342     float Ks,                   
00343     float Kd,                   
00344     float Lrest,                
00345     float * out_data        
00346 )
00347 {
00348     switch ( COMPUTATION_CHOICE ) {
00349 
00350         //-------------------------------------------------
00351         // Emulate Viscoelastic by mass spring connections
00352         // with TAPs_ADVANCED_SIMULATION
00353         case 101:
00354         {
00355             // Block index
00356             int bx = blockIdx.x;
00357 
00358             // Thread index
00359             int tx = threadIdx.x;
00360 
00361             // Vertex number
00362             int vertexNo = (numOfThreads * bx + tx);
00363             if ( vertexNo >= numOfVertices )    return;
00364 
00365             // Index for output
00366             int idx = (numOfThreads * bx + tx) * 4;
00367 
00368             // Fetch data from texture linear memory
00369             float4 current_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );
00370 
00371             // If the suture vertex is not fixed
00372             if ( current_pos.w == 1 ) {
00373                 float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );
00374                 float3 force = Device__CalSpringForce_ModelStrand( 
00375                     Ks,             
00376                     Kd,             
00377                     Lrest,          
00378                     vertexNo,       
00379                     numOfVertices   
00380                 );
00381                 // Convert force to acceleration
00382                 float invMass = 1.0f / ptMass;
00383                 float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
00384                 float dt_sqrt = timeStep*timeStep;
00385                 float3 new_pos = Device__VerletIntegration( 
00386                     acceleration,   
00387                     current_pos,    
00388                     previous_pos,   
00389                     dt_sqrt,        
00390                     Kd              
00391                 );
00392                 
00393                 // Enforce constraints with TAPs_ADVANCED_SIMULATION
00394                 uint1 simFlags = tex1Dfetch( CudaTexSimFlagsList, vertexNo );
00395                 if ( simFlags.x > 0 ) {
00396                     float4 constraint_pos = tex1Dfetch( CudaTexPosConstraintList, vertexNo );
00397                     new_pos = Device__EnforceConstraints_ADVSIM( new_pos, constraint_pos );
00398 
00399                     //new_pos.x = constraint_pos.x;
00400                     //new_pos.y = constraint_pos.y;
00401                     //new_pos.z = constraint_pos.z;
00402                 }
00403 
00404                 //  WARNING: out data have to be set at the end, otherwise it won't work!!! ???
00405                 out_data[idx  ] = new_pos.x;
00406                 out_data[idx+1] = new_pos.y;
00407                 out_data[idx+2] = new_pos.z;
00408                 out_data[idx+3] = current_pos.w;
00409             }
00410             // If the suture vertex is fixed
00411             else {
00412                 out_data[idx  ] = current_pos.x;
00413                 out_data[idx+1] = current_pos.y;
00414                 out_data[idx+2] = current_pos.z;
00415                 out_data[idx+3] = current_pos.w;
00416             }
00417         }
00418         // Synchronize to make sure the data are loaded
00419         //__syncthreads();
00420         break;
00421 
00422         //-------------------------------------------------
00423         // Emulate Viscoelastic by mass spring connections
00424         case 1:
00425         {
00426             // Block index
00427             int bx = blockIdx.x;
00428             // Thread index
00429             int tx = threadIdx.x;
00430             // Vertex number
00431             int vertexNo = (numOfThreads * bx + tx);
00432             if ( vertexNo >= numOfVertices )    return;
00433             // Index for output
00434             int idx = (numOfThreads * bx + tx) * 4;
00435 
00436             // Fetch data from texture linear memory
00437             float4 current_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );
00438             if ( current_pos.w == 1 ) {
00439                 float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );
00440                 float3 force = Device__CalSpringForce_ModelStrand( 
00441                     Ks,             
00442                     Kd,             
00443                     Lrest,          
00444                     vertexNo,       
00445                     numOfVertices   
00446                 );
00447                 // Convert force to acceleration
00448                 float invMass = 1.0f / ptMass;
00449                 float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
00450                 float dt_sqrt = timeStep*timeStep;
00451                 // Calculate the new position
00452                 float3 new_pos = Device__VerletIntegration( 
00453                     acceleration,   
00454                     current_pos,    
00455                     previous_pos,   
00456                     dt_sqrt,        
00457                     Kd              
00458                 );
00459 
00460                 //  WARNING: out data have to be set at the end, otherwise it won't work!!! ???
00461                 out_data[idx  ] = new_pos.x;
00462                 out_data[idx+1] = new_pos.y;
00463                 out_data[idx+2] = new_pos.z;
00464                 out_data[idx+3] = current_pos.w;
00465             }
00466             else {
00467                 out_data[idx  ] = current_pos.x;
00468                 out_data[idx+1] = current_pos.y;
00469                 out_data[idx+2] = current_pos.z;
00470                 out_data[idx+3] = current_pos.w;
00471             }
00472         }
00473         // Synchronize to make sure the data are loaded
00474         //__syncthreads();
00475         break;
00476     }
00477 }
00478 
00479 
00480 // CUDA Kernel -- For ModelStrand AdvSim Function -- Enforce Constraint
00481 template <int COMPUTATION_CHOICE>
00482 __global__
00483 void Global__ModelStrand_AdvSim_Enforce_Constraint_CU ( 
00484     unsigned int numOfVertices,     
00485     unsigned int numOfThreads,      
00486     float currentTime,          
00487     float timeStep,             
00488     float ptMass,               
00489     float Ks,                   
00490     float Kd,                   
00491     float Lrest,                
00492     unsigned int * out_data_SimFlagsList,   
00493     float * out_data_PosConstraintList      
00494 )
00495 {
00496     switch ( COMPUTATION_CHOICE ) {
00497 
00498         //-------------------------------------------------
00499         // Strand's vertex is slidable
00500         // with TAPs_ADVANCED_SIMULATION
00501         case 101:
00502         {
00503             // Block index
00504             int bx = blockIdx.x;
00505             // Thread index
00506             int tx = threadIdx.x;
00507             // Vertex number
00508             int vertexNo = (numOfThreads * bx + tx);
00509             if ( vertexNo >= numOfVertices )    return;
00510             // Index for output
00511             int idx = vertexNo * 4;
00512 
00513             // Fetch data from texture linear memories
00514             float4 pos_j  = tex1Dfetch( CudaTexVertexList, vertexNo );
00515             uint1 simFlags  = tex1Dfetch( CudaTexSimFlagsList, vertexNo );
00516             float4 posConstraint = tex1Dfetch( CudaTexPosConstraintList, vertexNo );
00517 
00518             bool bDefault = false;
00519 
00520             // If the suture vertex is not fixed
00521             if ( pos_j.w == 1.0f ) {
00522 
00523                 // Values of simulation flags (declared in TAPsEnumList.hpp):
00524                 //   SIMULATION CONSTRAINTS
00525                 //   enum SimConstraints {
00526                 //      CLEARED     = 0,
00527                 //      FIXED       = 1,
00528                 //      ATTACHED    = 1 << 1,
00529                 //      PUNCTURED   = 1 << 2,
00530                 //      SLIDABLE    = 1 << 3,
00531                 //      DUMMY   
00532                 //  };
00533 
00534                 // CLEARED
00535                 if ( simFlags.x == 0 ) {
00536                     bDefault = true;
00537                 }
00538                 // FIXED
00539                 else if ( simFlags.x == 1 ) {
00540                     bDefault = true;
00541                 }
00542                 // ATTACHED
00543                 else if ( simFlags.x == 2 ) {
00544                     bDefault = true;
00545                 }
00546                 // PUNCTURED (n/a)
00547                 //else if ( simFlags.x == 4 ) {
00548                 //}
00549                 // SLIDABLE
00550                 else if ( simFlags.x == 8 ) {
00551                 //else if ( false ) {
00552                     if ( 0 < vertexNo && vertexNo < numOfVertices-2 ) {
00553                         int i = vertexNo-1;
00554                         int k = vertexNo+1;
00555                         float4 pos_i  = tex1Dfetch( CudaTexVertexList, i );
00556                         float4 pos_k  = tex1Dfetch( CudaTexVertexList, k );
00557                         float3 linkA = make_float3( pos_i.x - pos_j.x, pos_i.y - pos_j.y, pos_i.z - pos_j.z );
00558                         float3 linkB = make_float3( pos_k.x - pos_j.x, pos_k.y - pos_j.y, pos_k.z - pos_j.z );
00559                         float linkAlen = sqrt( linkA.x*linkA.x + linkA.y*linkA.y + linkA.z*linkA.z );
00560                         float linkBlen = sqrt( linkB.x*linkB.x + linkB.y*linkB.y + linkB.z*linkB.z );
00561 
00562                         float threshold = 1.025f;
00563                         //float threshold = 2.0f;
00564 
00565                         // Shift up
00566                         if ( linkAlen > linkBlen*threshold ) {
00567                             //uint1 simFlags_up  = tex1Dfetch( CudaTexSimFlagsList, vertexNo+1 );
00568                             // if simFlags of the right vertex is cleared
00569                             //if ( simFlags_up.x == 0 ) {
00570                                 out_data_SimFlagsList[vertexNo] = simFlags.x;
00571                                 out_data_PosConstraintList[idx  ] = posConstraint.x;
00572                                 out_data_PosConstraintList[idx+1] = posConstraint.y;
00573                                 out_data_PosConstraintList[idx+2] = posConstraint.z;
00574                                 out_data_PosConstraintList[idx+3] = posConstraint.w + TAPs_Const_Suture_ShiftUp;    // +20 to signify that the strand vertex must be shifted up
00575                             //}
00576                             //else {
00577                             //  bDefault = true;
00578                             //}
00579                         }
00580                         // Shift down
00581                         else if ( linkBlen > linkAlen*threshold ) {
00582                             //uint1 simFlags_down  = tex1Dfetch( CudaTexSimFlagsList, vertexNo-1 );
00583                             // if simFlags of the left vertex is cleared
00584                             //if ( simFlags_down.x == 0 ) {
00585                                 out_data_SimFlagsList[vertexNo] = simFlags.x;
00586                                 out_data_PosConstraintList[idx  ] = posConstraint.x;
00587                                 out_data_PosConstraintList[idx+1] = posConstraint.y;
00588                                 out_data_PosConstraintList[idx+2] = posConstraint.z;
00589                                 out_data_PosConstraintList[idx+3] = posConstraint.w + TAPs_Const_Suture_ShiftDown;  // +10 to signify that the strand vertex must be shifted down
00590                             //}
00591                             //else {
00592                             //  bDefault = true;
00593                             //}
00594                         }
00595                         else {
00596                             bDefault = true;
00597                         }
00598                     }
00599                     else {
00600                         bDefault = true;
00601                     }
00602                 }
00603                 // DEFAULT CASE
00604                 else {
00605                     bDefault = true;
00606                 }
00607             }
00608 
00609             // If the suture vertex is fixed
00610             else {
00611                 bDefault = true;
00612             }
00613 
00614             if ( bDefault ) {
00615                 out_data_SimFlagsList[vertexNo] = simFlags.x;
00616                 out_data_PosConstraintList[idx  ] = posConstraint.x;
00617                 out_data_PosConstraintList[idx+1] = posConstraint.y;
00618                 out_data_PosConstraintList[idx+2] = posConstraint.z;
00619                 out_data_PosConstraintList[idx+3] = posConstraint.w;
00620             }
00621         }
00622         // Synchronize to make sure the data are loaded
00623         //__syncthreads();
00624         break;
00625     }
00626 }
00627 
00628 
00629 // CUDA Wrapper Function -- For ModelStrand AdvSim Function
00630 void Global__ModelStrand_AdvSim ( 
00631     unsigned int cudaID,        
00632     unsigned int numOfVertices, 
00633     unsigned int numOfThreads,  
00634     float currentTime,          
00635     float timeStep,             
00636     int numOfSubSteps,          
00637     float ptMass,               
00638     float Ks,                   
00639     float Kd,                   
00640     float Lrest,                
00641     float * host_vertex_data,           
00642     float * host_prev_vertex_data       
00643     //float * host_home_vertex_data,        //!< host's home vertex data
00644 )
00645 {
00646     //printf( "FILE:%s LINE:%d Global__ModelStrand_AdvSim\n", __FILE__, __LINE__ );
00647     //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
00648     //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
00649     //printf( "Number of vertices: %d\n", numOfVertices );
00650     //printf( "Number of threads: %d\n", numOfThreads );
00651     //printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );
00652 
00653     // Allocate device memory for out data
00654     unsigned int size = sizeof(float) * numOfVertices * 4;
00655     float * out_data;
00656     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
00657 
00658     // Copy Vertex List (and Previous Vertex List) from host data to device data, 
00659     // since vertices are dynamically changed.
00660     // While home vertices and connection list are unchanged.
00661     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
00662     // The Previous Vertex List can be saved in CUDA memory.
00663     // Hence, the Previous Vertex List does not have to be updated.
00664     //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
00665 
00666     // Kernel invocation
00667     unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
00668     dim3 Dg( numOfThreadBlocks, 1, 1 );
00669     dim3 Db( numOfThreads, 1, 1 );
00670     size_t Ns = 0;
00671     cudaStream_t S = 0;
00672 
00673     // Bind Textures
00674     DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
00675     DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
00676     //DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices );
00677 
00678     //*
00679     int CHOICE = 1;
00680 
00681     // MASS SPRING SYSTEM
00682     if ( CHOICE == 1 ) {
00683         float subTimeStep = timeStep / numOfSubSteps;
00684         for ( int i = 0; i < numOfSubSteps; ++i ) {
00685             // Call the CUDA kernel (Choice 1 -- Mass Spring System)
00686             Global__ModelStrand_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 
00687                 numOfVertices, numOfThreads, 
00688                 currentTime, subTimeStep, 
00689                 ptMass, Ks, Kd, Lrest, 
00690                 out_data 
00691             );
00692             // Swap pointers for Vertex List and Previous Vertex List
00693             // So that the Previous Vertex List is remain unchanged in the CUDA memory.
00694             DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
00695 
00696             // Copy data to the device's memory for (current) Vertex List
00697             CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
00698             currentTime += subTimeStep;
00699         }
00700     }
00701     //*/
00702 
00703     // Unbind Textures
00704     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
00705     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
00706     //DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();
00707 
00708     // Check if kernel execution generated and error
00709     CUT_CHECK_ERROR( "Kernel execution failed!" );
00710 
00711     // Copy output data to host data Previous Vertex List
00712     // The host will have to swap its pointers (for current and previous vertex list)
00713     if ( CHOICE > 0 ) {
00714         CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
00715     }
00716 
00717     //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data );
00718 
00719     // Free device memory
00720     CUDA_SAFE_CALL( cudaFree( out_data ) );
00721 }
00722 
00723 
00724 // CUDA Wrapper Function -- For ModelStrand AdvSim Function
00725 // with TAPs_ADVANCED_SIMULATION
00726 void Global__ModelStrand_AdvSim_ADVSIM ( 
00727     unsigned int cudaID,        
00728     unsigned int numOfVertices, 
00729     unsigned int numOfThreads,  
00730     float currentTime,          
00731     float timeStep,             
00732     int numOfSubSteps,          
00733     float ptMass,               
00734     float Ks,                   
00735     float Kd,                   
00736     float Lrest,                
00737     float * host_vertex_data,           
00738     float * host_prev_vertex_data,      
00739     unsigned int * host_sim_flags_data, 
00740     float * host_pos_constraint_data    
00741 )
00742 {
00743     printf( "FILE:%s LINE:%d Global__ModelStrand_AdvSim_ADVSIM\n", __FILE__, __LINE__ );
00744     printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
00745     printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
00746     printf( "Number of vertices: %d\n", numOfVertices );
00747     printf( "Number of threads: %d\n", numOfThreads );
00748     printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );
00749 
00750     // Allocate device memory for out data
00751     unsigned int size = sizeof(float) * numOfVertices * 4;
00752     float * out_data;
00753     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
00754 
00755     // Allocate device memory for out data for changing suture's simulation flags list
00756     unsigned int size_SimFlagsList = sizeof(unsigned int) * numOfVertices;
00757     unsigned int * out_data_SimFlagsList;
00758     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_SimFlagsList, size_SimFlagsList ) );
00759 
00760     // Allocate device memory for out data for changing suture's position constraint list
00761     float * out_data_PosConstraintList;
00762     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_PosConstraintList, size ) );
00763 
00764     // Copy Vertex List (and Previous Vertex List) from host data to device data, 
00765     // since vertices are dynamically changed.
00766     // While home vertices and connection list are unchanged.
00767     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
00768     // The Previous Vertex List can be saved in CUDA memory.
00769     // Hence, the Previous Vertex List does not have to be updated.
00770     //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
00771 
00772     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetSimFlagsList(), host_sim_flags_data, sizeof(unsigned int)*numOfVertices, cudaMemcpyHostToDevice ) );
00773     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPosConstraintList(), host_pos_constraint_data, size, cudaMemcpyHostToDevice ) );
00774 
00775     // Kernel invocation
00776     unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
00777     dim3 Dg( numOfThreadBlocks, 1, 1 );
00778     dim3 Db( numOfThreads, 1, 1 );
00779     size_t Ns = 0;
00780     cudaStream_t S = 0;
00781 
00782     // Bind Textures
00783     DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
00784     DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
00785     DATA_GlobalPool.DataForVertexList[cudaID]->BindSimFlagsList( numOfVertices );
00786     DATA_GlobalPool.DataForVertexList[cudaID]->BindPosConstraintList( numOfVertices );
00787 
00788     //*
00789     // Add 10 for TAPs_ADVANCED_SIMULATION
00790     int CHOICE = 100 + 1;
00791 
00792     // MASS SPRING SYSTEM
00793     if ( CHOICE == 101 ) {
00794         float subTimeStep = timeStep / numOfSubSteps;
00795         for ( int i = 0; i < numOfSubSteps; ++i ) {
00796             //printf( "delta time: %g\n", timeStep/numOfSubSteps );
00797 
00798             // Call the CUDA kernel (Choice 100+1 -- Mass Spring System)
00799             // with TAPs_ADVANCED_SIMULATION
00800             Global__ModelStrand_AdvSim_CU<101><<< Dg, Db, Ns, S >>>( 
00801                 numOfVertices, numOfThreads, 
00802                 currentTime, subTimeStep, 
00803                 ptMass, Ks, Kd, Lrest, 
00804                 out_data 
00805             );
00806             // Swap pointers for Vertex List and Previous Vertex List
00807             // So that the Previous Vertex List is remain unchanged in the CUDA memory.
00808             DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
00809 
00810             // Copy data to the device's memory for (current) Vertex List
00811             CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
00812             currentTime += subTimeStep;
00813         }
00814 
00815         // Call the CUDA kernel to enforce constraints
00816         // with TAPs_ADVANCED_SIMULATION
00817         Global__ModelStrand_AdvSim_Enforce_Constraint_CU<101><<< Dg, Db, Ns, S >>>( 
00818             numOfVertices, numOfThreads, 
00819             currentTime, subTimeStep, 
00820             ptMass, Ks, Kd, Lrest, 
00821             out_data_SimFlagsList, 
00822             out_data_PosConstraintList 
00823         );
00824     }
00825     //*/
00826 
00827     // Unbind Textures
00828     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
00829     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
00830     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindSimFlagsList();
00831     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPosConstraintList();
00832 
00833     // Check if kernel execution generated and error
00834     CUT_CHECK_ERROR( "Kernel execution failed!" );
00835 
00836     // Copy output data to host data Previous Vertex List
00837     // The host will have to swap its pointers (for current and previous vertex list)
00838     if ( CHOICE > 0 ) {
00839         CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
00840         CUDA_SAFE_CALL( cudaMemcpy( host_sim_flags_data, out_data_SimFlagsList, size_SimFlagsList, cudaMemcpyDeviceToHost ) );
00841         CUDA_SAFE_CALL( cudaMemcpy( host_pos_constraint_data, out_data_PosConstraintList, size, cudaMemcpyDeviceToHost ) );
00842     }
00843 
00844     /*
00845     // DEBUG
00846     {
00847         float * data = (float *)malloc( size );
00848         CUDA_SAFE_CALL( cudaMemcpy( data, out_data, size, cudaMemcpyDeviceToHost ) );
00849         for ( int i = 0, n = 0; i < 2; ++i, n+=4 ) {
00850             printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] );
00851         }
00852         for ( int i = numOfVertices-2, n = i*4; i < numOfVertices; ++i, n+=4 ) {
00853             printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] );
00854         }
00855         free( data );
00856     }
00857     */
00858 
00859     DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, NULL, NULL, host_sim_flags_data, host_pos_constraint_data );
00860 
00861     // Free device memory
00862     CUDA_SAFE_CALL( cudaFree( out_data ) );
00863     CUDA_SAFE_CALL( cudaFree( out_data_SimFlagsList ) );
00864     CUDA_SAFE_CALL( cudaFree( out_data_PosConstraintList ) );
00865 }
00866 
00867 //-----------------------------------------------------------------------------
00868 // For Model Suture
00869 //=============================================================================
00870 
00871 
00872 //=============================================================================
00873 // For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function
00874 //-----------------------------------------------------------------------------
00875 
00876 // CUDA Kernel -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function
00877 template <int COMPUTATION_CHOICE>
00878 __global__
00879 void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU ( 
00880     int numOfVertices,      
00881     int numOfThreads,       
00882     float currentTime,      
00883     float timeStep,         
00884     float * device_data,    
00885     float * out_data        
00886 )
00887 {
00888     // Block index
00889     int bx = blockIdx.x;
00890 
00891     // Thread index
00892     int tx = threadIdx.x;
00893 
00894     // Index
00895     int idx = (numOfThreads * bx + tx) * 3;
00896 
00897     // Use device shared memory to load the vertex position from device global memory
00898     /*__shared__*/ float vertex_pos[3];
00899     //vertex_pos = device_data[idx];
00900     vertex_pos[0] = device_data[idx  ];
00901     vertex_pos[1] = device_data[idx+1];
00902     vertex_pos[2] = device_data[idx+2];
00903 
00904     // Synchronize to make sure the data are loaded
00905     __syncthreads();
00906 
00907     //out_data[idx  ] = vertex_pos[idx  ];
00908     //out_data[idx+1] = vertex_pos[idx+1];
00909     //out_data[idx+2] = vertex_pos[idx+2];
00910     //out_data[idx  ] = 0.0f;
00911     //out_data[idx+1] = 0.0f;
00912     //out_data[idx+2] = 0.0f;
00913 
00914     float new_pos[3];
00915     //new_pos = vertex_pos + 0.0001f;
00916     new_pos[0] = vertex_pos[0] + 0.0001f;
00917     new_pos[1] = vertex_pos[1];
00918     new_pos[2] = vertex_pos[2];
00919 
00920     //__syncthreads();
00921 
00922     //out_data[idx] = new_pos;
00923     out_data[idx  ] = new_pos[0];
00924     out_data[idx+1] = new_pos[1];
00925     out_data[idx+2] = new_pos[2];
00926 
00927     //out_data[idx  ] = 0.0f;
00928     //out_data[idx+1] = 0.0f;
00929     //out_data[idx+2] = 0.0f;
00930 
00931     //device_data[idx  ] = device_data[idx  ] + 0.0001f;
00932     //device_data[idx+1] = device_data[idx+1];
00933     //device_data[idx+2] = device_data[idx+2];
00934 
00935 }
00936 
00937 
00938 // CUDA Wrapper Function -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function
00939 void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim ( 
00940     int numOfVertices,      
00941     int numOfThreads,       
00942     float currentTime,      
00943     float timeStep,         
00944     float * host_data       
00945 )
00946 {
00947     /*
00948     // CUDA device properties
00949     int dev_no;
00950     CUDA_SAFE_CALL( cudaGetDevice( &dev_no ) );
00951     cudaDeviceProp * prop;
00952     CUDA_SAFE_CALL( cudaGetDeviceProperties( prop, dev_no ) );
00953     //printf( "maxThreadsPerBlock: %d\n", prop->maxThreadsPerBlock );
00954     */
00955 
00956     // Kernel invocation
00957     unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
00958     dim3 Dg( numOfThreadBlocks, 1, 1 );
00959     dim3 Db( numOfThreads, 1, 1 );
00960     size_t Ns = 0;
00961     cudaStream_t S = 0;
00962 
00963     printf( "Number of vertices: %d\n", numOfVertices );
00964     printf( "Number of threads: %d\n", numOfThreads );
00965     printf( "Number of grids: %d\n", numOfVertices/numOfThreads );
00966 
00967     // Allocate device memory
00968     unsigned int size = sizeof(float) * numOfVertices * 3;
00969     float * device_data;
00970     CUDA_SAFE_CALL( cudaMalloc( (void **)&device_data, size ) );
00971     float * out_data;
00972     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
00973 
00974     // Copy host data to device data
00975     CUDA_SAFE_CALL( cudaMemcpy( device_data, host_data, size, cudaMemcpyHostToDevice ) );
00976 
00977     // Call the CUDA kernel
00978     Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU<1><<< Dg, Db, Ns, S >>>
00979     ( numOfVertices, numOfThreads, currentTime, timeStep, device_data, out_data );
00980 
00981     // Check if kernel execution generated and error
00982     CUT_CHECK_ERROR( "Kernel execution failed" );
00983 
00984     // Copy output data to host data
00985     CUDA_SAFE_CALL( cudaMemcpy( host_data, out_data, size, cudaMemcpyDeviceToHost ) );
00986 
00987 
00988     // Free device memory
00989     CUDA_SAFE_CALL( cudaFree( device_data ) );
00990     CUDA_SAFE_CALL( cudaFree( out_data ) );
00991 }
00992 
00993 //-----------------------------------------------------------------------------
00994 // For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function
00995 //=============================================================================
00996 
00997 
00998 //=============================================================================
00999 // For HETriMeshOneModelMultiParts AdvSim Function
01000 //-----------------------------------------------------------------------------
01001 
01002 // CUDA Kernel -- For HETriMeshOneModelMultiParts AdvSim Function
01003 __device__
01004 float3 Device__CalSpringForce_VertexList ( 
01005     float Ks,       
01006     float Kd,       
01007     float HKs,      
01008     float HKd,      
01009     int vertexNo,   
01010     unsigned int max_connection_size    
01011 )
01012 {
01013     float3 total_force = make_float3( 0.0f, 0.0f, 0.0f );
01014     float4 HX1 = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
01015     float4 X1 = tex1Dfetch( CudaTexVertexList, vertexNo );
01016     float4 V1 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
01017     float4 HX2;
01018     float4 X2;
01019     float4 V2 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
01020     float3 force;
01021     float rest;
01022     int1 connectedVertexNo;
01023     int idx = vertexNo * max_connection_size;
01024 
01025     // The recommended way to round a single-precision floating-point operand to an
01026     // integer, with the result being a single-precision floating-point number is rintf(),
01027     // not roundf().
01028 
01029     //*
01030     for ( int i = 0; i < max_connection_size; ++i, ++idx ) {
01031         connectedVertexNo = tex1Dfetch( CudaTexVertexConnectionList, idx );
01032         if ( connectedVertexNo.x >= 0 ) {
01033             HX2 = tex1Dfetch( CudaTexHomeVertexList, connectedVertexNo.x );
01034             X2 = tex1Dfetch( CudaTexVertexList, connectedVertexNo.x );
01035 
01036             float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
01037             rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );
01038 
01039             force = Device__CalSpringForce( 
01040                 Ks,     
01041                 Kd,     
01042                 rest,   
01043                 X1,     
01044                 X2,     
01045                 V1,     
01046                 V2      
01047             );
01048             total_force.x += force.x;
01049             total_force.y += force.y;
01050             total_force.z += force.z;
01051         }
01052     }
01053     //*/
01054 
01055     //*
01056     // Add force from Home Spring
01057     force = Device__CalSpringForce( 
01058         HKs,    
01059         HKd,    
01060         0.0f,   
01061         X1,     
01062         HX1,    
01063         V1,     
01064         V2      
01065     );
01066     total_force.x += force.x;
01067     total_force.y += force.y;
01068     total_force.z += force.z;
01069     //*/
01070 
01071     return total_force;
01072 }
01073 
01074 
01075 // CUDA Kernel -- For HETriMeshOneModelMultiParts AdvSim Function
01076 template <int COMPUTATION_CHOICE>
01077 __global__
01078 void Global__HETriMeshOneModelMultiParts_AdvSim_CU ( 
01079     unsigned int numOfVertices,     
01080     unsigned int numOfThreads,      
01081     float currentTime,          
01082     float timeStep,             
01083     float ptMass,               
01084     float Ks,                   
01085     float Kd,                   
01086     float HKs,                  
01087     float HKd,                  
01088     unsigned int max_connection_size,   
01089     float * out_data        
01090 )
01091 {
01092 
01093     switch ( COMPUTATION_CHOICE ) {
01094 
01095         // Emulate Viscoelastic by home vertex positions
01096         case 1:
01097         {
01098             // Block index
01099             int bx = blockIdx.x;
01100 
01101             // Thread index
01102             int tx = threadIdx.x;
01103 
01104             // Indices
01105             int idx = (numOfThreads * bx + tx) * 4;
01106             int vertexNo = (numOfThreads * bx + tx);
01107 
01108             // Fetch data from texture linear memory
01109             float4 homeVertexPos = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
01110             float4 vertex_pos = tex1Dfetch( CudaTexVertexList, vertexNo );
01111 
01112             float3 len;
01113             len.x = homeVertexPos.x - vertex_pos.x;
01114             len.y = homeVertexPos.y - vertex_pos.y;
01115             len.z = homeVertexPos.z - vertex_pos.z;
01116 
01117             len.x *= timeStep;
01118             len.y *= timeStep;
01119             len.z *= timeStep;
01120 
01121             float3 new_pos;
01122             new_pos.x = vertex_pos.x + len.x;
01123             new_pos.y = vertex_pos.y + len.y;
01124             new_pos.z = vertex_pos.z + len.z;
01125 
01126             //out_data[idx] = new_pos;
01127             out_data[idx  ] = new_pos.x;
01128             out_data[idx+1] = new_pos.y;
01129             out_data[idx+2] = new_pos.z;
01130             out_data[idx+3] = vertex_pos.w;
01131         }
01132         break;
01133 
01134         // Emulate Viscoelastic by mass spring connections
01135         case 2:
01136         {
01137             // Block index
01138             int bx = blockIdx.x;
01139 
01140             // Thread index
01141             int tx = threadIdx.x;
01142 
01143             // Vertex number
01144             int vertexNo = (numOfThreads * bx + tx);
01145             if ( vertexNo >= numOfVertices ) return;
01146 
01147             // Indix for output
01148             int idx = (numOfThreads * bx + tx) * 4;
01149 
01150             // Fetch data from texture linear memory
01151             float4 current_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );
01152             float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );
01153 
01154             // Synchronize to make sure the data are loaded
01155             //__syncthreads();
01156 
01157             float3 force = Device__CalSpringForce_VertexList( 
01158                 Ks,                 
01159                 Kd,                 
01160                 HKs,                
01161                 HKd,                
01162                 vertexNo,           
01163                 max_connection_size 
01164             );
01165 
01166             // Convert force to acceleration
01167             float invMass = 1.0f / ptMass;
01168             float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
01169             float dt_sqrt = timeStep*timeStep;
01170 
01171             float3 new_pos = Device__VerletIntegration( 
01172                 acceleration,   
01173                 current_pos,    
01174                 previous_pos,   
01175                 dt_sqrt,        
01176                 Kd              
01177             );
01178 
01179             //  WARNING: out data have to be set at the end, otherwise it won't work!!! ???
01180             out_data[idx  ] = new_pos.x;
01181             out_data[idx+1] = new_pos.y;
01182             out_data[idx+2] = new_pos.z;
01183             out_data[idx+3] = current_pos.w;
01184         }
01185         break;
01186     }
01187 }
01188 
01189 
01190 // CUDA Wrapper Function -- For HETriMeshOneModelMultiParts AdvSim Function
01191 void Global__HETriMeshOneModelMultiParts_AdvSim ( 
01192     unsigned int cudaID,        
01193     unsigned int numOfVertices, 
01194     unsigned int numOfThreads,  
01195     float currentTime,          
01196     float timeStep,             
01197     int numOfSubSteps,          
01198     float ptMass,               
01199     float Ks,                   
01200     float Kd,                   
01201     float HKs,                  
01202     float HKd,                  
01203     float * host_vertex_data,           
01204     float * host_prev_vertex_data,      
01205     float * host_home_vertex_data,      
01206     int *   host_connection_list,       
01207     unsigned int max_connection_size    
01208 )
01209 {
01210     //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
01211     //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
01212 
01213     //printf( "Number of vertices: %d\n", numOfVertices );
01214     //printf( "Number of threads: %d\n", numOfThreads );
01215     //printf( "Number of grids: %d\n", numOfVertices/numOfThreads );
01216 
01217     // Allocate device memory for out data
01218     unsigned int size = sizeof(float) * numOfVertices * 4;
01219     float * out_data;
01220     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
01221 
01222     // Copy Vertex List (and Previous Vertex List) from host data to device data, 
01223     // since vertices are dynamically changed.
01224     // While home vertices and connection list are unchanged.
01225     CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
01226     // The Previous Vertex List can be saved in CUDA memory.
01227     // Hence, the Previous Vertex List does not have to be updated.
01228     //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
01229 
01230     // Kernel invocation
01231     unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
01232     dim3 Dg( numOfThreadBlocks, 1, 1 );
01233     dim3 Db( numOfThreads, 1, 1 );
01234     size_t Ns = 0;
01235     cudaStream_t S = 0;
01236 
01237     // Bind Textures
01238     DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
01239     DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
01240     DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices );
01241     DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexConnectionList( numOfVertices, max_connection_size );
01242 
01243     int CHOICE = 2;
01244 
01245     // HOME VERTEX
01246     if ( CHOICE == 1 ) {
01247         // Call the CUDA kernel (Choice 1 -- Home Vertex Position Only)
01248         Global__HETriMeshOneModelMultiParts_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 
01249             numOfVertices, numOfThreads, 
01250             currentTime, timeStep, 
01251             ptMass, Ks, Kd, HKs, HKd, 
01252             max_connection_size, 
01253             out_data 
01254         );
01255         // Swap pointers for Vertex List and Previous Vertex List
01256         // So that the Previous Vertex List is remain unchanged in the CUDA memory.
01257         DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
01258     }
01259 
01260     // MASS SPRING SYSTEM
01261     else if ( CHOICE == 2 ) {
01262         float subTimeStep = timeStep / numOfSubSteps;
01263         for ( int i = 0; i < numOfSubSteps; ++i ) {
01264             //printf( "delta time: %g\n", timeStep/numOfSubSteps );
01265 
01266             // Call the CUDA kernel (Choice 2 -- Mass Spring System)
01267             Global__HETriMeshOneModelMultiParts_AdvSim_CU<2><<< Dg, Db, Ns, S >>>( 
01268                 numOfVertices, numOfThreads, 
01269                 currentTime, subTimeStep, 
01270                 ptMass, Ks, Kd, HKs, HKd, 
01271                 max_connection_size, 
01272                 out_data 
01273             );
01274             // Swap pointers for Vertex List and Previous Vertex List
01275             // So that the Previous Vertex List is remain unchanged in the CUDA memory.
01276             DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
01277 
01278             // Copy data to the device's memory for (current) Vertex List
01279             CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
01280             currentTime += subTimeStep;
01281         }
01282     }
01283 
01284     // Unbind Textures
01285     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
01286     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
01287     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();
01288     DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexConnectionList();
01289 
01290     // Check if kernel execution generated and error
01291     CUT_CHECK_ERROR( "Kernel execution failed!" );
01292 
01293     // Copy output data to host data Previous Vertex List
01294     // The host will have to swap its pointers (for current and previous vertex list)
01295     if ( CHOICE > 0 ) {
01296         CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
01297     }
01298 
01299     //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, host_prev_vertex_data, host_home_vertex_data );
01300     //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug_ConnectionList( numOfVertices, max_connection_size, host_connection_list );
01301 
01302     // Free device memory
01303     CUDA_SAFE_CALL( cudaFree( out_data ) );
01304 }
01305 
01306 //-----------------------------------------------------------------------------
01307 // For HETriMeshOneModelMultiParts AdvSim Function
01308 //=============================================================================
01309 
01310 //-----------------------------------------------------------------------------
01311 //=============================================================================
01312 END_NAMESPACE_TAPs__CUDA
01313 //-----------------------------------------------------------------------------
01314 //34567890123456789012345678901234567890123456789012345678901234567890123456789
01315 //--+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines