TAPsCUDA.cu File Reference

#include "../Support/TAPsListOfAdvSimConsts.hpp"
#include <cstdlib>
#include <cstdio>
#include <cutil.h>
#include "TAPsCUDA_Def.cu"

Include dependency graph for TAPsCUDA.cu:

Go to the source code of this file.

Classes

class  TAPs_CUDA_CLASS_DATA_Pool
class  TAPs_CUDA_CLASS_DATA_Vertex_List

Defines

#define TAPs_CUDA_DATA_VERTEX_LIST_TEXTURE_FROM_LIN_MEM

Functions

bool AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List (TAPs_CUDA_CLASS_DATA_Vertex_List *dataObj)
 Add Vertex List Data Object to the CUDA Global Data Pool.
template<typename T>
__device__ void TAPsCUDA__device__F1 ()
template<typename T>
__global__ void TAPsCUDA__global__F1 ()
void TAPsCUDA__global__HETriMeshOneModelMultiParts_AdvSim (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float HKs, float HKd, float *host_vertex_data, float *host_prev_vertex_data, float *host_home_vertex_data, int *host_connection_list, unsigned int max_connection_size)
 For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function.
void TAPsCUDA__global__ModelStrand_AdvSim (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float Lrest, float *host_vertex_data, float *host_prev_vertex_data)
 For Suture Model (ModelStrand & ModelSuture).
void TAPsCUDA__global__ModelStrand_AdvSim_ADVSIM (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float Lrest, float *host_vertex_data, float *host_prev_vertex_data, unsigned int *host_sim_flags_data, float *host_pos_constraint_data)
void TAPsCUDA__global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim (int numOfVertices, int numOfThreads, float tCurrent, float tNext, float *host_data)
 For Suture Model (ModelStrand & ModelSuture).
template<typename T>
__host__ __device__ void TAPsCUDA__host__device__F1 ()
void TAPsCUDA__host__Init (int argc, char **argv)
 Initialize CUDA Device.
bool TAPsCUDA__InitailizeDataForSutureModel (unsigned int &cudaID, unsigned int numOfVertices, float *vertexList, float *prevVertexList)
 Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture).
bool TAPsCUDA__InitailizeDataForSutureModel_ADVSIM (unsigned int &cudaID, unsigned int numOfVertices, float *vertexList, float *prevVertexList, unsigned int *simFlagsList, float *posConstraintList)
 Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture).
bool TAPsCUDA__InitailizeDataForVertexList (unsigned int &cudaID, unsigned int numOfVertices, unsigned int max_connection_size, float *vertexList, float *prevVertexList, float *homeVertexList, int *vertexConnectionList)
 Initialize CUDA Data for a Mesh Model by Using Vertex List.

Variables

texture< float4,
1, cudaReadModeElementType > 
CudaTexHomeVertexList
 texture for list of home vertices
texture< float4,
1, cudaReadModeElementType > 
CudaTexPosConstraintList
 texture for list of position constraints (xyz) plus force ratio (w)
texture< float4,
1, cudaReadModeElementType > 
CudaTexPrevVertexList
 texture for list of previous vertices
texture< uint1,
1, cudaReadModeElementType > 
CudaTexSimFlagsList
 texture for list of simulation flags
texture< int1,
1, cudaReadModeElementType > 
CudaTexVertexConnectionList
 texture for list of vertex connections (x:=vertex#, y:=rest length, z:=stiffness, w:=damper)
texture< float4,
1, cudaReadModeElementType > 
CudaTexVertexList
 For HETriMeshOneModelMultiParts AdvSim Function.
TAPs_CUDA_CLASS_DATA_Pool TAPs_CUDA_DATA_GlobalPool
 CUDA Global Data Pool.


Define Documentation

#define TAPs_CUDA_DATA_VERTEX_LIST_TEXTURE_FROM_LIN_MEM

Definition at line 243 of file TAPsCUDA.cu.


Function Documentation

bool AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List ( TAPs_CUDA_CLASS_DATA_Vertex_List dataObj  ) 

Add Vertex List Data Object to the CUDA Global Data Pool.

Definition at line 150 of file TAPsCUDA_Def.cu.

00151 {
00152     //printf( "AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List\n" );
00153     //fflush( stdout );
00154 
00155     unsigned int current_size = TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool;
00156     if ( current_size == 0 ) {
00157         // SHOULD NEVER REACHED THIS!!!
00158         //TAPs_CUDA_DATA_GlobalPool.DataForVertexList = (TAPs_CUDA_CLASS_DATA_Vertex_List *)malloc( ++TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool );
00159         TAPs_CUDA_DATA_GlobalPool.DataForVertexList = new TAPs_CUDA_CLASS_DATA_Vertex_List * [ ++TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool ];
00160     }
00161     else {
00162         TAPs_CUDA_CLASS_DATA_Vertex_List ** old_list = TAPs_CUDA_DATA_GlobalPool.DataForVertexList;
00163         //TAPs_CUDA_DATA_GlobalPool.DataForVertexList = (TAPs_CUDA_CLASS_DATA_Vertex_List *)malloc( ++TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool );
00164         TAPs_CUDA_DATA_GlobalPool.DataForVertexList = new TAPs_CUDA_CLASS_DATA_Vertex_List * [ ++TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool ];
00165         for ( int i = 0; i < current_size; ++i ) {
00166             TAPs_CUDA_DATA_GlobalPool.DataForVertexList[ i ] = old_list[ i ];
00167         }
00168         //free ( old_list );
00169         delete [] old_list;
00170     }
00171     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[ current_size ] = dataObj;
00172     return true;
00173 }

template<typename T>
__device__ void TAPsCUDA__device__F1 (  )  [inline]

__device__F1 __device__ means a function that is executed on the device and callable from the device only.

Definition at line 116 of file TAPsCUDA.cu.

00116 {}

template<typename T>
__global__ void TAPsCUDA__global__F1 (  )  [inline]

__global__F1 __global__ means a function as being a kernel, which is executed on the device and callable from the host only.

Definition at line 110 of file TAPsCUDA.cu.

00110 {}

void TAPsCUDA__global__HETriMeshOneModelMultiParts_AdvSim ( unsigned int  cudaID,
unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
int  numOfSubSteps,
float  ptMass,
float  Ks,
float  Kd,
float  HKs,
float  HKd,
float *  host_vertex_data,
float *  host_prev_vertex_data,
float *  host_home_vertex_data,
int *  host_connection_list,
unsigned int  max_connection_size 
)

For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function.

For HETriMeshOneModelMultiParts AdvSim Function CUDA Wrapper Function -- For HETriMeshOneModelMultiParts AdvSim Function

Parameters:
cudaID  CUDA ID
numOfVertices  number of vertices
numOfThreads  number of threads per CUDA thread block
currentTime  current time
timeStep  time step
numOfSubSteps  number of sub-steps
ptMass  point mass
Ks  spring stiffness
Kd  spring damper
HKs  home spring stiffness
HKd  home spring damper
host_vertex_data  host's vertex data
host_prev_vertex_data  host's previous vertex data
host_home_vertex_data  host's home vertex data
host_connection_list  host's connection index list
max_connection_size  maximum connection size

Definition at line 1217 of file TAPsCUDA_Def.cu.

01235 {
01236     //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
01237     //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
01238 
01239     //printf( "Number of vertices: %d\n", numOfVertices );
01240     //printf( "Number of threads: %d\n", numOfThreads );
01241     //printf( "Number of grids: %d\n", numOfVertices/numOfThreads );
01242 
01243     // Allocate device memory for out data
01244     unsigned int size = sizeof(float) * numOfVertices * 4;
01245     float * out_data;
01246     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
01247 
01248     // Copy Vertex List (and Previous Vertex List) from host data to device data, 
01249     // since vertices are dynamically changed.
01250     // While home vertices and connection list are unchanged.
01251     CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
01252     // The Previous Vertex List can be saved in CUDA memory.
01253     // Hence, the Previous Vertex List does not have to be updated.
01254     //CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
01255 
01256     // Kernel invocation
01257     unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
01258     dim3 Dg( numOfThreadBlocks, 1, 1 );
01259     dim3 Db( numOfThreads, 1, 1 );
01260     size_t Ns = 0;
01261     cudaStream_t S = 0;
01262 
01263     // Bind Textures
01264     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
01265     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
01266     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices );
01267     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexConnectionList( numOfVertices, max_connection_size );
01268 
01269     int CHOICE = 2;
01270 
01271     // HOME VERTEX
01272     if ( CHOICE == 1 ) {
01273         // Call the CUDA kernel (Choice 1 -- Home Vertex Position Only)
01274         TAPsCUDA__global__HETriMeshOneModelMultiParts_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 
01275             numOfVertices, numOfThreads, 
01276             currentTime, timeStep, 
01277             ptMass, Ks, Kd, HKs, HKd, 
01278             max_connection_size, 
01279             out_data 
01280         );
01281         // Swap pointers for Vertex List and Previous Vertex List
01282         // So that the Previous Vertex List is remain unchanged in the CUDA memory.
01283         TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
01284     }
01285 
01286     // MASS SPRING SYSTEM
01287     else if ( CHOICE == 2 ) {
01288         float subTimeStep = timeStep / numOfSubSteps;
01289         for ( int i = 0; i < numOfSubSteps; ++i ) {
01290             //printf( "delta time: %g\n", timeStep/numOfSubSteps );
01291 
01292             // Call the CUDA kernel (Choice 2 -- Mass Spring System)
01293             TAPsCUDA__global__HETriMeshOneModelMultiParts_AdvSim_CU<2><<< Dg, Db, Ns, S >>>( 
01294                 numOfVertices, numOfThreads, 
01295                 currentTime, subTimeStep, 
01296                 ptMass, Ks, Kd, HKs, HKd, 
01297                 max_connection_size, 
01298                 out_data 
01299             );
01300             // Swap pointers for Vertex List and Previous Vertex List
01301             // So that the Previous Vertex List is remain unchanged in the CUDA memory.
01302             TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
01303 
01304             // Copy data to the device's memory for (current) Vertex List
01305             CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
01306             currentTime += subTimeStep;
01307         }
01308     }
01309 
01310     // Unbind Textures
01311     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
01312     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
01313     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();
01314     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexConnectionList();
01315 
01316     // Check if kernel execution generated and error
01317     CUT_CHECK_ERROR( "Kernel execution failed!" );
01318 
01319     // Copy output data to host data Previous Vertex List
01320     // The host will have to swap its pointers (for current and previous vertex list)
01321     if ( CHOICE > 0 ) {
01322         CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
01323     }
01324 
01325     //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, host_prev_vertex_data, host_home_vertex_data );
01326     //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug_ConnectionList( numOfVertices, max_connection_size, host_connection_list );
01327 
01328     // Free device memory
01329     CUDA_SAFE_CALL( cudaFree( out_data ) );
01330 }

void TAPsCUDA__global__ModelStrand_AdvSim ( unsigned int  cudaID,
unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
int  numOfSubSteps,
float  ptMass,
float  Ks,
float  Kd,
float  Lrest,
float *  host_vertex_data,
float *  host_prev_vertex_data 
)

For Suture Model (ModelStrand & ModelSuture).

CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function

Parameters:
cudaID  CUDA ID
numOfVertices  number of vertices
numOfThreads  number of threads per CUDA thread block
currentTime  current time
timeStep  time step
numOfSubSteps  number of sub-steps
ptMass  point mass
Ks  spring stiffness
Kd  spring damper
Lrest  spring rest length
host_vertex_data  host's vertex data
host_prev_vertex_data  host's previous vertex data

Definition at line 652 of file TAPsCUDA_Def.cu.

00667 {
00668     //printf( "FILE:%s LINE:%d TAPsCUDA__global__ModelStrand_AdvSim\n", __FILE__, __LINE__ );
00669     //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
00670     //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
00671     //printf( "Number of vertices: %d\n", numOfVertices );
00672     //printf( "Number of threads: %d\n", numOfThreads );
00673     //printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );
00674 
00675     // Allocate device memory for out data
00676     unsigned int size = sizeof(float) * numOfVertices * 4;
00677     float * out_data;
00678     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
00679 
00680     // Copy Vertex List (and Previous Vertex List) from host data to device data, 
00681     // since vertices are dynamically changed.
00682     // While home vertices and connection list are unchanged.
00683     CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
00684     // The Previous Vertex List can be saved in CUDA memory.
00685     // Hence, the Previous Vertex List does not have to be updated.
00686     //CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
00687 
00688     // Kernel invocation
00689     unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
00690     dim3 Dg( numOfThreadBlocks, 1, 1 );
00691     dim3 Db( numOfThreads, 1, 1 );
00692     size_t Ns = 0;
00693     cudaStream_t S = 0;
00694 
00695     // Bind Textures
00696     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
00697     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
00698     //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices );
00699 
00700     //*
00701     int CHOICE = 1;
00702 
00703     // MASS SPRING SYSTEM
00704     if ( CHOICE == 1 ) {
00705         float subTimeStep = timeStep / numOfSubSteps;
00706         for ( int i = 0; i < numOfSubSteps; ++i ) {
00707             // Call the CUDA kernel (Choice 1 -- Mass Spring System)
00708             TAPsCUDA__global__ModelStrand_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 
00709                 numOfVertices, numOfThreads, 
00710                 currentTime, subTimeStep, 
00711                 ptMass, Ks, Kd, Lrest, 
00712                 out_data 
00713             );
00714             // Swap pointers for Vertex List and Previous Vertex List
00715             // So that the Previous Vertex List is remain unchanged in the CUDA memory.
00716             TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
00717 
00718             // Copy data to the device's memory for (current) Vertex List
00719             CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
00720             currentTime += subTimeStep;
00721         }
00722     }
00723     //*/
00724 
00725     // Unbind Textures
00726     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
00727     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
00728     //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();
00729 
00730     // Check if kernel execution generated and error
00731     CUT_CHECK_ERROR( "Kernel execution failed!" );
00732 
00733     // Copy output data to host data Previous Vertex List
00734     // The host will have to swap its pointers (for current and previous vertex list)
00735     if ( CHOICE > 0 ) {
00736         CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
00737     }
00738 
00739     //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data );
00740 
00741     // Free device memory
00742     CUDA_SAFE_CALL( cudaFree( out_data ) );
00743 }

void TAPsCUDA__global__ModelStrand_AdvSim_ADVSIM ( unsigned int  cudaID,
unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
int  numOfSubSteps,
float  ptMass,
float  Ks,
float  Kd,
float  Lrest,
float *  host_vertex_data,
float *  host_prev_vertex_data,
unsigned int *  host_sim_flags_data,
float *  host_pos_constraint_data 
)

CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function with TAPs_ADVANCED_SIMULATION

CUDA Wrapper Function -- For ModelStrand AdvSim Function with TAPs_ADVANCED_SIMULATION

Parameters:
cudaID  CUDA ID
numOfVertices  number of vertices
numOfThreads  number of threads per CUDA thread block
currentTime  current time
timeStep  time step
numOfSubSteps  number of sub-steps
ptMass  point mass
Ks  spring stiffness
Kd  spring damper
Lrest  spring rest length
host_vertex_data  host's vertex data
host_prev_vertex_data  host's previous vertex data
host_sim_flags_data  host's simulation flags data
host_pos_constraint_data  host's position constraint data

Definition at line 748 of file TAPsCUDA_Def.cu.

00764 {
00765     printf( "FILE:%s LINE:%d TAPsCUDA__global__ModelStrand_AdvSim_ADVSIM\n", __FILE__, __LINE__ );
00766     printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
00767     printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
00768     printf( "Number of vertices: %d\n", numOfVertices );
00769     printf( "Number of threads: %d\n", numOfThreads );
00770     printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );
00771 
00772     // Allocate device memory for out data
00773     unsigned int size = sizeof(float) * numOfVertices * 4;
00774     float * out_data;
00775     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
00776 
00777     // Allocate device memory for out data for changing suture's simulation flags list
00778     unsigned int size_SimFlagsList = sizeof(unsigned int) * numOfVertices;
00779     unsigned int * out_data_SimFlagsList;
00780     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_SimFlagsList, size_SimFlagsList ) );
00781 
00782     // Allocate device memory for out data for changing suture's position constraint list
00783     float * out_data_PosConstraintList;
00784     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_PosConstraintList, size ) );
00785 
00786     // Copy Vertex List (and Previous Vertex List) from host data to device data, 
00787     // since vertices are dynamically changed.
00788     // While home vertices and connection list are unchanged.
00789     CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
00790     // The Previous Vertex List can be saved in CUDA memory.
00791     // Hence, the Previous Vertex List does not have to be updated.
00792     //CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
00793 
00794     CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetSimFlagsList(), host_sim_flags_data, sizeof(unsigned int)*numOfVertices, cudaMemcpyHostToDevice ) );
00795     CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetPosConstraintList(), host_pos_constraint_data, size, cudaMemcpyHostToDevice ) );
00796 
00797     // Kernel invocation
00798     unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
00799     dim3 Dg( numOfThreadBlocks, 1, 1 );
00800     dim3 Db( numOfThreads, 1, 1 );
00801     size_t Ns = 0;
00802     cudaStream_t S = 0;
00803 
00804     // Bind Textures
00805     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
00806     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
00807     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindSimFlagsList( numOfVertices );
00808     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindPosConstraintList( numOfVertices );
00809 
00810     //*
00811     // Add 10 for TAPs_ADVANCED_SIMULATION
00812     int CHOICE = 100 + 1;
00813 
00814     // MASS SPRING SYSTEM
00815     if ( CHOICE == 101 ) {
00816         float subTimeStep = timeStep / numOfSubSteps;
00817         for ( int i = 0; i < numOfSubSteps; ++i ) {
00818             //printf( "delta time: %g\n", timeStep/numOfSubSteps );
00819 
00820             // Call the CUDA kernel (Choice 100+1 -- Mass Spring System)
00821             // with TAPs_ADVANCED_SIMULATION
00822             TAPsCUDA__global__ModelStrand_AdvSim_CU<101><<< Dg, Db, Ns, S >>>( 
00823                 numOfVertices, numOfThreads, 
00824                 currentTime, subTimeStep, 
00825                 ptMass, Ks, Kd, Lrest, 
00826                 out_data 
00827             );
00828             // Swap pointers for Vertex List and Previous Vertex List
00829             // So that the Previous Vertex List is remain unchanged in the CUDA memory.
00830             TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
00831 
00832             // Copy data to the device's memory for (current) Vertex List
00833             CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
00834             currentTime += subTimeStep;
00835         }
00836 
00837         // Call the CUDA kernel to enforce constraints
00838         // with TAPs_ADVANCED_SIMULATION
00839         TAPsCUDA__global__ModelStrand_AdvSim_Enforce_Constraint_CU<101><<< Dg, Db, Ns, S >>>( 
00840             numOfVertices, numOfThreads, 
00841             currentTime, subTimeStep, 
00842             ptMass, Ks, Kd, Lrest, 
00843             out_data_SimFlagsList, 
00844             out_data_PosConstraintList 
00845         );
00846     }
00847     //*/
00848 
00849     // Unbind Textures
00850     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
00851     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
00852     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindSimFlagsList();
00853     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPosConstraintList();
00854 
00855     // Check if kernel execution generated and error
00856     CUT_CHECK_ERROR( "Kernel execution failed!" );
00857 
00858     // Copy output data to host data Previous Vertex List
00859     // The host will have to swap its pointers (for current and previous vertex list)
00860     if ( CHOICE > 0 ) {
00861         CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
00862         CUDA_SAFE_CALL( cudaMemcpy( host_sim_flags_data, out_data_SimFlagsList, size_SimFlagsList, cudaMemcpyDeviceToHost ) );
00863         CUDA_SAFE_CALL( cudaMemcpy( host_pos_constraint_data, out_data_PosConstraintList, size, cudaMemcpyDeviceToHost ) );
00864     }
00865 
00866     /*
00867     // DEBUG
00868     {
00869         float * data = (float *)malloc( size );
00870         CUDA_SAFE_CALL( cudaMemcpy( data, out_data, size, cudaMemcpyDeviceToHost ) );
00871         for ( int i = 0, n = 0; i < 2; ++i, n+=4 ) {
00872             printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] );
00873         }
00874         for ( int i = numOfVertices-2, n = i*4; i < numOfVertices; ++i, n+=4 ) {
00875             printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] );
00876         }
00877         free( data );
00878     }
00879     //*/
00880 
00881     TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, NULL, NULL, host_sim_flags_data, host_pos_constraint_data );
00882 
00883     // Free device memory
00884     CUDA_SAFE_CALL( cudaFree( out_data ) );
00885     CUDA_SAFE_CALL( cudaFree( out_data_SimFlagsList ) );
00886     CUDA_SAFE_CALL( cudaFree( out_data_PosConstraintList ) );
00887 }

void TAPsCUDA__global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim ( int  numOfVertices,
int  numOfThreads,
float  currentTime,
float  timeStep,
float *  host_data 
)

For Suture Model (ModelStrand & ModelSuture).

For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function CUDA Wrapper Function -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function

Parameters:
numOfVertices  number of vertices
numOfThreads  number of threads per CUDA thread block
currentTime  current time
timeStep  time step
host_data  host data

Definition at line 963 of file TAPsCUDA_Def.cu.

00970 {
00971     /*
00972     // CUDA device properties
00973     int dev_no;
00974     CUDA_SAFE_CALL( cudaGetDevice( &dev_no ) );
00975     cudaDeviceProp * prop;
00976     CUDA_SAFE_CALL( cudaGetDeviceProperties( prop, dev_no ) );
00977     //printf( "maxThreadsPerBlock: %d\n", prop->maxThreadsPerBlock );
00978     //*/
00979 
00980     // Kernel invocation
00981     unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
00982     dim3 Dg( numOfThreadBlocks, 1, 1 );
00983     dim3 Db( numOfThreads, 1, 1 );
00984     size_t Ns = 0;
00985     cudaStream_t S = 0;
00986 
00987     printf( "Number of vertices: %d\n", numOfVertices );
00988     printf( "Number of threads: %d\n", numOfThreads );
00989     printf( "Number of grids: %d\n", numOfVertices/numOfThreads );
00990 
00991     // Allocate device memory
00992     unsigned int size = sizeof(float) * numOfVertices * 3;
00993     float * device_data;
00994     CUDA_SAFE_CALL( cudaMalloc( (void **)&device_data, size ) );
00995     float * out_data;
00996     CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
00997 
00998     // Copy host data to device data
00999     CUDA_SAFE_CALL( cudaMemcpy( device_data, host_data, size, cudaMemcpyHostToDevice ) );
01000 
01001     // Call the CUDA kernel
01002     TAPsCUDA__global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU<1><<< Dg, Db, Ns, S >>>
01003     ( numOfVertices, numOfThreads, currentTime, timeStep, device_data, out_data );
01004 
01005     // Check if kernel execution generated and error
01006     CUT_CHECK_ERROR( "Kernel execution failed" );
01007 
01008     // Copy output data to host data
01009     CUDA_SAFE_CALL( cudaMemcpy( host_data, out_data, size, cudaMemcpyDeviceToHost ) );
01010 
01011 
01012     // Free device memory
01013     CUDA_SAFE_CALL( cudaFree( device_data ) );
01014     CUDA_SAFE_CALL( cudaFree( out_data ) );
01015 }

template<typename T>
__host__ __device__ void TAPsCUDA__host__device__F1 (  )  [inline]

__host__device__F1 __host__ and __device__ means a function that is compiled for both the host and the device.

Definition at line 122 of file TAPsCUDA.cu.

00122 {}

void TAPsCUDA__host__Init ( int  argc,
char **  argv 
)

Initialize CUDA Device.

To use CUDA (Compute Unified Device Architecture by NVIDIA)
Add G:G: Files Corporation CUDA SDK/common/inc; to the "(C/C++) Additional Include Directories".
Add G:;G: Files Corporation CUDA SDK/common/lib; to the "(Linker) Additional Library Directories".
Add this file to the project solution.
Configure the "Configuration Properties -- General -- Tool" of this file to "Custom Build Tool".
In "Custom Build Step -- General", set the "Command Line" to [Debug Version -- In the program directory] "$(CUDA_BIN_PATH)/nvcc.exe" -ccbin "$(VC80_InstallDir)/bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I"G:G: Files Corporation CUDA SDK/common/inc" -o $(InputName).obj [Debug Version -- In the TAPs/Cuda directory] "$(CUDA_BIN_PATH)/nvcc.exe" -ccbin "$(VC80_InstallDir)/bin" -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MTd -I"G:G: Files Corporation CUDA SDK/common/inc" -o $(InputName).obj G:/TAPs/Cuda/ [Release Version -- In the TAPs/Cuda directory] "$(CUDA_BIN_PATH)/nvcc.exe" -ccbin "$(VC80_InstallDir)/bin" -c -Xcompiler /EHsc,/nologo,/MD -I"G:G: Files Corporation CUDA SDK/common/inc" -o $(InputName).obj G:/TAPs/Cuda/
In "Custom Build Step -- General", set the "Description" to Performing Custom Build Step
In "Custom Build Step -- General", set the "Outputs" to $(InputName).obj
In "Custom Build Step -- General", set the "Additional Dependencies" to _Def.cu
From CUDA Programming Guide
Any call to a __global__ function must specify the execution configuration for that call.
The execution configuration defines the dimension of the grid and blocks that will be used to execute the function on the device, as well as the associated stream (see Section 4.5.1.5 for a description of streams). It is specified by inserting an expression of the form <<< Dg, Db, Ns, S >>> between the function name and the parenthesized argument list, where:
  • Dg is of type dim3 (see Section 4.3.1.2) and specifies the dimension and size of the grid, such that Dg.x * Dg.y equals the number of blocks being launched; Dg.z is unused;
  • Db is of type dim3 (see Section 4.3.1.2) and specifies the dimension and size of each block, such that Db.x * Db.y * Db.z equals the number of threads per block;
  • Ns is of type size_t and specifies the number of bytes in shared memory that is dynamically allocated per block for this call in addition to the statically allocated memory; this dynamically allocated memory is used by any of the variables declared as an external array as mentioned in Section 4.2.2.3; Ns is an optional argument which defaults to 0;
  • S is of type cudaStream_t and specifies the associated stream; S is an optional argument which defaults to 0.
Initialize CUDA Device.

Initialize CUDA Device

Definition at line 21 of file TAPsCUDA_Def.cu.

00022 {
00023     printf( "START: CUT_DEVICE_INIT\n" );
00024     CUT_DEVICE_INIT(argc, argv);
00025     printf( "END:   CUT_DEVICE_INIT\n" );
00026 }

bool TAPsCUDA__InitailizeDataForSutureModel ( unsigned int &  cudaID,
unsigned int  numOfVertices,
float *  vertexList,
float *  prevVertexList 
)

Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture).

Parameters:
cudaID  CUDA ID for the object
numOfVertices  number of vertices
vertexList  list of xyzw vertices
prevVertexList  list of previous xyzw vertices

Definition at line 68 of file TAPsCUDA_Def.cu.

00074 {
00075     // Here Suture Model uses Vertex List Data
00076     // However, it has implicit connection (each vertex connect to the previous and next vertices).
00077     // So the connection list is set to zero.
00078 
00079     // Home vertex is unused for now.  Plan is to use home vertex for sticking suture to a surface.
00080 
00081     // Assign CUDA ID to the object
00082     // The object has to use cudaID to communicate with the CUDA.
00083     cudaID = TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool;
00084 
00085     // Allocate CUDA data for the object.
00086     TAPs_CUDA_CLASS_DATA_Vertex_List * newData = new TAPs_CUDA_CLASS_DATA_Vertex_List( numOfVertices, false, 0, true );
00087     AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List( newData );
00088 
00089     // Size of memory for (xyzw) vertices
00090     int size = numOfVertices * 4 * sizeof(float);
00091 
00092     // Copy Vertex List from host data to device data
00093     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
00094     // Copy Previous Vertex List from host data to device data
00095     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
00096     // Copy Home Vertex List from host data to device data
00097     //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
00098     // Copy Vertex Connection List from host data to device data
00100 
00101     return true;
00102 }

bool TAPsCUDA__InitailizeDataForSutureModel_ADVSIM ( unsigned int &  cudaID,
unsigned int  numOfVertices,
float *  vertexList,
float *  prevVertexList,
unsigned int *  simFlagsList,
float *  posConstraintList 
)

Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture).

Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture) with TAPs_ADVANCED_SIMULATION

Parameters:
cudaID  CUDA ID for the object
numOfVertices  number of vertices
vertexList  list of xyzw vertices
prevVertexList  list of previous xyzw vertices
simFlagsList  list of simulation flags
posConstraintList  list of position constraints

Definition at line 106 of file TAPsCUDA_Def.cu.

00114 {
00115     // Here Suture Model uses Vertex List Data
00116     // However, it has implicit connection (each vertex connect to the previous and next vertices).
00117     // So the connection list is set to zero.
00118 
00119     // Home vertex is unused for now.  Plan is to use home vertex for sticking suture to a surface.
00120 
00121     // Assign CUDA ID to the object
00122     // The object has to use cudaID to communicate with the CUDA.
00123     cudaID = TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool;
00124 
00125     // Allocate CUDA data for the object.
00126     TAPs_CUDA_CLASS_DATA_Vertex_List * newData = new TAPs_CUDA_CLASS_DATA_Vertex_List( numOfVertices, false, 0, true );
00127     AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List( newData );
00128 
00129     // Size of memory for (xyzw) vertices
00130     int size = numOfVertices * 4 * sizeof(float);
00131 
00132     // Copy Vertex List from host data to device data
00133     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
00134     // Copy Previous Vertex List from host data to device data
00135     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
00136     // Copy Home Vertex List from host data to device data
00137     //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
00138     // Copy Vertex Connection List from host data to device data
00139     //CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) );
00140     // Copy Simulation Flags List from host data to device data
00141     CUDA_SAFE_CALL( cudaMemcpy( newData->GetSimFlagsList(), simFlagsList, numOfVertices*sizeof(unsigned int), cudaMemcpyHostToDevice ) );
00142     // Copy Position Constraint List from host data to device data
00143     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPosConstraintList(), posConstraintList, size, cudaMemcpyHostToDevice ) );
00144 
00145     return true;
00146 }

bool TAPsCUDA__InitailizeDataForVertexList ( unsigned int &  cudaID,
unsigned int  numOfVertices,
unsigned int  max_connection_size,
float *  vertexList,
float *  prevVertexList,
float *  homeVertexList,
int *  vertexConnectionList 
)

Initialize CUDA Data for a Mesh Model by Using Vertex List.

Parameters:
cudaID  CUDA ID for the object
numOfVertices  number of vertices
max_connection_size  maximum connection size
vertexList  list of xyz vertices
prevVertexList  list of previous xyzw vertices
homeVertexList  list of xyz home vertices
vertexConnectionList  vertex connection (1D array of 2D array -- vertex id and vertex connections)

Definition at line 30 of file TAPsCUDA_Def.cu.

00039 {
00040     //printf( "TAPsCUDA__InitailizeDataForVertexList\n" );
00041     //fflush( stdout );
00042 
00043     // Assign CUDA ID to the object
00044     // The object has to use cudaID to communicate with the CUDA.
00045     cudaID = TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool;
00046 
00047     // Allocate CUDA data for the object.
00048     TAPs_CUDA_CLASS_DATA_Vertex_List * newData = new TAPs_CUDA_CLASS_DATA_Vertex_List( numOfVertices, true, max_connection_size, false );
00049     AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List( newData );
00050 
00051     // Size of memory for (xyzw) vertices
00052     int size = numOfVertices * 4 * sizeof(float);
00053 
00054     // Copy Vertex List from host data to device data
00055     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
00056     // Copy Previous Vertex List from host data to device data
00057     CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
00058     // Copy Home Vertex List from host data to device data
00059     CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
00060     // Copy Vertex Connection List from host data to device data
00061     CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) );
00062 
00063     return true;
00064 }


Variable Documentation

texture<float4, 1, cudaReadModeElementType> CudaTexHomeVertexList

texture for list of home vertices

Definition at line 236 of file TAPsCUDA.cu.

texture<float4, 1, cudaReadModeElementType> CudaTexPosConstraintList

texture for list of position constraints (xyz) plus force ratio (w)

Definition at line 241 of file TAPsCUDA.cu.

texture<float4, 1, cudaReadModeElementType> CudaTexPrevVertexList

texture for list of previous vertices

Definition at line 235 of file TAPsCUDA.cu.

texture<uint1, 1, cudaReadModeElementType> CudaTexSimFlagsList

texture for list of simulation flags

Definition at line 240 of file TAPsCUDA.cu.

texture<int1, 1, cudaReadModeElementType> CudaTexVertexConnectionList

texture for list of vertex connections (x:=vertex#, y:=rest length, z:=stiffness, w:=damper)

Definition at line 237 of file TAPsCUDA.cu.

texture<float4, 1, cudaReadModeElementType> CudaTexVertexList

For HETriMeshOneModelMultiParts AdvSim Function.

CUDA texture dimension supports only 1-, 2-, and 4-component vector types, but not 3.

Use texture from CUDA Linear Memory by calling cudaBindTexture(...). CUDA Linear Memory is allocated by calling cudaMalloc(...). To enter or change the data in the Linear Memory, must use cudaMemcpy(...). and free by calling cudaFree(...).

Use texture from CUDA Arrays by calling cudaBindTextureToArray(...). CUDA Array is allocated by calling cudaMallocArray(...). To enter or change the data in the Array, must use cudaMemcpyToArray(...). and free by calling cudaFreeArray(...).

The texture reference has to be global, otherwise NVCC will generate error! Texture References texture for list of vertices

Definition at line 234 of file TAPsCUDA.cu.

CUDA Global Data Pool.

Definition at line 523 of file TAPsCUDA.cu.


Generated on Mon Oct 13 11:32:50 2008 for TAPs by  doxygen 1.5.6