![]() |
TAPs 0.7.7.3
|
#include "TAPsCUDA_VertexListMSM.cu"
Include dependency graph for TAPsCUDA_VertexListMSM_Def.cu:
This graph shows which files directly or indirectly include this file:Go to the source code of this file.
Functions | |
| BEGIN_NAMESPACE_TAPs__CUDA bool | InitailizeDataForVertexList (unsigned int &cudaID, unsigned int numOfVertices, unsigned int max_connection_size, float *vertexList, float *prevVertexList, float *homeVertexList, int *vertexConnectionList) |
| For HETriMeshOneModelMultiParts AdvSim Function. | |
| bool | InitailizeDataForSutureModel (unsigned int &cudaID, unsigned int numOfVertices, float *vertexList, float *prevVertexList) |
| Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture) | |
| bool | InitailizeDataForSutureModel_ADVSIM (unsigned int &cudaID, unsigned int numOfVertices, float *vertexList, float *prevVertexList, unsigned int *simFlagsList, float *posConstraintList) |
| __device__ float3 | Device__CalSpringForce (float Ks, float Kd, float rest, float4 X1, float4 X2, float4 V1, float4 V2) |
| __device__ float3 | Device__VerletIntegration (float4 A1, float4 X1, float4 X0, float dt_sqrt, float damper) |
| Verlet Integration. | |
| __device__ float3 | Device__EnforceConstraints_ADVSIM (float3 vertexA, float4 vertexB) |
| __device__ float3 | Device__CalSpringForce_ModelStrand (float Ks, float Kd, float Lrest, int vertexNo, unsigned int numOfVertices) |
| template<int COMPUTATION_CHOICE> | |
| __global__ void | Global__ModelStrand_AdvSim_CU (unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, float ptMass, float Ks, float Kd, float Lrest, float *out_data) |
| template<int COMPUTATION_CHOICE> | |
| __global__ void | Global__ModelStrand_AdvSim_Enforce_Constraint_CU (unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, float ptMass, float Ks, float Kd, float Lrest, unsigned int *out_data_SimFlagsList, float *out_data_PosConstraintList) |
| void | Global__ModelStrand_AdvSim (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float Lrest, float *host_vertex_data, float *host_prev_vertex_data) |
| For Suture Model (ModelStrand & ModelSuture) | |
| void | Global__ModelStrand_AdvSim_ADVSIM (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float Lrest, float *host_vertex_data, float *host_prev_vertex_data, unsigned int *host_sim_flags_data, float *host_pos_constraint_data) |
| template<int COMPUTATION_CHOICE> | |
| __global__ void | Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU (int numOfVertices, int numOfThreads, float currentTime, float timeStep, float *device_data, float *out_data) |
| void | Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim (int numOfVertices, int numOfThreads, float currentTime, float timeStep, float *host_data) |
| For Suture Model (ModelStrand & ModelSuture) | |
| __device__ float3 | Device__CalSpringForce_VertexList (float Ks, float Kd, float HKs, float HKd, int vertexNo, unsigned int max_connection_size) |
| template<int COMPUTATION_CHOICE> | |
| __global__ void | Global__HETriMeshOneModelMultiParts_AdvSim_CU (unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, float ptMass, float Ks, float Kd, float HKs, float HKd, unsigned int max_connection_size, float *out_data) |
| void | Global__HETriMeshOneModelMultiParts_AdvSim (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float HKs, float HKd, float *host_vertex_data, float *host_prev_vertex_data, float *host_home_vertex_data, int *host_connection_list, unsigned int max_connection_size) |
| For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function. | |
| __device__ float3 Device__CalSpringForce | ( | float | Ks, |
| float | Kd, | ||
| float | rest, | ||
| float4 | X1, | ||
| float4 | X2, | ||
| float4 | V1, | ||
| float4 | V2 | ||
| ) |
| Ks | spring stiffness |
| Kd | spring damper |
| rest | spring rest length |
| X1 | position of particle one |
| X2 | position of particle two |
| V1 | velocity of particle one |
| V2 | velocity of particle two |
Definition at line 156 of file TAPsCUDA_VertexListMSM_Def.cu.
Referenced by Device__CalSpringForce_ModelStrand(), and Device__CalSpringForce_VertexList().
{
// Formula from Physically Based Modeling (Siggraph 2001 Course Note)
// F1 = -{Ks(l - r) + Kd[(V1-V2)*L]/l}*L/l
// F2 = -F1
// where Ks = spring stiffness, Kd = spring damping,
// V1 and V2 are velocity of particles linked by the spring
// r = spring rest length
// L = vector of difference of positions of particle#1 and #2
// l = magnitude of L
//return ( ( m_tK * (scl - m_tL) ) + ( m_tD * V * cL ) ) * cL;
float3 Xdif = make_float3( X1.x-X2.x, X1.y-X2.y, X1.z-X2.z );
float3 Vdif = make_float3( V1.x-V2.x, V1.y-V2.y, V1.z-V2.z );
//float len = length( Diff ); // length is an inline function in CUDA (cutil_math.h)
float len = sqrtf( Xdif.x*Xdif.x + Xdif.y*Xdif.y + Xdif.z*Xdif.z );
if ( len < 1.0E-16f ) {
return make_float3( 0.0f, 0.0f, 0.0f );
}
float invLen = 1.0f / len;
float3 Dir = make_float3( Xdif.x*invLen, Xdif.y*invLen, Xdif.z*invLen );
float Vdif_Dir = Vdif.x*Dir.x + Vdif.y*Dir.y + Vdif.z*Dir.z;
float fmag = -( Ks*(len-rest) + Kd*Vdif_Dir );
float3 force = make_float3( fmag*Dir.x, fmag*Dir.y, fmag*Dir.z );
return force;
}
Here is the caller graph for this function:| __device__ float3 Device__CalSpringForce_ModelStrand | ( | float | Ks, |
| float | Kd, | ||
| float | Lrest, | ||
| int | vertexNo, | ||
| unsigned int | numOfVertices | ||
| ) |
< spring stiffness
< spring damper
< spring rest length
< position of particle one
< position of particle two
< velocity of particle one
< velocity of particle two
< spring stiffness
< spring damper
< spring rest length
< position of particle one
< position of particle two
< velocity of particle one
< velocity of particle two
| Ks | spring stiffness |
| Kd | spring damper |
| Lrest | spring rest length |
| vertexNo | vertex number |
| numOfVertices | number of vertices |
Definition at line 247 of file TAPsCUDA_VertexListMSM_Def.cu.
References CudaTexVertexList, and Device__CalSpringForce().
Referenced by Global__ModelStrand_AdvSim_CU().
{
float3 total_force = make_float3( 0.0f, 0.0f, 0.0f );
//float4 HX1 = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
float4 X1 = tex1Dfetch( CudaTexVertexList, vertexNo );
float4 V1 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
//float4 HX2;
float4 X2;
float4 V2 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
float3 force;
// The recommended way to round a single-precision floating-point operand to an
// integer, with the result being a single-precision floating-point number is rintf(),
// not roundf().
// Cal force due to connection with the previous vertex
if ( vertexNo > 0 ) {
int prevVertex = vertexNo-1;
//HX2 = tex1Dfetch( CudaTexHomeVertexList, prevVertex );
X2 = tex1Dfetch( CudaTexVertexList, prevVertex );
//float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
//rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );
force = Device__CalSpringForce(
Ks,
Kd,
Lrest,
X1,
X2,
V1,
V2
);
total_force.x += force.x;
total_force.y += force.y;
total_force.z += force.z;
}
// Cal force due to connection with the next vertex
if ( vertexNo < numOfVertices - 1 ) {
int nextVertex = vertexNo+1;
//HX2 = tex1Dfetch( CudaTexHomeVertexList, prevVertex );
X2 = tex1Dfetch( CudaTexVertexList, nextVertex );
//float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
//rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );
force = Device__CalSpringForce(
Ks,
Kd,
Lrest,
X1,
X2,
V1,
V2
);
total_force.x += force.x;
total_force.y += force.y;
total_force.z += force.z;
}
/*
// Add force from Home Spring
force = Device__CalSpringForce(
Ks, //!< spring stiffness
Kd, //!< spring damper
0.0f, //!< spring rest length
X1, //!< position of particle one
HX1, //!< position of particle two
V1, //!< velocity of particle one
V2 //!< velocity of particle two
);
total_force.x += force.x;
total_force.y += force.y;
total_force.z += force.z;
*/
return total_force;
}
Here is the call graph for this function:
Here is the caller graph for this function:| __device__ float3 Device__CalSpringForce_VertexList | ( | float | Ks, |
| float | Kd, | ||
| float | HKs, | ||
| float | HKd, | ||
| int | vertexNo, | ||
| unsigned int | max_connection_size | ||
| ) |
< spring stiffness
< spring damper
< spring rest length
< position of particle one
< position of particle two
< velocity of particle one
< velocity of particle two
< spring stiffness
< spring damper
< spring rest length
< position of particle one
< position of particle two
< velocity of particle one
< velocity of particle two
| Ks | spring stiffness |
| Kd | spring damper |
| HKs | home spring stiffness |
| HKd | home spring damper |
| vertexNo | vertex number |
| max_connection_size | maximum connection size |
Definition at line 1004 of file TAPsCUDA_VertexListMSM_Def.cu.
References CudaTexHomeVertexList, CudaTexVertexConnectionList, CudaTexVertexList, and Device__CalSpringForce().
Referenced by Global__HETriMeshOneModelMultiParts_AdvSim_CU().
{
float3 total_force = make_float3( 0.0f, 0.0f, 0.0f );
float4 HX1 = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
float4 X1 = tex1Dfetch( CudaTexVertexList, vertexNo );
float4 V1 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
float4 HX2;
float4 X2;
float4 V2 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
float3 force;
float rest;
int1 connectedVertexNo;
int idx = vertexNo * max_connection_size;
// The recommended way to round a single-precision floating-point operand to an
// integer, with the result being a single-precision floating-point number is rintf(),
// not roundf().
//*
for ( int i = 0; i < max_connection_size; ++i, ++idx ) {
connectedVertexNo = tex1Dfetch( CudaTexVertexConnectionList, idx );
if ( connectedVertexNo.x >= 0 ) {
HX2 = tex1Dfetch( CudaTexHomeVertexList, connectedVertexNo.x );
X2 = tex1Dfetch( CudaTexVertexList, connectedVertexNo.x );
float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );
force = Device__CalSpringForce(
Ks,
Kd,
rest,
X1,
X2,
V1,
V2
);
total_force.x += force.x;
total_force.y += force.y;
total_force.z += force.z;
}
}
//*/
//*
// Add force from Home Spring
force = Device__CalSpringForce(
HKs,
HKd,
0.0f,
X1,
HX1,
V1,
V2
);
total_force.x += force.x;
total_force.y += force.y;
total_force.z += force.z;
//*/
return total_force;
}
Here is the call graph for this function:
Here is the caller graph for this function:| __device__ float3 Device__EnforceConstraints_ADVSIM | ( | float3 | vertexA, |
| float4 | vertexB | ||
| ) |
| vertexA | vertex A |
| vertexB | vertex B (xyz) + [force]ratio [0-1](w); where 1 means vertexB completely follow vertexA |
Definition at line 220 of file TAPsCUDA_VertexListMSM_Def.cu.
Referenced by Global__ModelStrand_AdvSim_CU().
{
float one_ratio = 1.0f - vertexB.w;
float3 enforced_pos;
enforced_pos.x = vertexB.w*vertexA.x + one_ratio*vertexB.x;
enforced_pos.y = vertexB.w*vertexA.y + one_ratio*vertexB.y;
enforced_pos.z = vertexB.w*vertexA.z + one_ratio*vertexB.z;
return enforced_pos;
}
Here is the caller graph for this function:| __device__ float3 Device__VerletIntegration | ( | float4 | A1, |
| float4 | X1, | ||
| float4 | X0, | ||
| float | dt_sqrt, | ||
| float | damper | ||
| ) |
Verlet Integration.
| A1 | acceleration |
| X1 | current position |
| X0 | previous position |
| dt_sqrt | square of delta time |
| damper | damping value |
Definition at line 198 of file TAPsCUDA_VertexListMSM_Def.cu.
Referenced by Global__HETriMeshOneModelMultiParts_AdvSim_CU(), and Global__ModelStrand_AdvSim_CU().
{
// F = ma; a = F/m;
// x(t+dt) = 2x(t) - x(t-dt) + a(t)*{dt}^2 + O({dt}^4)
float3 new_pos;
new_pos.x = 2.0f*X1.x - X0.x + A1.x*dt_sqrt - damper*(X1.x - X0.x);
new_pos.y = 2.0f*X1.y - X0.y + A1.y*dt_sqrt - damper*(X1.y - X0.y);
new_pos.z = 2.0f*X1.z - X0.z + A1.z*dt_sqrt - damper*(X1.z - X0.z);
return new_pos;
}
Here is the caller graph for this function:| void Global__HETriMeshOneModelMultiParts_AdvSim | ( | unsigned int | cudaID, |
| unsigned int | numOfVertices, | ||
| unsigned int | numOfThreads, | ||
| float | currentTime, | ||
| float | timeStep, | ||
| int | numOfSubSteps, | ||
| float | ptMass, | ||
| float | Ks, | ||
| float | Kd, | ||
| float | HKs, | ||
| float | HKd, | ||
| float * | host_vertex_data, | ||
| float * | host_prev_vertex_data, | ||
| float * | host_home_vertex_data, | ||
| int * | host_connection_list, | ||
| unsigned int | max_connection_size | ||
| ) |
For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function.
CUDA Wrapper Function -- For HETriMeshOneModelMultiParts AdvSim Function.
For HETriMeshOneModelMultiParts AdvSim Function CUDA Wrapper Function -- For HETriMeshOneModelMultiParts AdvSim Function
| cudaID | CUDA ID |
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| numOfSubSteps | number of sub-steps |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| HKs | home spring stiffness |
| HKd | home spring damper |
| host_vertex_data | host's vertex data |
| host_prev_vertex_data | host's previous vertex data |
| host_home_vertex_data | host's home vertex data |
| host_connection_list | host's connection index list |
| max_connection_size | maximum connection size |
Definition at line 1191 of file TAPsCUDA_VertexListMSM_Def.cu.
References DATA_Vertex_List::BindHomeVertexList(), DATA_Vertex_List::BindPrevVertexList(), DATA_Vertex_List::BindVertexConnectionList(), DATA_Vertex_List::BindVertexList(), DATA_GlobalPool, DATA_Pool::DataForVertexList, DATA_Vertex_List::GetVertexList(), DATA_Vertex_List::SwapVertexList(), DATA_Vertex_List::UnbindHomeVertexList(), DATA_Vertex_List::UnbindPrevVertexList(), DATA_Vertex_List::UnbindVertexConnectionList(), and DATA_Vertex_List::UnbindVertexList().
Referenced by HETriMeshOneModelMultiParts< T >::AdvanceSimulation().
{
//printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
//printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
//printf( "Number of vertices: %d\n", numOfVertices );
//printf( "Number of threads: %d\n", numOfThreads );
//printf( "Number of grids: %d\n", numOfVertices/numOfThreads );
// Allocate device memory for out data
unsigned int size = sizeof(float) * numOfVertices * 4;
float * out_data;
CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
// Copy Vertex List (and Previous Vertex List) from host data to device data,
// since vertices are dynamically changed.
// While home vertices and connection list are unchanged.
CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
// The Previous Vertex List can be saved in CUDA memory.
// Hence, the Previous Vertex List does not have to be updated.
//CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
// Kernel invocation
unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
dim3 Dg( numOfThreadBlocks, 1, 1 );
dim3 Db( numOfThreads, 1, 1 );
size_t Ns = 0;
cudaStream_t S = 0;
// Bind Textures
DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices );
DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexConnectionList( numOfVertices, max_connection_size );
int CHOICE = 2;
// HOME VERTEX
if ( CHOICE == 1 ) {
// Call the CUDA kernel (Choice 1 -- Home Vertex Position Only)
Global__HETriMeshOneModelMultiParts_AdvSim_CU<1><<< Dg, Db, Ns, S >>>(
numOfVertices, numOfThreads,
currentTime, timeStep,
ptMass, Ks, Kd, HKs, HKd,
max_connection_size,
out_data
);
// Swap pointers for Vertex List and Previous Vertex List
// So that the Previous Vertex List is remain unchanged in the CUDA memory.
DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
}
// MASS SPRING SYSTEM
else if ( CHOICE == 2 ) {
float subTimeStep = timeStep / numOfSubSteps;
for ( int i = 0; i < numOfSubSteps; ++i ) {
//printf( "delta time: %g\n", timeStep/numOfSubSteps );
// Call the CUDA kernel (Choice 2 -- Mass Spring System)
Global__HETriMeshOneModelMultiParts_AdvSim_CU<2><<< Dg, Db, Ns, S >>>(
numOfVertices, numOfThreads,
currentTime, subTimeStep,
ptMass, Ks, Kd, HKs, HKd,
max_connection_size,
out_data
);
// Swap pointers for Vertex List and Previous Vertex List
// So that the Previous Vertex List is remain unchanged in the CUDA memory.
DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
// Copy data to the device's memory for (current) Vertex List
CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
currentTime += subTimeStep;
}
}
// Unbind Textures
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexConnectionList();
// Check if kernel execution generated and error
CUT_CHECK_ERROR( "Kernel execution failed!" );
// Copy output data to host data Previous Vertex List
// The host will have to swap its pointers (for current and previous vertex list)
if ( CHOICE > 0 ) {
CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
}
//DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, host_prev_vertex_data, host_home_vertex_data );
//DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug_ConnectionList( numOfVertices, max_connection_size, host_connection_list );
// Free device memory
CUDA_SAFE_CALL( cudaFree( out_data ) );
}
Here is the call graph for this function:
Here is the caller graph for this function:| __global__ void Global__HETriMeshOneModelMultiParts_AdvSim_CU | ( | unsigned int | numOfVertices, |
| unsigned int | numOfThreads, | ||
| float | currentTime, | ||
| float | timeStep, | ||
| float | ptMass, | ||
| float | Ks, | ||
| float | Kd, | ||
| float | HKs, | ||
| float | HKd, | ||
| unsigned int | max_connection_size, | ||
| float * | out_data | ||
| ) |
< spring stiffness
< spring damper
< home spring stiffness
< home spring damper
< vertex number
< maximum connection size
< acceleration
< current position
< previous position
< square of delta time
< damping value
| numOfVertices | number of vertices |
| numOfThreads | number of threads |
| currentTime | current time |
| timeStep | time step |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| HKs | home spring stiffness |
| HKd | home spring damper |
| max_connection_size | maximum connection size |
| out_data | output data |
Definition at line 1078 of file TAPsCUDA_VertexListMSM_Def.cu.
References CudaTexHomeVertexList, CudaTexPrevVertexList, CudaTexVertexList, Device__CalSpringForce_VertexList(), and Device__VerletIntegration().
{
switch ( COMPUTATION_CHOICE ) {
// Emulate Viscoelastic by home vertex positions
case 1:
{
// Block index
int bx = blockIdx.x;
// Thread index
int tx = threadIdx.x;
// Indices
int idx = (numOfThreads * bx + tx) * 4;
int vertexNo = (numOfThreads * bx + tx);
// Fetch data from texture linear memory
float4 homeVertexPos = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
float4 vertex_pos = tex1Dfetch( CudaTexVertexList, vertexNo );
float3 len;
len.x = homeVertexPos.x - vertex_pos.x;
len.y = homeVertexPos.y - vertex_pos.y;
len.z = homeVertexPos.z - vertex_pos.z;
len.x *= timeStep;
len.y *= timeStep;
len.z *= timeStep;
float3 new_pos;
new_pos.x = vertex_pos.x + len.x;
new_pos.y = vertex_pos.y + len.y;
new_pos.z = vertex_pos.z + len.z;
//out_data[idx] = new_pos;
out_data[idx ] = new_pos.x;
out_data[idx+1] = new_pos.y;
out_data[idx+2] = new_pos.z;
out_data[idx+3] = vertex_pos.w;
}
break;
// Emulate Viscoelastic by mass spring connections
case 2:
{
// Block index
int bx = blockIdx.x;
// Thread index
int tx = threadIdx.x;
// Vertex number
int vertexNo = (numOfThreads * bx + tx);
if ( vertexNo >= numOfVertices ) return;
// Indix for output
int idx = (numOfThreads * bx + tx) * 4;
// Fetch data from texture linear memory
float4 current_pos = tex1Dfetch( CudaTexVertexList, vertexNo );
float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );
// Synchronize to make sure the data are loaded
//__syncthreads();
float3 force = Device__CalSpringForce_VertexList(
Ks,
Kd,
HKs,
HKd,
vertexNo,
max_connection_size
);
// Convert force to acceleration
float invMass = 1.0f / ptMass;
float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
float dt_sqrt = timeStep*timeStep;
float3 new_pos = Device__VerletIntegration(
acceleration,
current_pos,
previous_pos,
dt_sqrt,
Kd
);
// WARNING: out data have to be set at the end, otherwise it won't work!!! ???
out_data[idx ] = new_pos.x;
out_data[idx+1] = new_pos.y;
out_data[idx+2] = new_pos.z;
out_data[idx+3] = current_pos.w;
}
break;
}
}
Here is the call graph for this function:| void Global__ModelStrand_AdvSim | ( | unsigned int | cudaID, |
| unsigned int | numOfVertices, | ||
| unsigned int | numOfThreads, | ||
| float | currentTime, | ||
| float | timeStep, | ||
| int | numOfSubSteps, | ||
| float | ptMass, | ||
| float | Ks, | ||
| float | Kd, | ||
| float | Lrest, | ||
| float * | host_vertex_data, | ||
| float * | host_prev_vertex_data | ||
| ) |
For Suture Model (ModelStrand & ModelSuture)
CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function.
CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function
| cudaID | CUDA ID |
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| numOfSubSteps | number of sub-steps |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| Lrest | spring rest length |
| host_vertex_data | host's vertex data |
| host_prev_vertex_data | host's previous vertex data |
Definition at line 630 of file TAPsCUDA_VertexListMSM_Def.cu.
References DATA_Vertex_List::BindPrevVertexList(), DATA_Vertex_List::BindVertexList(), DATA_GlobalPool, DATA_Pool::DataForVertexList, DATA_Vertex_List::GetVertexList(), DATA_Vertex_List::SwapVertexList(), DATA_Vertex_List::UnbindPrevVertexList(), and DATA_Vertex_List::UnbindVertexList().
{
//printf( "FILE:%s LINE:%d Global__ModelStrand_AdvSim\n", __FILE__, __LINE__ );
//printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
//printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
//printf( "Number of vertices: %d\n", numOfVertices );
//printf( "Number of threads: %d\n", numOfThreads );
//printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );
// Allocate device memory for out data
unsigned int size = sizeof(float) * numOfVertices * 4;
float * out_data;
CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
// Copy Vertex List (and Previous Vertex List) from host data to device data,
// since vertices are dynamically changed.
// While home vertices and connection list are unchanged.
CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
// The Previous Vertex List can be saved in CUDA memory.
// Hence, the Previous Vertex List does not have to be updated.
//CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
// Kernel invocation
unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
dim3 Dg( numOfThreadBlocks, 1, 1 );
dim3 Db( numOfThreads, 1, 1 );
size_t Ns = 0;
cudaStream_t S = 0;
// Bind Textures
DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
//DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices );
//*
int CHOICE = 1;
// MASS SPRING SYSTEM
if ( CHOICE == 1 ) {
float subTimeStep = timeStep / numOfSubSteps;
for ( int i = 0; i < numOfSubSteps; ++i ) {
// Call the CUDA kernel (Choice 1 -- Mass Spring System)
Global__ModelStrand_AdvSim_CU<1><<< Dg, Db, Ns, S >>>(
numOfVertices, numOfThreads,
currentTime, subTimeStep,
ptMass, Ks, Kd, Lrest,
out_data
);
// Swap pointers for Vertex List and Previous Vertex List
// So that the Previous Vertex List is remain unchanged in the CUDA memory.
DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
// Copy data to the device's memory for (current) Vertex List
CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
currentTime += subTimeStep;
}
}
//*/
// Unbind Textures
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
//DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();
// Check if kernel execution generated and error
CUT_CHECK_ERROR( "Kernel execution failed!" );
// Copy output data to host data Previous Vertex List
// The host will have to swap its pointers (for current and previous vertex list)
if ( CHOICE > 0 ) {
CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
}
//DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data );
// Free device memory
CUDA_SAFE_CALL( cudaFree( out_data ) );
}
Here is the call graph for this function:| void Global__ModelStrand_AdvSim_ADVSIM | ( | unsigned int | cudaID, |
| unsigned int | numOfVertices, | ||
| unsigned int | numOfThreads, | ||
| float | currentTime, | ||
| float | timeStep, | ||
| int | numOfSubSteps, | ||
| float | ptMass, | ||
| float | Ks, | ||
| float | Kd, | ||
| float | Lrest, | ||
| float * | host_vertex_data, | ||
| float * | host_prev_vertex_data, | ||
| unsigned int * | host_sim_flags_data, | ||
| float * | host_pos_constraint_data | ||
| ) |
CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function with TAPs_ADVANCED_SIMULATION
| cudaID | CUDA ID |
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| numOfSubSteps | number of sub-steps |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| Lrest | spring rest length |
| host_vertex_data | host's vertex data |
| host_prev_vertex_data | host's previous vertex data |
| host_sim_flags_data | host's simulation flags data |
| host_pos_constraint_data | host's position constraint data |
Definition at line 726 of file TAPsCUDA_VertexListMSM_Def.cu.
References DATA_Vertex_List::BindPosConstraintList(), DATA_Vertex_List::BindPrevVertexList(), DATA_Vertex_List::BindSimFlagsList(), DATA_Vertex_List::BindVertexList(), DATA_GlobalPool, DATA_Pool::DataForVertexList, DATA_Vertex_List::GetPosConstraintList(), DATA_Vertex_List::GetSimFlagsList(), DATA_Vertex_List::GetVertexList(), DATA_Vertex_List::PrintDebug(), DATA_Vertex_List::SwapVertexList(), DATA_Vertex_List::UnbindPosConstraintList(), DATA_Vertex_List::UnbindPrevVertexList(), DATA_Vertex_List::UnbindSimFlagsList(), and DATA_Vertex_List::UnbindVertexList().
{
printf( "FILE:%s LINE:%d Global__ModelStrand_AdvSim_ADVSIM\n", __FILE__, __LINE__ );
printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
printf( "Number of vertices: %d\n", numOfVertices );
printf( "Number of threads: %d\n", numOfThreads );
printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );
// Allocate device memory for out data
unsigned int size = sizeof(float) * numOfVertices * 4;
float * out_data;
CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
// Allocate device memory for out data for changing suture's simulation flags list
unsigned int size_SimFlagsList = sizeof(unsigned int) * numOfVertices;
unsigned int * out_data_SimFlagsList;
CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_SimFlagsList, size_SimFlagsList ) );
// Allocate device memory for out data for changing suture's position constraint list
float * out_data_PosConstraintList;
CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_PosConstraintList, size ) );
// Copy Vertex List (and Previous Vertex List) from host data to device data,
// since vertices are dynamically changed.
// While home vertices and connection list are unchanged.
CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
// The Previous Vertex List can be saved in CUDA memory.
// Hence, the Previous Vertex List does not have to be updated.
//CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );
CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetSimFlagsList(), host_sim_flags_data, sizeof(unsigned int)*numOfVertices, cudaMemcpyHostToDevice ) );
CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPosConstraintList(), host_pos_constraint_data, size, cudaMemcpyHostToDevice ) );
// Kernel invocation
unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
dim3 Dg( numOfThreadBlocks, 1, 1 );
dim3 Db( numOfThreads, 1, 1 );
size_t Ns = 0;
cudaStream_t S = 0;
// Bind Textures
DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
DATA_GlobalPool.DataForVertexList[cudaID]->BindSimFlagsList( numOfVertices );
DATA_GlobalPool.DataForVertexList[cudaID]->BindPosConstraintList( numOfVertices );
//*
// Add 10 for TAPs_ADVANCED_SIMULATION
int CHOICE = 100 + 1;
// MASS SPRING SYSTEM
if ( CHOICE == 101 ) {
float subTimeStep = timeStep / numOfSubSteps;
for ( int i = 0; i < numOfSubSteps; ++i ) {
//printf( "delta time: %g\n", timeStep/numOfSubSteps );
// Call the CUDA kernel (Choice 100+1 -- Mass Spring System)
// with TAPs_ADVANCED_SIMULATION
Global__ModelStrand_AdvSim_CU<101><<< Dg, Db, Ns, S >>>(
numOfVertices, numOfThreads,
currentTime, subTimeStep,
ptMass, Ks, Kd, Lrest,
out_data
);
// Swap pointers for Vertex List and Previous Vertex List
// So that the Previous Vertex List is remain unchanged in the CUDA memory.
DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
// Copy data to the device's memory for (current) Vertex List
CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
currentTime += subTimeStep;
}
// Call the CUDA kernel to enforce constraints
// with TAPs_ADVANCED_SIMULATION
Global__ModelStrand_AdvSim_Enforce_Constraint_CU<101><<< Dg, Db, Ns, S >>>(
numOfVertices, numOfThreads,
currentTime, subTimeStep,
ptMass, Ks, Kd, Lrest,
out_data_SimFlagsList,
out_data_PosConstraintList
);
}
//*/
// Unbind Textures
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindSimFlagsList();
DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPosConstraintList();
// Check if kernel execution generated and error
CUT_CHECK_ERROR( "Kernel execution failed!" );
// Copy output data to host data Previous Vertex List
// The host will have to swap its pointers (for current and previous vertex list)
if ( CHOICE > 0 ) {
CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
CUDA_SAFE_CALL( cudaMemcpy( host_sim_flags_data, out_data_SimFlagsList, size_SimFlagsList, cudaMemcpyDeviceToHost ) );
CUDA_SAFE_CALL( cudaMemcpy( host_pos_constraint_data, out_data_PosConstraintList, size, cudaMemcpyDeviceToHost ) );
}
/*
// DEBUG
{
float * data = (float *)malloc( size );
CUDA_SAFE_CALL( cudaMemcpy( data, out_data, size, cudaMemcpyDeviceToHost ) );
for ( int i = 0, n = 0; i < 2; ++i, n+=4 ) {
printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] );
}
for ( int i = numOfVertices-2, n = i*4; i < numOfVertices; ++i, n+=4 ) {
printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] );
}
free( data );
}
*/
DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, NULL, NULL, host_sim_flags_data, host_pos_constraint_data );
// Free device memory
CUDA_SAFE_CALL( cudaFree( out_data ) );
CUDA_SAFE_CALL( cudaFree( out_data_SimFlagsList ) );
CUDA_SAFE_CALL( cudaFree( out_data_PosConstraintList ) );
}
Here is the call graph for this function:| __global__ void Global__ModelStrand_AdvSim_CU | ( | unsigned int | numOfVertices, |
| unsigned int | numOfThreads, | ||
| float | currentTime, | ||
| float | timeStep, | ||
| float | ptMass, | ||
| float | Ks, | ||
| float | Kd, | ||
| float | Lrest, | ||
| float * | out_data | ||
| ) |
< spring stiffness
< spring damper
< spring rest length
< vertex number
< number of vertices
< acceleration
< current position
< previous position
< square of delta time
< damping value
< spring stiffness
< spring damper
< spring rest length
< vertex number
< number of vertices
< acceleration
< current position
< previous position
< square of delta time
< damping value
| numOfVertices | number of vertices |
| numOfThreads | number of threads |
| currentTime | current time |
| timeStep | time step |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| Lrest | spring rest length |
| out_data | output data |
Definition at line 336 of file TAPsCUDA_VertexListMSM_Def.cu.
References CudaTexPosConstraintList, CudaTexPrevVertexList, CudaTexSimFlagsList, CudaTexVertexList, Device__CalSpringForce_ModelStrand(), Device__EnforceConstraints_ADVSIM(), and Device__VerletIntegration().
{
switch ( COMPUTATION_CHOICE ) {
//-------------------------------------------------
// Emulate Viscoelastic by mass spring connections
// with TAPs_ADVANCED_SIMULATION
case 101:
{
// Block index
int bx = blockIdx.x;
// Thread index
int tx = threadIdx.x;
// Vertex number
int vertexNo = (numOfThreads * bx + tx);
if ( vertexNo >= numOfVertices ) return;
// Index for output
int idx = (numOfThreads * bx + tx) * 4;
// Fetch data from texture linear memory
float4 current_pos = tex1Dfetch( CudaTexVertexList, vertexNo );
// If the suture vertex is not fixed
if ( current_pos.w == 1 ) {
float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );
float3 force = Device__CalSpringForce_ModelStrand(
Ks,
Kd,
Lrest,
vertexNo,
numOfVertices
);
// Convert force to acceleration
float invMass = 1.0f / ptMass;
float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
float dt_sqrt = timeStep*timeStep;
float3 new_pos = Device__VerletIntegration(
acceleration,
current_pos,
previous_pos,
dt_sqrt,
Kd
);
// Enforce constraints with TAPs_ADVANCED_SIMULATION
uint1 simFlags = tex1Dfetch( CudaTexSimFlagsList, vertexNo );
if ( simFlags.x > 0 ) {
float4 constraint_pos = tex1Dfetch( CudaTexPosConstraintList, vertexNo );
new_pos = Device__EnforceConstraints_ADVSIM( new_pos, constraint_pos );
//new_pos.x = constraint_pos.x;
//new_pos.y = constraint_pos.y;
//new_pos.z = constraint_pos.z;
}
// WARNING: out data have to be set at the end, otherwise it won't work!!! ???
out_data[idx ] = new_pos.x;
out_data[idx+1] = new_pos.y;
out_data[idx+2] = new_pos.z;
out_data[idx+3] = current_pos.w;
}
// If the suture vertex is fixed
else {
out_data[idx ] = current_pos.x;
out_data[idx+1] = current_pos.y;
out_data[idx+2] = current_pos.z;
out_data[idx+3] = current_pos.w;
}
}
// Synchronize to make sure the data are loaded
//__syncthreads();
break;
//-------------------------------------------------
// Emulate Viscoelastic by mass spring connections
case 1:
{
// Block index
int bx = blockIdx.x;
// Thread index
int tx = threadIdx.x;
// Vertex number
int vertexNo = (numOfThreads * bx + tx);
if ( vertexNo >= numOfVertices ) return;
// Index for output
int idx = (numOfThreads * bx + tx) * 4;
// Fetch data from texture linear memory
float4 current_pos = tex1Dfetch( CudaTexVertexList, vertexNo );
if ( current_pos.w == 1 ) {
float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );
float3 force = Device__CalSpringForce_ModelStrand(
Ks,
Kd,
Lrest,
vertexNo,
numOfVertices
);
// Convert force to acceleration
float invMass = 1.0f / ptMass;
float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
float dt_sqrt = timeStep*timeStep;
// Calculate the new position
float3 new_pos = Device__VerletIntegration(
acceleration,
current_pos,
previous_pos,
dt_sqrt,
Kd
);
// WARNING: out data have to be set at the end, otherwise it won't work!!! ???
out_data[idx ] = new_pos.x;
out_data[idx+1] = new_pos.y;
out_data[idx+2] = new_pos.z;
out_data[idx+3] = current_pos.w;
}
else {
out_data[idx ] = current_pos.x;
out_data[idx+1] = current_pos.y;
out_data[idx+2] = current_pos.z;
out_data[idx+3] = current_pos.w;
}
}
// Synchronize to make sure the data are loaded
//__syncthreads();
break;
}
}
Here is the call graph for this function:| __global__ void Global__ModelStrand_AdvSim_Enforce_Constraint_CU | ( | unsigned int | numOfVertices, |
| unsigned int | numOfThreads, | ||
| float | currentTime, | ||
| float | timeStep, | ||
| float | ptMass, | ||
| float | Ks, | ||
| float | Kd, | ||
| float | Lrest, | ||
| unsigned int * | out_data_SimFlagsList, | ||
| float * | out_data_PosConstraintList | ||
| ) |
| numOfVertices | number of vertices |
| numOfThreads | number of threads |
| currentTime | current time |
| timeStep | time step |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| Lrest | spring rest length |
| out_data_SimFlagsList | output data for simulation flags |
| out_data_PosConstraintList | output data for position constaint |
Definition at line 483 of file TAPsCUDA_VertexListMSM_Def.cu.
References CudaTexPosConstraintList, CudaTexSimFlagsList, CudaTexVertexList, TAPs_Const_Suture_ShiftDown, and TAPs_Const_Suture_ShiftUp.
{
switch ( COMPUTATION_CHOICE ) {
//-------------------------------------------------
// Strand's vertex is slidable
// with TAPs_ADVANCED_SIMULATION
case 101:
{
// Block index
int bx = blockIdx.x;
// Thread index
int tx = threadIdx.x;
// Vertex number
int vertexNo = (numOfThreads * bx + tx);
if ( vertexNo >= numOfVertices ) return;
// Index for output
int idx = vertexNo * 4;
// Fetch data from texture linear memories
float4 pos_j = tex1Dfetch( CudaTexVertexList, vertexNo );
uint1 simFlags = tex1Dfetch( CudaTexSimFlagsList, vertexNo );
float4 posConstraint = tex1Dfetch( CudaTexPosConstraintList, vertexNo );
bool bDefault = false;
// If the suture vertex is not fixed
if ( pos_j.w == 1.0f ) {
// Values of simulation flags (declared in TAPsEnumList.hpp):
// SIMULATION CONSTRAINTS
// enum SimConstraints {
// CLEARED = 0,
// FIXED = 1,
// ATTACHED = 1 << 1,
// PUNCTURED = 1 << 2,
// SLIDABLE = 1 << 3,
// DUMMY
// };
// CLEARED
if ( simFlags.x == 0 ) {
bDefault = true;
}
// FIXED
else if ( simFlags.x == 1 ) {
bDefault = true;
}
// ATTACHED
else if ( simFlags.x == 2 ) {
bDefault = true;
}
// PUNCTURED (n/a)
//else if ( simFlags.x == 4 ) {
//}
// SLIDABLE
else if ( simFlags.x == 8 ) {
//else if ( false ) {
if ( 0 < vertexNo && vertexNo < numOfVertices-2 ) {
int i = vertexNo-1;
int k = vertexNo+1;
float4 pos_i = tex1Dfetch( CudaTexVertexList, i );
float4 pos_k = tex1Dfetch( CudaTexVertexList, k );
float3 linkA = make_float3( pos_i.x - pos_j.x, pos_i.y - pos_j.y, pos_i.z - pos_j.z );
float3 linkB = make_float3( pos_k.x - pos_j.x, pos_k.y - pos_j.y, pos_k.z - pos_j.z );
float linkAlen = sqrt( linkA.x*linkA.x + linkA.y*linkA.y + linkA.z*linkA.z );
float linkBlen = sqrt( linkB.x*linkB.x + linkB.y*linkB.y + linkB.z*linkB.z );
float threshold = 1.025f;
//float threshold = 2.0f;
// Shift up
if ( linkAlen > linkBlen*threshold ) {
//uint1 simFlags_up = tex1Dfetch( CudaTexSimFlagsList, vertexNo+1 );
// if simFlags of the right vertex is cleared
//if ( simFlags_up.x == 0 ) {
out_data_SimFlagsList[vertexNo] = simFlags.x;
out_data_PosConstraintList[idx ] = posConstraint.x;
out_data_PosConstraintList[idx+1] = posConstraint.y;
out_data_PosConstraintList[idx+2] = posConstraint.z;
out_data_PosConstraintList[idx+3] = posConstraint.w + TAPs_Const_Suture_ShiftUp; // +20 to signify that the strand vertex must be shifted up
//}
//else {
// bDefault = true;
//}
}
// Shift down
else if ( linkBlen > linkAlen*threshold ) {
//uint1 simFlags_down = tex1Dfetch( CudaTexSimFlagsList, vertexNo-1 );
// if simFlags of the left vertex is cleared
//if ( simFlags_down.x == 0 ) {
out_data_SimFlagsList[vertexNo] = simFlags.x;
out_data_PosConstraintList[idx ] = posConstraint.x;
out_data_PosConstraintList[idx+1] = posConstraint.y;
out_data_PosConstraintList[idx+2] = posConstraint.z;
out_data_PosConstraintList[idx+3] = posConstraint.w + TAPs_Const_Suture_ShiftDown; // +10 to signify that the strand vertex must be shifted down
//}
//else {
// bDefault = true;
//}
}
else {
bDefault = true;
}
}
else {
bDefault = true;
}
}
// DEFAULT CASE
else {
bDefault = true;
}
}
// If the suture vertex is fixed
else {
bDefault = true;
}
if ( bDefault ) {
out_data_SimFlagsList[vertexNo] = simFlags.x;
out_data_PosConstraintList[idx ] = posConstraint.x;
out_data_PosConstraintList[idx+1] = posConstraint.y;
out_data_PosConstraintList[idx+2] = posConstraint.z;
out_data_PosConstraintList[idx+3] = posConstraint.w;
}
}
// Synchronize to make sure the data are loaded
//__syncthreads();
break;
}
}
| void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim | ( | int | numOfVertices, |
| int | numOfThreads, | ||
| float | tCurrent, | ||
| float | tNext, | ||
| float * | host_data | ||
| ) |
For Suture Model (ModelStrand & ModelSuture)
CUDA Wrapper Function -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function.
For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function CUDA Wrapper Function -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| host_data | host data |
Definition at line 939 of file TAPsCUDA_VertexListMSM_Def.cu.
Referenced by SimPropForMultiPartMeshModel_HalfEdge< T >::AdvSim().
{
/*
// CUDA device properties
int dev_no;
CUDA_SAFE_CALL( cudaGetDevice( &dev_no ) );
cudaDeviceProp * prop;
CUDA_SAFE_CALL( cudaGetDeviceProperties( prop, dev_no ) );
//printf( "maxThreadsPerBlock: %d\n", prop->maxThreadsPerBlock );
*/
// Kernel invocation
unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
dim3 Dg( numOfThreadBlocks, 1, 1 );
dim3 Db( numOfThreads, 1, 1 );
size_t Ns = 0;
cudaStream_t S = 0;
printf( "Number of vertices: %d\n", numOfVertices );
printf( "Number of threads: %d\n", numOfThreads );
printf( "Number of grids: %d\n", numOfVertices/numOfThreads );
// Allocate device memory
unsigned int size = sizeof(float) * numOfVertices * 3;
float * device_data;
CUDA_SAFE_CALL( cudaMalloc( (void **)&device_data, size ) );
float * out_data;
CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );
// Copy host data to device data
CUDA_SAFE_CALL( cudaMemcpy( device_data, host_data, size, cudaMemcpyHostToDevice ) );
// Call the CUDA kernel
Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU<1><<< Dg, Db, Ns, S >>>
( numOfVertices, numOfThreads, currentTime, timeStep, device_data, out_data );
// Check if kernel execution generated and error
CUT_CHECK_ERROR( "Kernel execution failed" );
// Copy output data to host data
CUDA_SAFE_CALL( cudaMemcpy( host_data, out_data, size, cudaMemcpyDeviceToHost ) );
// Free device memory
CUDA_SAFE_CALL( cudaFree( device_data ) );
CUDA_SAFE_CALL( cudaFree( out_data ) );
}
Here is the caller graph for this function:| __global__ void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU | ( | int | numOfVertices, |
| int | numOfThreads, | ||
| float | currentTime, | ||
| float | timeStep, | ||
| float * | device_data, | ||
| float * | out_data | ||
| ) |
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| device_data | device data |
| out_data | output data |
Definition at line 879 of file TAPsCUDA_VertexListMSM_Def.cu.
{
// Block index
int bx = blockIdx.x;
// Thread index
int tx = threadIdx.x;
// Index
int idx = (numOfThreads * bx + tx) * 3;
// Use device shared memory to load the vertex position from device global memory
/*__shared__*/ float vertex_pos[3];
//vertex_pos = device_data[idx];
vertex_pos[0] = device_data[idx ];
vertex_pos[1] = device_data[idx+1];
vertex_pos[2] = device_data[idx+2];
// Synchronize to make sure the data are loaded
__syncthreads();
//out_data[idx ] = vertex_pos[idx ];
//out_data[idx+1] = vertex_pos[idx+1];
//out_data[idx+2] = vertex_pos[idx+2];
//out_data[idx ] = 0.0f;
//out_data[idx+1] = 0.0f;
//out_data[idx+2] = 0.0f;
float new_pos[3];
//new_pos = vertex_pos + 0.0001f;
new_pos[0] = vertex_pos[0] + 0.0001f;
new_pos[1] = vertex_pos[1];
new_pos[2] = vertex_pos[2];
//__syncthreads();
//out_data[idx] = new_pos;
out_data[idx ] = new_pos[0];
out_data[idx+1] = new_pos[1];
out_data[idx+2] = new_pos[2];
//out_data[idx ] = 0.0f;
//out_data[idx+1] = 0.0f;
//out_data[idx+2] = 0.0f;
//device_data[idx ] = device_data[idx ] + 0.0001f;
//device_data[idx+1] = device_data[idx+1];
//device_data[idx+2] = device_data[idx+2];
}
| bool InitailizeDataForSutureModel | ( | unsigned int & | cudaID, |
| unsigned int | numOfVertices, | ||
| float * | vertexList, | ||
| float * | prevVertexList | ||
| ) |
Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture)
| cudaID | CUDA ID for the object |
| numOfVertices | number of vertices |
| vertexList | list of xyzw vertices |
| prevVertexList | list of previous xyzw vertices |
Definition at line 60 of file TAPsCUDA_VertexListMSM_Def.cu.
References AddToGlobal_Pool_Of_DATA_Vertex_List(), DATA_GlobalPool, DATA_Vertex_List::GetPrevVertexList(), DATA_Vertex_List::GetVertexList(), and DATA_Pool::SizeOfGlobal_Pool.
{
// Here Suture Model uses Vertex List Data
// However, it has implicit connection (each vertex connect to the previous and next vertices).
// So the connection list is set to zero.
// Home vertex is unused for now. Plan is to use home vertex for sticking suture to a surface.
// Assign CUDA ID to the object
// The object has to use cudaID to communicate with the CUDA.
cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;
// Allocate CUDA data for the object.
DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, false, 0, true );
AddToGlobal_Pool_Of_DATA_Vertex_List( newData );
// Size of memory for (xyzw) vertices
int size = numOfVertices * 4 * sizeof(float);
// Copy Vertex List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
// Copy Previous Vertex List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
// Copy Home Vertex List from host data to device data
//CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
// Copy Vertex Connection List from host data to device data
return true;
}
Here is the call graph for this function:| bool InitailizeDataForSutureModel_ADVSIM | ( | unsigned int & | cudaID, |
| unsigned int | numOfVertices, | ||
| float * | vertexList, | ||
| float * | prevVertexList, | ||
| unsigned int * | simFlagsList, | ||
| float * | posConstraintList | ||
| ) |
Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture) with TAPs_ADVANCED_SIMULATION
| cudaID | CUDA ID for the object |
| numOfVertices | number of vertices |
| vertexList | list of xyzw vertices |
| prevVertexList | list of previous xyzw vertices |
| simFlagsList | list of simulation flags |
| posConstraintList | list of position constraints (xyz) plus force ratio (w) |
Definition at line 99 of file TAPsCUDA_VertexListMSM_Def.cu.
References AddToGlobal_Pool_Of_DATA_Vertex_List(), DATA_GlobalPool, DATA_Vertex_List::GetPosConstraintList(), DATA_Vertex_List::GetPrevVertexList(), DATA_Vertex_List::GetSimFlagsList(), DATA_Vertex_List::GetVertexList(), and DATA_Pool::SizeOfGlobal_Pool.
{
// Here Suture Model uses Vertex List Data
// However, it has implicit connection (each vertex connect to the previous and next vertices).
// So the connection list is set to zero.
// Home vertex is unused for now. Plan is to use home vertex for sticking suture to a surface.
// Assign CUDA ID to the object
// The object has to use cudaID to communicate with the CUDA.
cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;
// Allocate CUDA data for the object.
DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, false, 0, true );
AddToGlobal_Pool_Of_DATA_Vertex_List( newData );
// Size of memory for (xyzw) vertices
int size = numOfVertices * 4 * sizeof(float);
// Copy Vertex List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
// Copy Previous Vertex List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
// Copy Home Vertex List from host data to device data
//CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
// Copy Vertex Connection List from host data to device data
//CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) );
// Copy Simulation Flags List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetSimFlagsList(), simFlagsList, numOfVertices*sizeof(unsigned int), cudaMemcpyHostToDevice ) );
// Copy Position Constraint List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetPosConstraintList(), posConstraintList, size, cudaMemcpyHostToDevice ) );
return true;
}
Here is the call graph for this function:| BEGIN_NAMESPACE_TAPs__CUDA bool InitailizeDataForVertexList | ( | unsigned int & | cudaID, |
| unsigned int | numOfVertices, | ||
| unsigned int | max_connection_size, | ||
| float * | vertexList, | ||
| float * | prevVertexList, | ||
| float * | homeVertexList, | ||
| int * | vertexConnectionList | ||
| ) |
For HETriMeshOneModelMultiParts AdvSim Function.
Initialize CUDA Data for a Mesh Model by Using Vertex List.
Initialize CUDA Data for a Mesh Model by Using Vertex List
| cudaID | CUDA ID for the object |
| numOfVertices | number of vertices |
| max_connection_size | maximum connection size |
| vertexList | list of xyz vertices |
| prevVertexList | list of previous xyzw vertices |
| homeVertexList | list of xyz home vertices |
| vertexConnectionList | vertex connection (1D array of 2D array -- vertex id and vertex connections) |
Definition at line 21 of file TAPsCUDA_VertexListMSM_Def.cu.
References AddToGlobal_Pool_Of_DATA_Vertex_List(), DATA_GlobalPool, DATA_Vertex_List::GetHomeVertexList(), DATA_Vertex_List::GetPrevVertexList(), DATA_Vertex_List::GetVertexConnectionList(), DATA_Vertex_List::GetVertexList(), and DATA_Pool::SizeOfGlobal_Pool.
Referenced by HETriMeshOneModelMultiParts< T >::CUDA_Initialize_All().
{
//printf( "InitailizeDataForVertexList\n" );
//fflush( stdout );
// Assign CUDA ID to the object
// The object has to use cudaID to communicate with the CUDA.
cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;
// Allocate CUDA data for the object.
DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, true, max_connection_size, false );
AddToGlobal_Pool_Of_DATA_Vertex_List( newData );
// Size of memory for (xyzw) vertices
int size = numOfVertices * 4 * sizeof(float);
// Copy Vertex List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
// Copy Previous Vertex List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
// Copy Home Vertex List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
// Copy Vertex Connection List from host data to device data
CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) );
return true;
}
Here is the call graph for this function:
Here is the caller graph for this function: