TAPs 0.7.7.3
TAPsCUDA_VertexListMSM_Def.cu File Reference
Include dependency graph for TAPsCUDA_VertexListMSM_Def.cu:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Functions

BEGIN_NAMESPACE_TAPs__CUDA bool InitailizeDataForVertexList (unsigned int &cudaID, unsigned int numOfVertices, unsigned int max_connection_size, float *vertexList, float *prevVertexList, float *homeVertexList, int *vertexConnectionList)
 For HETriMeshOneModelMultiParts AdvSim Function.
bool InitailizeDataForSutureModel (unsigned int &cudaID, unsigned int numOfVertices, float *vertexList, float *prevVertexList)
 Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture)
bool InitailizeDataForSutureModel_ADVSIM (unsigned int &cudaID, unsigned int numOfVertices, float *vertexList, float *prevVertexList, unsigned int *simFlagsList, float *posConstraintList)
__device__ float3 Device__CalSpringForce (float Ks, float Kd, float rest, float4 X1, float4 X2, float4 V1, float4 V2)
__device__ float3 Device__VerletIntegration (float4 A1, float4 X1, float4 X0, float dt_sqrt, float damper)
 Verlet Integration.
__device__ float3 Device__EnforceConstraints_ADVSIM (float3 vertexA, float4 vertexB)
__device__ float3 Device__CalSpringForce_ModelStrand (float Ks, float Kd, float Lrest, int vertexNo, unsigned int numOfVertices)
template<int COMPUTATION_CHOICE>
__global__ void Global__ModelStrand_AdvSim_CU (unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, float ptMass, float Ks, float Kd, float Lrest, float *out_data)
template<int COMPUTATION_CHOICE>
__global__ void Global__ModelStrand_AdvSim_Enforce_Constraint_CU (unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, float ptMass, float Ks, float Kd, float Lrest, unsigned int *out_data_SimFlagsList, float *out_data_PosConstraintList)
void Global__ModelStrand_AdvSim (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float Lrest, float *host_vertex_data, float *host_prev_vertex_data)
 For Suture Model (ModelStrand & ModelSuture)
void Global__ModelStrand_AdvSim_ADVSIM (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float Lrest, float *host_vertex_data, float *host_prev_vertex_data, unsigned int *host_sim_flags_data, float *host_pos_constraint_data)
template<int COMPUTATION_CHOICE>
__global__ void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU (int numOfVertices, int numOfThreads, float currentTime, float timeStep, float *device_data, float *out_data)
void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim (int numOfVertices, int numOfThreads, float currentTime, float timeStep, float *host_data)
 For Suture Model (ModelStrand & ModelSuture)
__device__ float3 Device__CalSpringForce_VertexList (float Ks, float Kd, float HKs, float HKd, int vertexNo, unsigned int max_connection_size)
template<int COMPUTATION_CHOICE>
__global__ void Global__HETriMeshOneModelMultiParts_AdvSim_CU (unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, float ptMass, float Ks, float Kd, float HKs, float HKd, unsigned int max_connection_size, float *out_data)
void Global__HETriMeshOneModelMultiParts_AdvSim (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float HKs, float HKd, float *host_vertex_data, float *host_prev_vertex_data, float *host_home_vertex_data, int *host_connection_list, unsigned int max_connection_size)
 For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function.

Function Documentation

__device__ float3 Device__CalSpringForce ( float  Ks,
float  Kd,
float  rest,
float4  X1,
float4  X2,
float4  V1,
float4  V2 
)
Parameters:
Ksspring stiffness
Kdspring damper
restspring rest length
X1position of particle one
X2position of particle two
V1velocity of particle one
V2velocity of particle two

Definition at line 156 of file TAPsCUDA_VertexListMSM_Def.cu.

Referenced by Device__CalSpringForce_ModelStrand(), and Device__CalSpringForce_VertexList().

{
    // Formula from Physically Based Modeling (Siggraph 2001 Course Note)
    // F1 = -{Ks(l - r) + Kd[(V1-V2)*L]/l}*L/l
    // F2 = -F1
    //   where Ks = spring stiffness, Kd = spring damping,
    //     V1 and V2 are velocity of particles linked by the spring
    //     r = spring rest length
    //     L = vector of difference of positions of particle#1 and #2
    //     l = magnitude of L
    //return ( ( m_tK * (scl - m_tL) ) + ( m_tD * V * cL ) ) * cL;

    float3 Xdif = make_float3( X1.x-X2.x, X1.y-X2.y, X1.z-X2.z );
    float3 Vdif = make_float3( V1.x-V2.x, V1.y-V2.y, V1.z-V2.z );

    //float len = length( Diff );   // length is an inline function in CUDA (cutil_math.h)
    float len = sqrtf( Xdif.x*Xdif.x + Xdif.y*Xdif.y + Xdif.z*Xdif.z );
    if ( len < 1.0E-16f ) {
        return make_float3( 0.0f, 0.0f, 0.0f );
    }
    float invLen = 1.0f / len;
    float3 Dir = make_float3( Xdif.x*invLen, Xdif.y*invLen, Xdif.z*invLen );

    float Vdif_Dir = Vdif.x*Dir.x + Vdif.y*Dir.y + Vdif.z*Dir.z;

    float fmag = -( Ks*(len-rest) + Kd*Vdif_Dir );
    float3 force = make_float3( fmag*Dir.x, fmag*Dir.y, fmag*Dir.z );

    return force;
}

Here is the caller graph for this function:

__device__ float3 Device__CalSpringForce_ModelStrand ( float  Ks,
float  Kd,
float  Lrest,
int  vertexNo,
unsigned int  numOfVertices 
)

< spring stiffness

< spring damper

< spring rest length

< position of particle one

< position of particle two

< velocity of particle one

< velocity of particle two

< spring stiffness

< spring damper

< spring rest length

< position of particle one

< position of particle two

< velocity of particle one

< velocity of particle two

Parameters:
Ksspring stiffness
Kdspring damper
Lrestspring rest length
vertexNovertex number
numOfVerticesnumber of vertices

Definition at line 247 of file TAPsCUDA_VertexListMSM_Def.cu.

References CudaTexVertexList, and Device__CalSpringForce().

Referenced by Global__ModelStrand_AdvSim_CU().

{
    float3 total_force = make_float3( 0.0f, 0.0f, 0.0f );
    //float4 HX1 = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
    float4 X1 = tex1Dfetch( CudaTexVertexList, vertexNo );
    float4 V1 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
    //float4 HX2;
    float4 X2;
    float4 V2 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
    float3 force;

    // The recommended way to round a single-precision floating-point operand to an
    // integer, with the result being a single-precision floating-point number is rintf(),
    // not roundf().

    // Cal force due to connection with the previous vertex
    if ( vertexNo > 0 ) {
        int prevVertex = vertexNo-1;
        //HX2 = tex1Dfetch( CudaTexHomeVertexList, prevVertex );
        X2 = tex1Dfetch( CudaTexVertexList, prevVertex );

        //float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
        //rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );

        force = Device__CalSpringForce( 
            Ks,     
            Kd,     
            Lrest,  
            X1,     
            X2,     
            V1,     
            V2      
        );
        total_force.x += force.x;
        total_force.y += force.y;
        total_force.z += force.z;
    }

    // Cal force due to connection with the next vertex
    if ( vertexNo < numOfVertices - 1 ) {
        int nextVertex = vertexNo+1;
        //HX2 = tex1Dfetch( CudaTexHomeVertexList, prevVertex );
        X2 = tex1Dfetch( CudaTexVertexList, nextVertex );

        //float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
        //rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );

        force = Device__CalSpringForce( 
            Ks,     
            Kd,     
            Lrest,  
            X1,     
            X2,     
            V1,     
            V2      
        );
        total_force.x += force.x;
        total_force.y += force.y;
        total_force.z += force.z;
    }

    /*
    // Add force from Home Spring
    force = Device__CalSpringForce( 
        Ks,     //!< spring stiffness
        Kd,     //!< spring damper
        0.0f,   //!< spring rest length
        X1,     //!< position of particle one
        HX1,    //!< position of particle two
        V1,     //!< velocity of particle one
        V2      //!< velocity of particle two
    );
    total_force.x += force.x;
    total_force.y += force.y;
    total_force.z += force.z;
    */

    return total_force;
}

Here is the call graph for this function:

Here is the caller graph for this function:

__device__ float3 Device__CalSpringForce_VertexList ( float  Ks,
float  Kd,
float  HKs,
float  HKd,
int  vertexNo,
unsigned int  max_connection_size 
)

< spring stiffness

< spring damper

< spring rest length

< position of particle one

< position of particle two

< velocity of particle one

< velocity of particle two

< spring stiffness

< spring damper

< spring rest length

< position of particle one

< position of particle two

< velocity of particle one

< velocity of particle two

Parameters:
Ksspring stiffness
Kdspring damper
HKshome spring stiffness
HKdhome spring damper
vertexNovertex number
max_connection_sizemaximum connection size

Definition at line 1004 of file TAPsCUDA_VertexListMSM_Def.cu.

References CudaTexHomeVertexList, CudaTexVertexConnectionList, CudaTexVertexList, and Device__CalSpringForce().

Referenced by Global__HETriMeshOneModelMultiParts_AdvSim_CU().

{
    float3 total_force = make_float3( 0.0f, 0.0f, 0.0f );
    float4 HX1 = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
    float4 X1 = tex1Dfetch( CudaTexVertexList, vertexNo );
    float4 V1 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
    float4 HX2;
    float4 X2;
    float4 V2 = make_float4( 0.0f, 0.0f, 0.0f, 0.0f );
    float3 force;
    float rest;
    int1 connectedVertexNo;
    int idx = vertexNo * max_connection_size;

    // The recommended way to round a single-precision floating-point operand to an
    // integer, with the result being a single-precision floating-point number is rintf(),
    // not roundf().

    //*
    for ( int i = 0; i < max_connection_size; ++i, ++idx ) {
        connectedVertexNo = tex1Dfetch( CudaTexVertexConnectionList, idx );
        if ( connectedVertexNo.x >= 0 ) {
            HX2 = tex1Dfetch( CudaTexHomeVertexList, connectedVertexNo.x );
            X2 = tex1Dfetch( CudaTexVertexList, connectedVertexNo.x );

            float3 Dif = make_float3( HX1.x-HX2.x, HX1.y-HX2.y, HX1.z-HX2.z );
            rest = sqrtf( Dif.x*Dif.x + Dif.y*Dif.y + Dif.z*Dif.z );

            force = Device__CalSpringForce( 
                Ks,     
                Kd,     
                rest,   
                X1,     
                X2,     
                V1,     
                V2      
            );
            total_force.x += force.x;
            total_force.y += force.y;
            total_force.z += force.z;
        }
    }
    //*/

    //*
    // Add force from Home Spring
    force = Device__CalSpringForce( 
        HKs,    
        HKd,    
        0.0f,   
        X1,     
        HX1,    
        V1,     
        V2      
    );
    total_force.x += force.x;
    total_force.y += force.y;
    total_force.z += force.z;
    //*/

    return total_force;
}

Here is the call graph for this function:

Here is the caller graph for this function:

__device__ float3 Device__EnforceConstraints_ADVSIM ( float3  vertexA,
float4  vertexB 
)
Parameters:
vertexAvertex A
vertexBvertex B (xyz) + [force]ratio [0-1](w); where 1 means vertexB completely follow vertexA

Definition at line 220 of file TAPsCUDA_VertexListMSM_Def.cu.

Referenced by Global__ModelStrand_AdvSim_CU().

{
    float one_ratio = 1.0f - vertexB.w;
    float3 enforced_pos;
    enforced_pos.x = vertexB.w*vertexA.x + one_ratio*vertexB.x;
    enforced_pos.y = vertexB.w*vertexA.y + one_ratio*vertexB.y;
    enforced_pos.z = vertexB.w*vertexA.z + one_ratio*vertexB.z;
    return enforced_pos;
}

Here is the caller graph for this function:

__device__ float3 Device__VerletIntegration ( float4  A1,
float4  X1,
float4  X0,
float  dt_sqrt,
float  damper 
)

Verlet Integration.

Parameters:
A1acceleration
X1current position
X0previous position
dt_sqrtsquare of delta time
damperdamping value

Definition at line 198 of file TAPsCUDA_VertexListMSM_Def.cu.

Referenced by Global__HETriMeshOneModelMultiParts_AdvSim_CU(), and Global__ModelStrand_AdvSim_CU().

{
    // F = ma; a = F/m;
    // x(t+dt) = 2x(t) - x(t-dt) + a(t)*{dt}^2 + O({dt}^4)
    float3 new_pos;
    new_pos.x = 2.0f*X1.x - X0.x + A1.x*dt_sqrt - damper*(X1.x - X0.x);
    new_pos.y = 2.0f*X1.y - X0.y + A1.y*dt_sqrt - damper*(X1.y - X0.y);
    new_pos.z = 2.0f*X1.z - X0.z + A1.z*dt_sqrt - damper*(X1.z - X0.z);
    return new_pos;
}

Here is the caller graph for this function:

void Global__HETriMeshOneModelMultiParts_AdvSim ( unsigned int  cudaID,
unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
int  numOfSubSteps,
float  ptMass,
float  Ks,
float  Kd,
float  HKs,
float  HKd,
float *  host_vertex_data,
float *  host_prev_vertex_data,
float *  host_home_vertex_data,
int *  host_connection_list,
unsigned int  max_connection_size 
)

For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function.

CUDA Wrapper Function -- For HETriMeshOneModelMultiParts AdvSim Function.

For HETriMeshOneModelMultiParts AdvSim Function CUDA Wrapper Function -- For HETriMeshOneModelMultiParts AdvSim Function

Parameters:
cudaIDCUDA ID
numOfVerticesnumber of vertices
numOfThreadsnumber of threads per CUDA thread block
currentTimecurrent time
timeSteptime step
numOfSubStepsnumber of sub-steps
ptMasspoint mass
Ksspring stiffness
Kdspring damper
HKshome spring stiffness
HKdhome spring damper
host_vertex_datahost's vertex data
host_prev_vertex_datahost's previous vertex data
host_home_vertex_datahost's home vertex data
host_connection_listhost's connection index list
max_connection_sizemaximum connection size

Definition at line 1191 of file TAPsCUDA_VertexListMSM_Def.cu.

References DATA_Vertex_List::BindHomeVertexList(), DATA_Vertex_List::BindPrevVertexList(), DATA_Vertex_List::BindVertexConnectionList(), DATA_Vertex_List::BindVertexList(), DATA_GlobalPool, DATA_Pool::DataForVertexList, DATA_Vertex_List::GetVertexList(), DATA_Vertex_List::SwapVertexList(), DATA_Vertex_List::UnbindHomeVertexList(), DATA_Vertex_List::UnbindPrevVertexList(), DATA_Vertex_List::UnbindVertexConnectionList(), and DATA_Vertex_List::UnbindVertexList().

Referenced by HETriMeshOneModelMultiParts< T >::AdvanceSimulation().

{
    //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
    //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );

    //printf( "Number of vertices: %d\n", numOfVertices );
    //printf( "Number of threads: %d\n", numOfThreads );
    //printf( "Number of grids: %d\n", numOfVertices/numOfThreads );

    // Allocate device memory for out data
    unsigned int size = sizeof(float) * numOfVertices * 4;
    float * out_data;
    CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );

    // Copy Vertex List (and Previous Vertex List) from host data to device data, 
    // since vertices are dynamically changed.
    // While home vertices and connection list are unchanged.
    CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
    // The Previous Vertex List can be saved in CUDA memory.
    // Hence, the Previous Vertex List does not have to be updated.
    //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );

    // Kernel invocation
    unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
    dim3 Dg( numOfThreadBlocks, 1, 1 );
    dim3 Db( numOfThreads, 1, 1 );
    size_t Ns = 0;
    cudaStream_t S = 0;

    // Bind Textures
    DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
    DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
    DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices );
    DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexConnectionList( numOfVertices, max_connection_size );

    int CHOICE = 2;

    // HOME VERTEX
    if ( CHOICE == 1 ) {
        // Call the CUDA kernel (Choice 1 -- Home Vertex Position Only)
        Global__HETriMeshOneModelMultiParts_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 
            numOfVertices, numOfThreads, 
            currentTime, timeStep, 
            ptMass, Ks, Kd, HKs, HKd, 
            max_connection_size, 
            out_data 
        );
        // Swap pointers for Vertex List and Previous Vertex List
        // So that the Previous Vertex List is remain unchanged in the CUDA memory.
        DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();
    }

    // MASS SPRING SYSTEM
    else if ( CHOICE == 2 ) {
        float subTimeStep = timeStep / numOfSubSteps;
        for ( int i = 0; i < numOfSubSteps; ++i ) {
            //printf( "delta time: %g\n", timeStep/numOfSubSteps );

            // Call the CUDA kernel (Choice 2 -- Mass Spring System)
            Global__HETriMeshOneModelMultiParts_AdvSim_CU<2><<< Dg, Db, Ns, S >>>( 
                numOfVertices, numOfThreads, 
                currentTime, subTimeStep, 
                ptMass, Ks, Kd, HKs, HKd, 
                max_connection_size, 
                out_data 
            );
            // Swap pointers for Vertex List and Previous Vertex List
            // So that the Previous Vertex List is remain unchanged in the CUDA memory.
            DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();

            // Copy data to the device's memory for (current) Vertex List
            CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
            currentTime += subTimeStep;
        }
    }

    // Unbind Textures
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexConnectionList();

    // Check if kernel execution generated and error
    CUT_CHECK_ERROR( "Kernel execution failed!" );

    // Copy output data to host data Previous Vertex List
    // The host will have to swap its pointers (for current and previous vertex list)
    if ( CHOICE > 0 ) {
        CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
    }

    //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, host_prev_vertex_data, host_home_vertex_data );
    //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug_ConnectionList( numOfVertices, max_connection_size, host_connection_list );

    // Free device memory
    CUDA_SAFE_CALL( cudaFree( out_data ) );
}

Here is the call graph for this function:

Here is the caller graph for this function:

template<int COMPUTATION_CHOICE>
__global__ void Global__HETriMeshOneModelMultiParts_AdvSim_CU ( unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
float  ptMass,
float  Ks,
float  Kd,
float  HKs,
float  HKd,
unsigned int  max_connection_size,
float *  out_data 
)

< spring stiffness

< spring damper

< home spring stiffness

< home spring damper

< vertex number

< maximum connection size

< acceleration

< current position

< previous position

< square of delta time

< damping value

Parameters:
numOfVerticesnumber of vertices
numOfThreadsnumber of threads
currentTimecurrent time
timeSteptime step
ptMasspoint mass
Ksspring stiffness
Kdspring damper
HKshome spring stiffness
HKdhome spring damper
max_connection_sizemaximum connection size
out_dataoutput data

Definition at line 1078 of file TAPsCUDA_VertexListMSM_Def.cu.

References CudaTexHomeVertexList, CudaTexPrevVertexList, CudaTexVertexList, Device__CalSpringForce_VertexList(), and Device__VerletIntegration().

{

    switch ( COMPUTATION_CHOICE ) {

        // Emulate Viscoelastic by home vertex positions
        case 1:
        {
            // Block index
            int bx = blockIdx.x;

            // Thread index
            int tx = threadIdx.x;

            // Indices
            int idx = (numOfThreads * bx + tx) * 4;
            int vertexNo = (numOfThreads * bx + tx);

            // Fetch data from texture linear memory
            float4 homeVertexPos = tex1Dfetch( CudaTexHomeVertexList, vertexNo );
            float4 vertex_pos = tex1Dfetch( CudaTexVertexList, vertexNo );

            float3 len;
            len.x = homeVertexPos.x - vertex_pos.x;
            len.y = homeVertexPos.y - vertex_pos.y;
            len.z = homeVertexPos.z - vertex_pos.z;

            len.x *= timeStep;
            len.y *= timeStep;
            len.z *= timeStep;

            float3 new_pos;
            new_pos.x = vertex_pos.x + len.x;
            new_pos.y = vertex_pos.y + len.y;
            new_pos.z = vertex_pos.z + len.z;

            //out_data[idx] = new_pos;
            out_data[idx  ] = new_pos.x;
            out_data[idx+1] = new_pos.y;
            out_data[idx+2] = new_pos.z;
            out_data[idx+3] = vertex_pos.w;
        }
        break;

        // Emulate Viscoelastic by mass spring connections
        case 2:
        {
            // Block index
            int bx = blockIdx.x;

            // Thread index
            int tx = threadIdx.x;

            // Vertex number
            int vertexNo = (numOfThreads * bx + tx);
            if ( vertexNo >= numOfVertices ) return;

            // Indix for output
            int idx = (numOfThreads * bx + tx) * 4;

            // Fetch data from texture linear memory
            float4 current_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );
            float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );

            // Synchronize to make sure the data are loaded
            //__syncthreads();

            float3 force = Device__CalSpringForce_VertexList( 
                Ks,                 
                Kd,                 
                HKs,                
                HKd,                
                vertexNo,           
                max_connection_size 
            );

            // Convert force to acceleration
            float invMass = 1.0f / ptMass;
            float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
            float dt_sqrt = timeStep*timeStep;

            float3 new_pos = Device__VerletIntegration( 
                acceleration,   
                current_pos,    
                previous_pos,   
                dt_sqrt,        
                Kd              
            );

            //  WARNING: out data have to be set at the end, otherwise it won't work!!! ???
            out_data[idx  ] = new_pos.x;
            out_data[idx+1] = new_pos.y;
            out_data[idx+2] = new_pos.z;
            out_data[idx+3] = current_pos.w;
        }
        break;
    }
}

Here is the call graph for this function:

void Global__ModelStrand_AdvSim ( unsigned int  cudaID,
unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
int  numOfSubSteps,
float  ptMass,
float  Ks,
float  Kd,
float  Lrest,
float *  host_vertex_data,
float *  host_prev_vertex_data 
)

For Suture Model (ModelStrand & ModelSuture)

CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function.

CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function

Parameters:
cudaIDCUDA ID
numOfVerticesnumber of vertices
numOfThreadsnumber of threads per CUDA thread block
currentTimecurrent time
timeSteptime step
numOfSubStepsnumber of sub-steps
ptMasspoint mass
Ksspring stiffness
Kdspring damper
Lrestspring rest length
host_vertex_datahost's vertex data
host_prev_vertex_datahost's previous vertex data

Definition at line 630 of file TAPsCUDA_VertexListMSM_Def.cu.

References DATA_Vertex_List::BindPrevVertexList(), DATA_Vertex_List::BindVertexList(), DATA_GlobalPool, DATA_Pool::DataForVertexList, DATA_Vertex_List::GetVertexList(), DATA_Vertex_List::SwapVertexList(), DATA_Vertex_List::UnbindPrevVertexList(), and DATA_Vertex_List::UnbindVertexList().

{
    //printf( "FILE:%s LINE:%d Global__ModelStrand_AdvSim\n", __FILE__, __LINE__ );
    //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
    //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
    //printf( "Number of vertices: %d\n", numOfVertices );
    //printf( "Number of threads: %d\n", numOfThreads );
    //printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );

    // Allocate device memory for out data
    unsigned int size = sizeof(float) * numOfVertices * 4;
    float * out_data;
    CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );

    // Copy Vertex List (and Previous Vertex List) from host data to device data, 
    // since vertices are dynamically changed.
    // While home vertices and connection list are unchanged.
    CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
    // The Previous Vertex List can be saved in CUDA memory.
    // Hence, the Previous Vertex List does not have to be updated.
    //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );

    // Kernel invocation
    unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
    dim3 Dg( numOfThreadBlocks, 1, 1 );
    dim3 Db( numOfThreads, 1, 1 );
    size_t Ns = 0;
    cudaStream_t S = 0;

    // Bind Textures
    DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
    DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
    //DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices );

    //*
    int CHOICE = 1;

    // MASS SPRING SYSTEM
    if ( CHOICE == 1 ) {
        float subTimeStep = timeStep / numOfSubSteps;
        for ( int i = 0; i < numOfSubSteps; ++i ) {
            // Call the CUDA kernel (Choice 1 -- Mass Spring System)
            Global__ModelStrand_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 
                numOfVertices, numOfThreads, 
                currentTime, subTimeStep, 
                ptMass, Ks, Kd, Lrest, 
                out_data 
            );
            // Swap pointers for Vertex List and Previous Vertex List
            // So that the Previous Vertex List is remain unchanged in the CUDA memory.
            DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();

            // Copy data to the device's memory for (current) Vertex List
            CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
            currentTime += subTimeStep;
        }
    }
    //*/

    // Unbind Textures
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
    //DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList();

    // Check if kernel execution generated and error
    CUT_CHECK_ERROR( "Kernel execution failed!" );

    // Copy output data to host data Previous Vertex List
    // The host will have to swap its pointers (for current and previous vertex list)
    if ( CHOICE > 0 ) {
        CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
    }

    //DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data );

    // Free device memory
    CUDA_SAFE_CALL( cudaFree( out_data ) );
}

Here is the call graph for this function:

void Global__ModelStrand_AdvSim_ADVSIM ( unsigned int  cudaID,
unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
int  numOfSubSteps,
float  ptMass,
float  Ks,
float  Kd,
float  Lrest,
float *  host_vertex_data,
float *  host_prev_vertex_data,
unsigned int *  host_sim_flags_data,
float *  host_pos_constraint_data 
)

CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function with TAPs_ADVANCED_SIMULATION

Parameters:
cudaIDCUDA ID
numOfVerticesnumber of vertices
numOfThreadsnumber of threads per CUDA thread block
currentTimecurrent time
timeSteptime step
numOfSubStepsnumber of sub-steps
ptMasspoint mass
Ksspring stiffness
Kdspring damper
Lrestspring rest length
host_vertex_datahost's vertex data
host_prev_vertex_datahost's previous vertex data
host_sim_flags_datahost's simulation flags data
host_pos_constraint_datahost's position constraint data

Definition at line 726 of file TAPsCUDA_VertexListMSM_Def.cu.

References DATA_Vertex_List::BindPosConstraintList(), DATA_Vertex_List::BindPrevVertexList(), DATA_Vertex_List::BindSimFlagsList(), DATA_Vertex_List::BindVertexList(), DATA_GlobalPool, DATA_Pool::DataForVertexList, DATA_Vertex_List::GetPosConstraintList(), DATA_Vertex_List::GetSimFlagsList(), DATA_Vertex_List::GetVertexList(), DATA_Vertex_List::PrintDebug(), DATA_Vertex_List::SwapVertexList(), DATA_Vertex_List::UnbindPosConstraintList(), DATA_Vertex_List::UnbindPrevVertexList(), DATA_Vertex_List::UnbindSimFlagsList(), and DATA_Vertex_List::UnbindVertexList().

{
    printf( "FILE:%s LINE:%d Global__ModelStrand_AdvSim_ADVSIM\n", __FILE__, __LINE__ );
    printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep );
    printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd );
    printf( "Number of vertices: %d\n", numOfVertices );
    printf( "Number of threads: %d\n", numOfThreads );
    printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads );

    // Allocate device memory for out data
    unsigned int size = sizeof(float) * numOfVertices * 4;
    float * out_data;
    CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );

    // Allocate device memory for out data for changing suture's simulation flags list
    unsigned int size_SimFlagsList = sizeof(unsigned int) * numOfVertices;
    unsigned int * out_data_SimFlagsList;
    CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_SimFlagsList, size_SimFlagsList ) );

    // Allocate device memory for out data for changing suture's position constraint list
    float * out_data_PosConstraintList;
    CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_PosConstraintList, size ) );

    // Copy Vertex List (and Previous Vertex List) from host data to device data, 
    // since vertices are dynamically changed.
    // While home vertices and connection list are unchanged.
    CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) );
    // The Previous Vertex List can be saved in CUDA memory.
    // Hence, the Previous Vertex List does not have to be updated.
    //CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) );

    CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetSimFlagsList(), host_sim_flags_data, sizeof(unsigned int)*numOfVertices, cudaMemcpyHostToDevice ) );
    CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetPosConstraintList(), host_pos_constraint_data, size, cudaMemcpyHostToDevice ) );

    // Kernel invocation
    unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
    dim3 Dg( numOfThreadBlocks, 1, 1 );
    dim3 Db( numOfThreads, 1, 1 );
    size_t Ns = 0;
    cudaStream_t S = 0;

    // Bind Textures
    DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices );
    DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices );
    DATA_GlobalPool.DataForVertexList[cudaID]->BindSimFlagsList( numOfVertices );
    DATA_GlobalPool.DataForVertexList[cudaID]->BindPosConstraintList( numOfVertices );

    //*
    // Add 10 for TAPs_ADVANCED_SIMULATION
    int CHOICE = 100 + 1;

    // MASS SPRING SYSTEM
    if ( CHOICE == 101 ) {
        float subTimeStep = timeStep / numOfSubSteps;
        for ( int i = 0; i < numOfSubSteps; ++i ) {
            //printf( "delta time: %g\n", timeStep/numOfSubSteps );

            // Call the CUDA kernel (Choice 100+1 -- Mass Spring System)
            // with TAPs_ADVANCED_SIMULATION
            Global__ModelStrand_AdvSim_CU<101><<< Dg, Db, Ns, S >>>( 
                numOfVertices, numOfThreads, 
                currentTime, subTimeStep, 
                ptMass, Ks, Kd, Lrest, 
                out_data 
            );
            // Swap pointers for Vertex List and Previous Vertex List
            // So that the Previous Vertex List is remain unchanged in the CUDA memory.
            DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList();

            // Copy data to the device's memory for (current) Vertex List
            CUDA_SAFE_CALL( cudaMemcpy( DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) );
            currentTime += subTimeStep;
        }

        // Call the CUDA kernel to enforce constraints
        // with TAPs_ADVANCED_SIMULATION
        Global__ModelStrand_AdvSim_Enforce_Constraint_CU<101><<< Dg, Db, Ns, S >>>( 
            numOfVertices, numOfThreads, 
            currentTime, subTimeStep, 
            ptMass, Ks, Kd, Lrest, 
            out_data_SimFlagsList, 
            out_data_PosConstraintList 
        );
    }
    //*/

    // Unbind Textures
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList();
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList();
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindSimFlagsList();
    DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPosConstraintList();

    // Check if kernel execution generated and error
    CUT_CHECK_ERROR( "Kernel execution failed!" );

    // Copy output data to host data Previous Vertex List
    // The host will have to swap its pointers (for current and previous vertex list)
    if ( CHOICE > 0 ) {
        CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) );
        CUDA_SAFE_CALL( cudaMemcpy( host_sim_flags_data, out_data_SimFlagsList, size_SimFlagsList, cudaMemcpyDeviceToHost ) );
        CUDA_SAFE_CALL( cudaMemcpy( host_pos_constraint_data, out_data_PosConstraintList, size, cudaMemcpyDeviceToHost ) );
    }

    /*
    // DEBUG
    {
        float * data = (float *)malloc( size );
        CUDA_SAFE_CALL( cudaMemcpy( data, out_data, size, cudaMemcpyDeviceToHost ) );
        for ( int i = 0, n = 0; i < 2; ++i, n+=4 ) {
            printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] );
        }
        for ( int i = numOfVertices-2, n = i*4; i < numOfVertices; ++i, n+=4 ) {
            printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] );
        }
        free( data );
    }
    */

    DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, NULL, NULL, host_sim_flags_data, host_pos_constraint_data );

    // Free device memory
    CUDA_SAFE_CALL( cudaFree( out_data ) );
    CUDA_SAFE_CALL( cudaFree( out_data_SimFlagsList ) );
    CUDA_SAFE_CALL( cudaFree( out_data_PosConstraintList ) );
}

Here is the call graph for this function:

template<int COMPUTATION_CHOICE>
__global__ void Global__ModelStrand_AdvSim_CU ( unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
float  ptMass,
float  Ks,
float  Kd,
float  Lrest,
float *  out_data 
)

< spring stiffness

< spring damper

< spring rest length

< vertex number

< number of vertices

< acceleration

< current position

< previous position

< square of delta time

< damping value

< spring stiffness

< spring damper

< spring rest length

< vertex number

< number of vertices

< acceleration

< current position

< previous position

< square of delta time

< damping value

Parameters:
numOfVerticesnumber of vertices
numOfThreadsnumber of threads
currentTimecurrent time
timeSteptime step
ptMasspoint mass
Ksspring stiffness
Kdspring damper
Lrestspring rest length
out_dataoutput data

Definition at line 336 of file TAPsCUDA_VertexListMSM_Def.cu.

References CudaTexPosConstraintList, CudaTexPrevVertexList, CudaTexSimFlagsList, CudaTexVertexList, Device__CalSpringForce_ModelStrand(), Device__EnforceConstraints_ADVSIM(), and Device__VerletIntegration().

{
    switch ( COMPUTATION_CHOICE ) {

        //-------------------------------------------------
        // Emulate Viscoelastic by mass spring connections
        // with TAPs_ADVANCED_SIMULATION
        case 101:
        {
            // Block index
            int bx = blockIdx.x;

            // Thread index
            int tx = threadIdx.x;

            // Vertex number
            int vertexNo = (numOfThreads * bx + tx);
            if ( vertexNo >= numOfVertices )    return;

            // Index for output
            int idx = (numOfThreads * bx + tx) * 4;

            // Fetch data from texture linear memory
            float4 current_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );

            // If the suture vertex is not fixed
            if ( current_pos.w == 1 ) {
                float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );
                float3 force = Device__CalSpringForce_ModelStrand( 
                    Ks,             
                    Kd,             
                    Lrest,          
                    vertexNo,       
                    numOfVertices   
                );
                // Convert force to acceleration
                float invMass = 1.0f / ptMass;
                float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
                float dt_sqrt = timeStep*timeStep;
                float3 new_pos = Device__VerletIntegration( 
                    acceleration,   
                    current_pos,    
                    previous_pos,   
                    dt_sqrt,        
                    Kd              
                );
                
                // Enforce constraints with TAPs_ADVANCED_SIMULATION
                uint1 simFlags = tex1Dfetch( CudaTexSimFlagsList, vertexNo );
                if ( simFlags.x > 0 ) {
                    float4 constraint_pos = tex1Dfetch( CudaTexPosConstraintList, vertexNo );
                    new_pos = Device__EnforceConstraints_ADVSIM( new_pos, constraint_pos );

                    //new_pos.x = constraint_pos.x;
                    //new_pos.y = constraint_pos.y;
                    //new_pos.z = constraint_pos.z;
                }

                //  WARNING: out data have to be set at the end, otherwise it won't work!!! ???
                out_data[idx  ] = new_pos.x;
                out_data[idx+1] = new_pos.y;
                out_data[idx+2] = new_pos.z;
                out_data[idx+3] = current_pos.w;
            }
            // If the suture vertex is fixed
            else {
                out_data[idx  ] = current_pos.x;
                out_data[idx+1] = current_pos.y;
                out_data[idx+2] = current_pos.z;
                out_data[idx+3] = current_pos.w;
            }
        }
        // Synchronize to make sure the data are loaded
        //__syncthreads();
        break;

        //-------------------------------------------------
        // Emulate Viscoelastic by mass spring connections
        case 1:
        {
            // Block index
            int bx = blockIdx.x;
            // Thread index
            int tx = threadIdx.x;
            // Vertex number
            int vertexNo = (numOfThreads * bx + tx);
            if ( vertexNo >= numOfVertices )    return;
            // Index for output
            int idx = (numOfThreads * bx + tx) * 4;

            // Fetch data from texture linear memory
            float4 current_pos  = tex1Dfetch( CudaTexVertexList, vertexNo );
            if ( current_pos.w == 1 ) {
                float4 previous_pos = tex1Dfetch( CudaTexPrevVertexList, vertexNo );
                float3 force = Device__CalSpringForce_ModelStrand( 
                    Ks,             
                    Kd,             
                    Lrest,          
                    vertexNo,       
                    numOfVertices   
                );
                // Convert force to acceleration
                float invMass = 1.0f / ptMass;
                float4 acceleration = make_float4( force.x*invMass, force.y*invMass, force.z*invMass, 0.0f );
                float dt_sqrt = timeStep*timeStep;
                // Calculate the new position
                float3 new_pos = Device__VerletIntegration( 
                    acceleration,   
                    current_pos,    
                    previous_pos,   
                    dt_sqrt,        
                    Kd              
                );

                //  WARNING: out data have to be set at the end, otherwise it won't work!!! ???
                out_data[idx  ] = new_pos.x;
                out_data[idx+1] = new_pos.y;
                out_data[idx+2] = new_pos.z;
                out_data[idx+3] = current_pos.w;
            }
            else {
                out_data[idx  ] = current_pos.x;
                out_data[idx+1] = current_pos.y;
                out_data[idx+2] = current_pos.z;
                out_data[idx+3] = current_pos.w;
            }
        }
        // Synchronize to make sure the data are loaded
        //__syncthreads();
        break;
    }
}

Here is the call graph for this function:

template<int COMPUTATION_CHOICE>
__global__ void Global__ModelStrand_AdvSim_Enforce_Constraint_CU ( unsigned int  numOfVertices,
unsigned int  numOfThreads,
float  currentTime,
float  timeStep,
float  ptMass,
float  Ks,
float  Kd,
float  Lrest,
unsigned int *  out_data_SimFlagsList,
float *  out_data_PosConstraintList 
)
Parameters:
numOfVerticesnumber of vertices
numOfThreadsnumber of threads
currentTimecurrent time
timeSteptime step
ptMasspoint mass
Ksspring stiffness
Kdspring damper
Lrestspring rest length
out_data_SimFlagsListoutput data for simulation flags
out_data_PosConstraintListoutput data for position constaint

Definition at line 483 of file TAPsCUDA_VertexListMSM_Def.cu.

References CudaTexPosConstraintList, CudaTexSimFlagsList, CudaTexVertexList, TAPs_Const_Suture_ShiftDown, and TAPs_Const_Suture_ShiftUp.

{
    switch ( COMPUTATION_CHOICE ) {

        //-------------------------------------------------
        // Strand's vertex is slidable
        // with TAPs_ADVANCED_SIMULATION
        case 101:
        {
            // Block index
            int bx = blockIdx.x;
            // Thread index
            int tx = threadIdx.x;
            // Vertex number
            int vertexNo = (numOfThreads * bx + tx);
            if ( vertexNo >= numOfVertices )    return;
            // Index for output
            int idx = vertexNo * 4;

            // Fetch data from texture linear memories
            float4 pos_j  = tex1Dfetch( CudaTexVertexList, vertexNo );
            uint1 simFlags  = tex1Dfetch( CudaTexSimFlagsList, vertexNo );
            float4 posConstraint = tex1Dfetch( CudaTexPosConstraintList, vertexNo );

            bool bDefault = false;

            // If the suture vertex is not fixed
            if ( pos_j.w == 1.0f ) {

                // Values of simulation flags (declared in TAPsEnumList.hpp):
                //   SIMULATION CONSTRAINTS
                //   enum SimConstraints {
                //      CLEARED     = 0,
                //      FIXED       = 1,
                //      ATTACHED    = 1 << 1,
                //      PUNCTURED   = 1 << 2,
                //      SLIDABLE    = 1 << 3,
                //      DUMMY   
                //  };

                // CLEARED
                if ( simFlags.x == 0 ) {
                    bDefault = true;
                }
                // FIXED
                else if ( simFlags.x == 1 ) {
                    bDefault = true;
                }
                // ATTACHED
                else if ( simFlags.x == 2 ) {
                    bDefault = true;
                }
                // PUNCTURED (n/a)
                //else if ( simFlags.x == 4 ) {
                //}
                // SLIDABLE
                else if ( simFlags.x == 8 ) {
                //else if ( false ) {
                    if ( 0 < vertexNo && vertexNo < numOfVertices-2 ) {
                        int i = vertexNo-1;
                        int k = vertexNo+1;
                        float4 pos_i  = tex1Dfetch( CudaTexVertexList, i );
                        float4 pos_k  = tex1Dfetch( CudaTexVertexList, k );
                        float3 linkA = make_float3( pos_i.x - pos_j.x, pos_i.y - pos_j.y, pos_i.z - pos_j.z );
                        float3 linkB = make_float3( pos_k.x - pos_j.x, pos_k.y - pos_j.y, pos_k.z - pos_j.z );
                        float linkAlen = sqrt( linkA.x*linkA.x + linkA.y*linkA.y + linkA.z*linkA.z );
                        float linkBlen = sqrt( linkB.x*linkB.x + linkB.y*linkB.y + linkB.z*linkB.z );

                        float threshold = 1.025f;
                        //float threshold = 2.0f;

                        // Shift up
                        if ( linkAlen > linkBlen*threshold ) {
                            //uint1 simFlags_up  = tex1Dfetch( CudaTexSimFlagsList, vertexNo+1 );
                            // if simFlags of the right vertex is cleared
                            //if ( simFlags_up.x == 0 ) {
                                out_data_SimFlagsList[vertexNo] = simFlags.x;
                                out_data_PosConstraintList[idx  ] = posConstraint.x;
                                out_data_PosConstraintList[idx+1] = posConstraint.y;
                                out_data_PosConstraintList[idx+2] = posConstraint.z;
                                out_data_PosConstraintList[idx+3] = posConstraint.w + TAPs_Const_Suture_ShiftUp;    // +20 to signify that the strand vertex must be shifted up
                            //}
                            //else {
                            //  bDefault = true;
                            //}
                        }
                        // Shift down
                        else if ( linkBlen > linkAlen*threshold ) {
                            //uint1 simFlags_down  = tex1Dfetch( CudaTexSimFlagsList, vertexNo-1 );
                            // if simFlags of the left vertex is cleared
                            //if ( simFlags_down.x == 0 ) {
                                out_data_SimFlagsList[vertexNo] = simFlags.x;
                                out_data_PosConstraintList[idx  ] = posConstraint.x;
                                out_data_PosConstraintList[idx+1] = posConstraint.y;
                                out_data_PosConstraintList[idx+2] = posConstraint.z;
                                out_data_PosConstraintList[idx+3] = posConstraint.w + TAPs_Const_Suture_ShiftDown;  // +10 to signify that the strand vertex must be shifted down
                            //}
                            //else {
                            //  bDefault = true;
                            //}
                        }
                        else {
                            bDefault = true;
                        }
                    }
                    else {
                        bDefault = true;
                    }
                }
                // DEFAULT CASE
                else {
                    bDefault = true;
                }
            }

            // If the suture vertex is fixed
            else {
                bDefault = true;
            }

            if ( bDefault ) {
                out_data_SimFlagsList[vertexNo] = simFlags.x;
                out_data_PosConstraintList[idx  ] = posConstraint.x;
                out_data_PosConstraintList[idx+1] = posConstraint.y;
                out_data_PosConstraintList[idx+2] = posConstraint.z;
                out_data_PosConstraintList[idx+3] = posConstraint.w;
            }
        }
        // Synchronize to make sure the data are loaded
        //__syncthreads();
        break;
    }
}
void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim ( int  numOfVertices,
int  numOfThreads,
float  tCurrent,
float  tNext,
float *  host_data 
)

For Suture Model (ModelStrand & ModelSuture)

CUDA Wrapper Function -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function.

For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function CUDA Wrapper Function -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function

Parameters:
numOfVerticesnumber of vertices
numOfThreadsnumber of threads per CUDA thread block
currentTimecurrent time
timeSteptime step
host_datahost data

Definition at line 939 of file TAPsCUDA_VertexListMSM_Def.cu.

Referenced by SimPropForMultiPartMeshModel_HalfEdge< T >::AdvSim().

{
    /*
    // CUDA device properties
    int dev_no;
    CUDA_SAFE_CALL( cudaGetDevice( &dev_no ) );
    cudaDeviceProp * prop;
    CUDA_SAFE_CALL( cudaGetDeviceProperties( prop, dev_no ) );
    //printf( "maxThreadsPerBlock: %d\n", prop->maxThreadsPerBlock );
    */

    // Kernel invocation
    unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads;
    dim3 Dg( numOfThreadBlocks, 1, 1 );
    dim3 Db( numOfThreads, 1, 1 );
    size_t Ns = 0;
    cudaStream_t S = 0;

    printf( "Number of vertices: %d\n", numOfVertices );
    printf( "Number of threads: %d\n", numOfThreads );
    printf( "Number of grids: %d\n", numOfVertices/numOfThreads );

    // Allocate device memory
    unsigned int size = sizeof(float) * numOfVertices * 3;
    float * device_data;
    CUDA_SAFE_CALL( cudaMalloc( (void **)&device_data, size ) );
    float * out_data;
    CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) );

    // Copy host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( device_data, host_data, size, cudaMemcpyHostToDevice ) );

    // Call the CUDA kernel
    Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU<1><<< Dg, Db, Ns, S >>>
    ( numOfVertices, numOfThreads, currentTime, timeStep, device_data, out_data );

    // Check if kernel execution generated and error
    CUT_CHECK_ERROR( "Kernel execution failed" );

    // Copy output data to host data
    CUDA_SAFE_CALL( cudaMemcpy( host_data, out_data, size, cudaMemcpyDeviceToHost ) );


    // Free device memory
    CUDA_SAFE_CALL( cudaFree( device_data ) );
    CUDA_SAFE_CALL( cudaFree( out_data ) );
}

Here is the caller graph for this function:

template<int COMPUTATION_CHOICE>
__global__ void Global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU ( int  numOfVertices,
int  numOfThreads,
float  currentTime,
float  timeStep,
float *  device_data,
float *  out_data 
)
Parameters:
numOfVerticesnumber of vertices
numOfThreadsnumber of threads per CUDA thread block
currentTimecurrent time
timeSteptime step
device_datadevice data
out_dataoutput data

Definition at line 879 of file TAPsCUDA_VertexListMSM_Def.cu.

{
    // Block index
    int bx = blockIdx.x;

    // Thread index
    int tx = threadIdx.x;

    // Index
    int idx = (numOfThreads * bx + tx) * 3;

    // Use device shared memory to load the vertex position from device global memory
    /*__shared__*/ float vertex_pos[3];
    //vertex_pos = device_data[idx];
    vertex_pos[0] = device_data[idx  ];
    vertex_pos[1] = device_data[idx+1];
    vertex_pos[2] = device_data[idx+2];

    // Synchronize to make sure the data are loaded
    __syncthreads();

    //out_data[idx  ] = vertex_pos[idx  ];
    //out_data[idx+1] = vertex_pos[idx+1];
    //out_data[idx+2] = vertex_pos[idx+2];
    //out_data[idx  ] = 0.0f;
    //out_data[idx+1] = 0.0f;
    //out_data[idx+2] = 0.0f;

    float new_pos[3];
    //new_pos = vertex_pos + 0.0001f;
    new_pos[0] = vertex_pos[0] + 0.0001f;
    new_pos[1] = vertex_pos[1];
    new_pos[2] = vertex_pos[2];

    //__syncthreads();

    //out_data[idx] = new_pos;
    out_data[idx  ] = new_pos[0];
    out_data[idx+1] = new_pos[1];
    out_data[idx+2] = new_pos[2];

    //out_data[idx  ] = 0.0f;
    //out_data[idx+1] = 0.0f;
    //out_data[idx+2] = 0.0f;

    //device_data[idx  ] = device_data[idx  ] + 0.0001f;
    //device_data[idx+1] = device_data[idx+1];
    //device_data[idx+2] = device_data[idx+2];

}
bool InitailizeDataForSutureModel ( unsigned int &  cudaID,
unsigned int  numOfVertices,
float *  vertexList,
float *  prevVertexList 
)

Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture)

Parameters:
cudaIDCUDA ID for the object
numOfVerticesnumber of vertices
vertexListlist of xyzw vertices
prevVertexListlist of previous xyzw vertices

Definition at line 60 of file TAPsCUDA_VertexListMSM_Def.cu.

References AddToGlobal_Pool_Of_DATA_Vertex_List(), DATA_GlobalPool, DATA_Vertex_List::GetPrevVertexList(), DATA_Vertex_List::GetVertexList(), and DATA_Pool::SizeOfGlobal_Pool.

{
    // Here Suture Model uses Vertex List Data
    // However, it has implicit connection (each vertex connect to the previous and next vertices).
    // So the connection list is set to zero.

    // Home vertex is unused for now.  Plan is to use home vertex for sticking suture to a surface.

    // Assign CUDA ID to the object
    // The object has to use cudaID to communicate with the CUDA.
    cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;

    // Allocate CUDA data for the object.
    DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, false, 0, true );
    AddToGlobal_Pool_Of_DATA_Vertex_List( newData );

    // Size of memory for (xyzw) vertices
    int size = numOfVertices * 4 * sizeof(float);

    // Copy Vertex List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Previous Vertex List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Home Vertex List from host data to device data
    //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Vertex Connection List from host data to device data

    return true;
}

Here is the call graph for this function:

bool InitailizeDataForSutureModel_ADVSIM ( unsigned int &  cudaID,
unsigned int  numOfVertices,
float *  vertexList,
float *  prevVertexList,
unsigned int *  simFlagsList,
float *  posConstraintList 
)

Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture) with TAPs_ADVANCED_SIMULATION

Parameters:
cudaIDCUDA ID for the object
numOfVerticesnumber of vertices
vertexListlist of xyzw vertices
prevVertexListlist of previous xyzw vertices
simFlagsListlist of simulation flags
posConstraintListlist of position constraints (xyz) plus force ratio (w)

Definition at line 99 of file TAPsCUDA_VertexListMSM_Def.cu.

References AddToGlobal_Pool_Of_DATA_Vertex_List(), DATA_GlobalPool, DATA_Vertex_List::GetPosConstraintList(), DATA_Vertex_List::GetPrevVertexList(), DATA_Vertex_List::GetSimFlagsList(), DATA_Vertex_List::GetVertexList(), and DATA_Pool::SizeOfGlobal_Pool.

{
    // Here Suture Model uses Vertex List Data
    // However, it has implicit connection (each vertex connect to the previous and next vertices).
    // So the connection list is set to zero.

    // Home vertex is unused for now.  Plan is to use home vertex for sticking suture to a surface.

    // Assign CUDA ID to the object
    // The object has to use cudaID to communicate with the CUDA.
    cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;

    // Allocate CUDA data for the object.
    DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, false, 0, true );
    AddToGlobal_Pool_Of_DATA_Vertex_List( newData );

    // Size of memory for (xyzw) vertices
    int size = numOfVertices * 4 * sizeof(float);

    // Copy Vertex List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Previous Vertex List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Home Vertex List from host data to device data
    //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Vertex Connection List from host data to device data
    //CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) );
    // Copy Simulation Flags List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetSimFlagsList(), simFlagsList, numOfVertices*sizeof(unsigned int), cudaMemcpyHostToDevice ) );
    // Copy Position Constraint List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetPosConstraintList(), posConstraintList, size, cudaMemcpyHostToDevice ) );

    return true;
}

Here is the call graph for this function:

BEGIN_NAMESPACE_TAPs__CUDA bool InitailizeDataForVertexList ( unsigned int &  cudaID,
unsigned int  numOfVertices,
unsigned int  max_connection_size,
float *  vertexList,
float *  prevVertexList,
float *  homeVertexList,
int *  vertexConnectionList 
)

For HETriMeshOneModelMultiParts AdvSim Function.

Initialize CUDA Data for a Mesh Model by Using Vertex List.

Initialize CUDA Data for a Mesh Model by Using Vertex List

Parameters:
cudaIDCUDA ID for the object
numOfVerticesnumber of vertices
max_connection_sizemaximum connection size
vertexListlist of xyz vertices
prevVertexListlist of previous xyzw vertices
homeVertexListlist of xyz home vertices
vertexConnectionListvertex connection (1D array of 2D array -- vertex id and vertex connections)

Definition at line 21 of file TAPsCUDA_VertexListMSM_Def.cu.

References AddToGlobal_Pool_Of_DATA_Vertex_List(), DATA_GlobalPool, DATA_Vertex_List::GetHomeVertexList(), DATA_Vertex_List::GetPrevVertexList(), DATA_Vertex_List::GetVertexConnectionList(), DATA_Vertex_List::GetVertexList(), and DATA_Pool::SizeOfGlobal_Pool.

Referenced by HETriMeshOneModelMultiParts< T >::CUDA_Initialize_All().

{
    //printf( "InitailizeDataForVertexList\n" );
    //fflush( stdout );

    // Assign CUDA ID to the object
    // The object has to use cudaID to communicate with the CUDA.
    cudaID = DATA_GlobalPool.SizeOfGlobal_Pool;

    // Allocate CUDA data for the object.
    DATA_Vertex_List * newData = new DATA_Vertex_List( numOfVertices, true, max_connection_size, false );
    AddToGlobal_Pool_Of_DATA_Vertex_List( newData );

    // Size of memory for (xyzw) vertices
    int size = numOfVertices * 4 * sizeof(float);

    // Copy Vertex List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Previous Vertex List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Home Vertex List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) );
    // Copy Vertex Connection List from host data to device data
    CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) );

    return true;
}

Here is the call graph for this function:

Here is the caller graph for this function:

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines