#include "../Support/TAPsListOfAdvSimConsts.hpp"#include <cstdlib>#include <cstdio>#include <cutil.h>#include "TAPsCUDA_Def.cu"

Go to the source code of this file.
Classes | |
| class | TAPs_CUDA_CLASS_DATA_Pool |
| class | TAPs_CUDA_CLASS_DATA_Vertex_List |
Defines | |
| #define | TAPs_CUDA_DATA_VERTEX_LIST_TEXTURE_FROM_LIN_MEM |
Functions | |
| bool | AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List (TAPs_CUDA_CLASS_DATA_Vertex_List *dataObj) |
| Add Vertex List Data Object to the CUDA Global Data Pool. | |
| template<typename T> | |
| __device__ void | TAPsCUDA__device__F1 () |
| template<typename T> | |
| __global__ void | TAPsCUDA__global__F1 () |
| void | TAPsCUDA__global__HETriMeshOneModelMultiParts_AdvSim (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float HKs, float HKd, float *host_vertex_data, float *host_prev_vertex_data, float *host_home_vertex_data, int *host_connection_list, unsigned int max_connection_size) |
| For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function. | |
| void | TAPsCUDA__global__ModelStrand_AdvSim (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float Lrest, float *host_vertex_data, float *host_prev_vertex_data) |
| For Suture Model (ModelStrand & ModelSuture). | |
| void | TAPsCUDA__global__ModelStrand_AdvSim_ADVSIM (unsigned int cudaID, unsigned int numOfVertices, unsigned int numOfThreads, float currentTime, float timeStep, int numOfSubSteps, float ptMass, float Ks, float Kd, float Lrest, float *host_vertex_data, float *host_prev_vertex_data, unsigned int *host_sim_flags_data, float *host_pos_constraint_data) |
| void | TAPsCUDA__global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim (int numOfVertices, int numOfThreads, float tCurrent, float tNext, float *host_data) |
| For Suture Model (ModelStrand & ModelSuture). | |
| template<typename T> | |
| __host__ __device__ void | TAPsCUDA__host__device__F1 () |
| void | TAPsCUDA__host__Init (int argc, char **argv) |
| Initialize CUDA Device. | |
| bool | TAPsCUDA__InitailizeDataForSutureModel (unsigned int &cudaID, unsigned int numOfVertices, float *vertexList, float *prevVertexList) |
| Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture). | |
| bool | TAPsCUDA__InitailizeDataForSutureModel_ADVSIM (unsigned int &cudaID, unsigned int numOfVertices, float *vertexList, float *prevVertexList, unsigned int *simFlagsList, float *posConstraintList) |
| Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture). | |
| bool | TAPsCUDA__InitailizeDataForVertexList (unsigned int &cudaID, unsigned int numOfVertices, unsigned int max_connection_size, float *vertexList, float *prevVertexList, float *homeVertexList, int *vertexConnectionList) |
| Initialize CUDA Data for a Mesh Model by Using Vertex List. | |
Variables | |
| texture< float4, 1, cudaReadModeElementType > | CudaTexHomeVertexList |
| texture for list of home vertices | |
| texture< float4, 1, cudaReadModeElementType > | CudaTexPosConstraintList |
| texture for list of position constraints (xyz) plus force ratio (w) | |
| texture< float4, 1, cudaReadModeElementType > | CudaTexPrevVertexList |
| texture for list of previous vertices | |
| texture< uint1, 1, cudaReadModeElementType > | CudaTexSimFlagsList |
| texture for list of simulation flags | |
| texture< int1, 1, cudaReadModeElementType > | CudaTexVertexConnectionList |
| texture for list of vertex connections (x:=vertex#, y:=rest length, z:=stiffness, w:=damper) | |
| texture< float4, 1, cudaReadModeElementType > | CudaTexVertexList |
| For HETriMeshOneModelMultiParts AdvSim Function. | |
| TAPs_CUDA_CLASS_DATA_Pool | TAPs_CUDA_DATA_GlobalPool |
| CUDA Global Data Pool. | |
| #define TAPs_CUDA_DATA_VERTEX_LIST_TEXTURE_FROM_LIN_MEM |
Definition at line 243 of file TAPsCUDA.cu.
| bool AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List | ( | TAPs_CUDA_CLASS_DATA_Vertex_List * | dataObj | ) |
Add Vertex List Data Object to the CUDA Global Data Pool.
Definition at line 150 of file TAPsCUDA_Def.cu.
00151 { 00152 //printf( "AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List\n" ); 00153 //fflush( stdout ); 00154 00155 unsigned int current_size = TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool; 00156 if ( current_size == 0 ) { 00157 // SHOULD NEVER REACHED THIS!!! 00158 //TAPs_CUDA_DATA_GlobalPool.DataForVertexList = (TAPs_CUDA_CLASS_DATA_Vertex_List *)malloc( ++TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool ); 00159 TAPs_CUDA_DATA_GlobalPool.DataForVertexList = new TAPs_CUDA_CLASS_DATA_Vertex_List * [ ++TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool ]; 00160 } 00161 else { 00162 TAPs_CUDA_CLASS_DATA_Vertex_List ** old_list = TAPs_CUDA_DATA_GlobalPool.DataForVertexList; 00163 //TAPs_CUDA_DATA_GlobalPool.DataForVertexList = (TAPs_CUDA_CLASS_DATA_Vertex_List *)malloc( ++TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool ); 00164 TAPs_CUDA_DATA_GlobalPool.DataForVertexList = new TAPs_CUDA_CLASS_DATA_Vertex_List * [ ++TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool ]; 00165 for ( int i = 0; i < current_size; ++i ) { 00166 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[ i ] = old_list[ i ]; 00167 } 00168 //free ( old_list ); 00169 delete [] old_list; 00170 } 00171 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[ current_size ] = dataObj; 00172 return true; 00173 }
| __device__ void TAPsCUDA__device__F1 | ( | ) | [inline] |
__device__F1 __device__ means a function that is executed on the device and callable from the device only.
Definition at line 116 of file TAPsCUDA.cu.
| __global__ void TAPsCUDA__global__F1 | ( | ) | [inline] |
__global__F1 __global__ means a function as being a kernel, which is executed on the device and callable from the host only.
Definition at line 110 of file TAPsCUDA.cu.
| void TAPsCUDA__global__HETriMeshOneModelMultiParts_AdvSim | ( | unsigned int | cudaID, | |
| unsigned int | numOfVertices, | |||
| unsigned int | numOfThreads, | |||
| float | currentTime, | |||
| float | timeStep, | |||
| int | numOfSubSteps, | |||
| float | ptMass, | |||
| float | Ks, | |||
| float | Kd, | |||
| float | HKs, | |||
| float | HKd, | |||
| float * | host_vertex_data, | |||
| float * | host_prev_vertex_data, | |||
| float * | host_home_vertex_data, | |||
| int * | host_connection_list, | |||
| unsigned int | max_connection_size | |||
| ) |
For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function.
For HETriMeshOneModelMultiParts AdvSim Function CUDA Wrapper Function -- For HETriMeshOneModelMultiParts AdvSim Function
| cudaID | CUDA ID |
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| numOfSubSteps | number of sub-steps |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| HKs | home spring stiffness |
| HKd | home spring damper |
| host_vertex_data | host's vertex data |
| host_prev_vertex_data | host's previous vertex data |
| host_home_vertex_data | host's home vertex data |
| host_connection_list | host's connection index list |
| max_connection_size | maximum connection size |
Definition at line 1217 of file TAPsCUDA_Def.cu.
01235 { 01236 //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep ); 01237 //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd ); 01238 01239 //printf( "Number of vertices: %d\n", numOfVertices ); 01240 //printf( "Number of threads: %d\n", numOfThreads ); 01241 //printf( "Number of grids: %d\n", numOfVertices/numOfThreads ); 01242 01243 // Allocate device memory for out data 01244 unsigned int size = sizeof(float) * numOfVertices * 4; 01245 float * out_data; 01246 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) ); 01247 01248 // Copy Vertex List (and Previous Vertex List) from host data to device data, 01249 // since vertices are dynamically changed. 01250 // While home vertices and connection list are unchanged. 01251 CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) ); 01252 // The Previous Vertex List can be saved in CUDA memory. 01253 // Hence, the Previous Vertex List does not have to be updated. 01254 //CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) ); 01255 01256 // Kernel invocation 01257 unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads; 01258 dim3 Dg( numOfThreadBlocks, 1, 1 ); 01259 dim3 Db( numOfThreads, 1, 1 ); 01260 size_t Ns = 0; 01261 cudaStream_t S = 0; 01262 01263 // Bind Textures 01264 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices ); 01265 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices ); 01266 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices ); 01267 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexConnectionList( numOfVertices, max_connection_size ); 01268 01269 int CHOICE = 2; 01270 01271 // HOME VERTEX 01272 if ( CHOICE == 1 ) { 01273 // Call the CUDA kernel (Choice 1 -- Home Vertex Position Only) 01274 TAPsCUDA__global__HETriMeshOneModelMultiParts_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 01275 numOfVertices, numOfThreads, 01276 currentTime, timeStep, 01277 ptMass, Ks, Kd, HKs, HKd, 01278 max_connection_size, 01279 out_data 01280 ); 01281 // Swap pointers for Vertex List and Previous Vertex List 01282 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 01283 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 01284 } 01285 01286 // MASS SPRING SYSTEM 01287 else if ( CHOICE == 2 ) { 01288 float subTimeStep = timeStep / numOfSubSteps; 01289 for ( int i = 0; i < numOfSubSteps; ++i ) { 01290 //printf( "delta time: %g\n", timeStep/numOfSubSteps ); 01291 01292 // Call the CUDA kernel (Choice 2 -- Mass Spring System) 01293 TAPsCUDA__global__HETriMeshOneModelMultiParts_AdvSim_CU<2><<< Dg, Db, Ns, S >>>( 01294 numOfVertices, numOfThreads, 01295 currentTime, subTimeStep, 01296 ptMass, Ks, Kd, HKs, HKd, 01297 max_connection_size, 01298 out_data 01299 ); 01300 // Swap pointers for Vertex List and Previous Vertex List 01301 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 01302 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 01303 01304 // Copy data to the device's memory for (current) Vertex List 01305 CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) ); 01306 currentTime += subTimeStep; 01307 } 01308 } 01309 01310 // Unbind Textures 01311 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList(); 01312 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList(); 01313 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList(); 01314 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexConnectionList(); 01315 01316 // Check if kernel execution generated and error 01317 CUT_CHECK_ERROR( "Kernel execution failed!" ); 01318 01319 // Copy output data to host data Previous Vertex List 01320 // The host will have to swap its pointers (for current and previous vertex list) 01321 if ( CHOICE > 0 ) { 01322 CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) ); 01323 } 01324 01325 //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, host_prev_vertex_data, host_home_vertex_data ); 01326 //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug_ConnectionList( numOfVertices, max_connection_size, host_connection_list ); 01327 01328 // Free device memory 01329 CUDA_SAFE_CALL( cudaFree( out_data ) ); 01330 }
| void TAPsCUDA__global__ModelStrand_AdvSim | ( | unsigned int | cudaID, | |
| unsigned int | numOfVertices, | |||
| unsigned int | numOfThreads, | |||
| float | currentTime, | |||
| float | timeStep, | |||
| int | numOfSubSteps, | |||
| float | ptMass, | |||
| float | Ks, | |||
| float | Kd, | |||
| float | Lrest, | |||
| float * | host_vertex_data, | |||
| float * | host_prev_vertex_data | |||
| ) |
For Suture Model (ModelStrand & ModelSuture).
CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function
| cudaID | CUDA ID |
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| numOfSubSteps | number of sub-steps |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| Lrest | spring rest length |
| host_vertex_data | host's vertex data |
| host_prev_vertex_data | host's previous vertex data |
Definition at line 652 of file TAPsCUDA_Def.cu.
00667 { 00668 //printf( "FILE:%s LINE:%d TAPsCUDA__global__ModelStrand_AdvSim\n", __FILE__, __LINE__ ); 00669 //printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep ); 00670 //printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd ); 00671 //printf( "Number of vertices: %d\n", numOfVertices ); 00672 //printf( "Number of threads: %d\n", numOfThreads ); 00673 //printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads ); 00674 00675 // Allocate device memory for out data 00676 unsigned int size = sizeof(float) * numOfVertices * 4; 00677 float * out_data; 00678 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) ); 00679 00680 // Copy Vertex List (and Previous Vertex List) from host data to device data, 00681 // since vertices are dynamically changed. 00682 // While home vertices and connection list are unchanged. 00683 CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) ); 00684 // The Previous Vertex List can be saved in CUDA memory. 00685 // Hence, the Previous Vertex List does not have to be updated. 00686 //CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) ); 00687 00688 // Kernel invocation 00689 unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads; 00690 dim3 Dg( numOfThreadBlocks, 1, 1 ); 00691 dim3 Db( numOfThreads, 1, 1 ); 00692 size_t Ns = 0; 00693 cudaStream_t S = 0; 00694 00695 // Bind Textures 00696 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices ); 00697 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices ); 00698 //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindHomeVertexList( numOfVertices ); 00699 00700 //* 00701 int CHOICE = 1; 00702 00703 // MASS SPRING SYSTEM 00704 if ( CHOICE == 1 ) { 00705 float subTimeStep = timeStep / numOfSubSteps; 00706 for ( int i = 0; i < numOfSubSteps; ++i ) { 00707 // Call the CUDA kernel (Choice 1 -- Mass Spring System) 00708 TAPsCUDA__global__ModelStrand_AdvSim_CU<1><<< Dg, Db, Ns, S >>>( 00709 numOfVertices, numOfThreads, 00710 currentTime, subTimeStep, 00711 ptMass, Ks, Kd, Lrest, 00712 out_data 00713 ); 00714 // Swap pointers for Vertex List and Previous Vertex List 00715 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 00716 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 00717 00718 // Copy data to the device's memory for (current) Vertex List 00719 CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) ); 00720 currentTime += subTimeStep; 00721 } 00722 } 00723 //*/ 00724 00725 // Unbind Textures 00726 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList(); 00727 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList(); 00728 //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindHomeVertexList(); 00729 00730 // Check if kernel execution generated and error 00731 CUT_CHECK_ERROR( "Kernel execution failed!" ); 00732 00733 // Copy output data to host data Previous Vertex List 00734 // The host will have to swap its pointers (for current and previous vertex list) 00735 if ( CHOICE > 0 ) { 00736 CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) ); 00737 } 00738 00739 //TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data ); 00740 00741 // Free device memory 00742 CUDA_SAFE_CALL( cudaFree( out_data ) ); 00743 }
| void TAPsCUDA__global__ModelStrand_AdvSim_ADVSIM | ( | unsigned int | cudaID, | |
| unsigned int | numOfVertices, | |||
| unsigned int | numOfThreads, | |||
| float | currentTime, | |||
| float | timeStep, | |||
| int | numOfSubSteps, | |||
| float | ptMass, | |||
| float | Ks, | |||
| float | Kd, | |||
| float | Lrest, | |||
| float * | host_vertex_data, | |||
| float * | host_prev_vertex_data, | |||
| unsigned int * | host_sim_flags_data, | |||
| float * | host_pos_constraint_data | |||
| ) |
CUDA Wrapper Function -- For ModelStrand AdvanceSimulation Function with TAPs_ADVANCED_SIMULATION
CUDA Wrapper Function -- For ModelStrand AdvSim Function with TAPs_ADVANCED_SIMULATION
| cudaID | CUDA ID |
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| numOfSubSteps | number of sub-steps |
| ptMass | point mass |
| Ks | spring stiffness |
| Kd | spring damper |
| Lrest | spring rest length |
| host_vertex_data | host's vertex data |
| host_prev_vertex_data | host's previous vertex data |
| host_sim_flags_data | host's simulation flags data |
| host_pos_constraint_data | host's position constraint data |
Definition at line 748 of file TAPsCUDA_Def.cu.
00764 { 00765 printf( "FILE:%s LINE:%d TAPsCUDA__global__ModelStrand_AdvSim_ADVSIM\n", __FILE__, __LINE__ ); 00766 printf( "currentTime: %g, timeStep: %g,\n", currentTime, timeStep ); 00767 printf( "ptMass: %g, Ks: %g, Kd: %g,\n", ptMass, Ks, Kd ); 00768 printf( "Number of vertices: %d\n", numOfVertices ); 00769 printf( "Number of threads: %d\n", numOfThreads ); 00770 printf( "Number of grids: %d\n", ( numOfVertices + numOfThreads - 1 ) / numOfThreads ); 00771 00772 // Allocate device memory for out data 00773 unsigned int size = sizeof(float) * numOfVertices * 4; 00774 float * out_data; 00775 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) ); 00776 00777 // Allocate device memory for out data for changing suture's simulation flags list 00778 unsigned int size_SimFlagsList = sizeof(unsigned int) * numOfVertices; 00779 unsigned int * out_data_SimFlagsList; 00780 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_SimFlagsList, size_SimFlagsList ) ); 00781 00782 // Allocate device memory for out data for changing suture's position constraint list 00783 float * out_data_PosConstraintList; 00784 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data_PosConstraintList, size ) ); 00785 00786 // Copy Vertex List (and Previous Vertex List) from host data to device data, 00787 // since vertices are dynamically changed. 00788 // While home vertices and connection list are unchanged. 00789 CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), host_vertex_data, size, cudaMemcpyHostToDevice ) ); 00790 // The Previous Vertex List can be saved in CUDA memory. 00791 // Hence, the Previous Vertex List does not have to be updated. 00792 //CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetPrevVertexList(), host_prev_vertex_data, size, cudaMemcpyHostToDevice ) ); 00793 00794 CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetSimFlagsList(), host_sim_flags_data, sizeof(unsigned int)*numOfVertices, cudaMemcpyHostToDevice ) ); 00795 CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetPosConstraintList(), host_pos_constraint_data, size, cudaMemcpyHostToDevice ) ); 00796 00797 // Kernel invocation 00798 unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads; 00799 dim3 Dg( numOfThreadBlocks, 1, 1 ); 00800 dim3 Db( numOfThreads, 1, 1 ); 00801 size_t Ns = 0; 00802 cudaStream_t S = 0; 00803 00804 // Bind Textures 00805 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindVertexList( numOfVertices ); 00806 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindPrevVertexList( numOfVertices ); 00807 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindSimFlagsList( numOfVertices ); 00808 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->BindPosConstraintList( numOfVertices ); 00809 00810 //* 00811 // Add 10 for TAPs_ADVANCED_SIMULATION 00812 int CHOICE = 100 + 1; 00813 00814 // MASS SPRING SYSTEM 00815 if ( CHOICE == 101 ) { 00816 float subTimeStep = timeStep / numOfSubSteps; 00817 for ( int i = 0; i < numOfSubSteps; ++i ) { 00818 //printf( "delta time: %g\n", timeStep/numOfSubSteps ); 00819 00820 // Call the CUDA kernel (Choice 100+1 -- Mass Spring System) 00821 // with TAPs_ADVANCED_SIMULATION 00822 TAPsCUDA__global__ModelStrand_AdvSim_CU<101><<< Dg, Db, Ns, S >>>( 00823 numOfVertices, numOfThreads, 00824 currentTime, subTimeStep, 00825 ptMass, Ks, Kd, Lrest, 00826 out_data 00827 ); 00828 // Swap pointers for Vertex List and Previous Vertex List 00829 // So that the Previous Vertex List is remain unchanged in the CUDA memory. 00830 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->SwapVertexList(); 00831 00832 // Copy data to the device's memory for (current) Vertex List 00833 CUDA_SAFE_CALL( cudaMemcpy( TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->GetVertexList(), out_data, size, cudaMemcpyDeviceToDevice ) ); 00834 currentTime += subTimeStep; 00835 } 00836 00837 // Call the CUDA kernel to enforce constraints 00838 // with TAPs_ADVANCED_SIMULATION 00839 TAPsCUDA__global__ModelStrand_AdvSim_Enforce_Constraint_CU<101><<< Dg, Db, Ns, S >>>( 00840 numOfVertices, numOfThreads, 00841 currentTime, subTimeStep, 00842 ptMass, Ks, Kd, Lrest, 00843 out_data_SimFlagsList, 00844 out_data_PosConstraintList 00845 ); 00846 } 00847 //*/ 00848 00849 // Unbind Textures 00850 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindVertexList(); 00851 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPrevVertexList(); 00852 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindSimFlagsList(); 00853 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->UnbindPosConstraintList(); 00854 00855 // Check if kernel execution generated and error 00856 CUT_CHECK_ERROR( "Kernel execution failed!" ); 00857 00858 // Copy output data to host data Previous Vertex List 00859 // The host will have to swap its pointers (for current and previous vertex list) 00860 if ( CHOICE > 0 ) { 00861 CUDA_SAFE_CALL( cudaMemcpy( host_prev_vertex_data, out_data, size, cudaMemcpyDeviceToHost ) ); 00862 CUDA_SAFE_CALL( cudaMemcpy( host_sim_flags_data, out_data_SimFlagsList, size_SimFlagsList, cudaMemcpyDeviceToHost ) ); 00863 CUDA_SAFE_CALL( cudaMemcpy( host_pos_constraint_data, out_data_PosConstraintList, size, cudaMemcpyDeviceToHost ) ); 00864 } 00865 00866 /* 00867 // DEBUG 00868 { 00869 float * data = (float *)malloc( size ); 00870 CUDA_SAFE_CALL( cudaMemcpy( data, out_data, size, cudaMemcpyDeviceToHost ) ); 00871 for ( int i = 0, n = 0; i < 2; ++i, n+=4 ) { 00872 printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] ); 00873 } 00874 for ( int i = numOfVertices-2, n = i*4; i < numOfVertices; ++i, n+=4 ) { 00875 printf( "Out Vertex# %d: %g %g %g %g \n", i, data[n], data[n+1], data[n+2], data[n+3] ); 00876 } 00877 free( data ); 00878 } 00879 //*/ 00880 00881 TAPs_CUDA_DATA_GlobalPool.DataForVertexList[cudaID]->PrintDebug( numOfVertices, host_vertex_data, NULL, NULL, host_sim_flags_data, host_pos_constraint_data ); 00882 00883 // Free device memory 00884 CUDA_SAFE_CALL( cudaFree( out_data ) ); 00885 CUDA_SAFE_CALL( cudaFree( out_data_SimFlagsList ) ); 00886 CUDA_SAFE_CALL( cudaFree( out_data_PosConstraintList ) ); 00887 }
| void TAPsCUDA__global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim | ( | int | numOfVertices, | |
| int | numOfThreads, | |||
| float | currentTime, | |||
| float | timeStep, | |||
| float * | host_data | |||
| ) |
For Suture Model (ModelStrand & ModelSuture).
For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function CUDA Wrapper Function -- For SimPropForMultiPartMeshModel_HalfEdge AdvSim Function
| numOfVertices | number of vertices |
| numOfThreads | number of threads per CUDA thread block |
| currentTime | current time |
| timeStep | time step |
| host_data | host data |
Definition at line 963 of file TAPsCUDA_Def.cu.
00970 { 00971 /* 00972 // CUDA device properties 00973 int dev_no; 00974 CUDA_SAFE_CALL( cudaGetDevice( &dev_no ) ); 00975 cudaDeviceProp * prop; 00976 CUDA_SAFE_CALL( cudaGetDeviceProperties( prop, dev_no ) ); 00977 //printf( "maxThreadsPerBlock: %d\n", prop->maxThreadsPerBlock ); 00978 //*/ 00979 00980 // Kernel invocation 00981 unsigned int numOfThreadBlocks = ( numOfVertices + numOfThreads - 1 ) / numOfThreads; 00982 dim3 Dg( numOfThreadBlocks, 1, 1 ); 00983 dim3 Db( numOfThreads, 1, 1 ); 00984 size_t Ns = 0; 00985 cudaStream_t S = 0; 00986 00987 printf( "Number of vertices: %d\n", numOfVertices ); 00988 printf( "Number of threads: %d\n", numOfThreads ); 00989 printf( "Number of grids: %d\n", numOfVertices/numOfThreads ); 00990 00991 // Allocate device memory 00992 unsigned int size = sizeof(float) * numOfVertices * 3; 00993 float * device_data; 00994 CUDA_SAFE_CALL( cudaMalloc( (void **)&device_data, size ) ); 00995 float * out_data; 00996 CUDA_SAFE_CALL( cudaMalloc( (void **)&out_data, size ) ); 00997 00998 // Copy host data to device data 00999 CUDA_SAFE_CALL( cudaMemcpy( device_data, host_data, size, cudaMemcpyHostToDevice ) ); 01000 01001 // Call the CUDA kernel 01002 TAPsCUDA__global__SimPropForMultiPartMeshModel_HalfEdge_AdvSim_CU<1><<< Dg, Db, Ns, S >>> 01003 ( numOfVertices, numOfThreads, currentTime, timeStep, device_data, out_data ); 01004 01005 // Check if kernel execution generated and error 01006 CUT_CHECK_ERROR( "Kernel execution failed" ); 01007 01008 // Copy output data to host data 01009 CUDA_SAFE_CALL( cudaMemcpy( host_data, out_data, size, cudaMemcpyDeviceToHost ) ); 01010 01011 01012 // Free device memory 01013 CUDA_SAFE_CALL( cudaFree( device_data ) ); 01014 CUDA_SAFE_CALL( cudaFree( out_data ) ); 01015 }
| __host__ __device__ void TAPsCUDA__host__device__F1 | ( | ) | [inline] |
__host__device__F1 __host__ and __device__ means a function that is compiled for both the host and the device.
Definition at line 122 of file TAPsCUDA.cu.
| void TAPsCUDA__host__Init | ( | int | argc, | |
| char ** | argv | |||
| ) |
Initialize CUDA Device.
Initialize CUDA Device
Definition at line 21 of file TAPsCUDA_Def.cu.
00022 { 00023 printf( "START: CUT_DEVICE_INIT\n" ); 00024 CUT_DEVICE_INIT(argc, argv); 00025 printf( "END: CUT_DEVICE_INIT\n" ); 00026 }
| bool TAPsCUDA__InitailizeDataForSutureModel | ( | unsigned int & | cudaID, | |
| unsigned int | numOfVertices, | |||
| float * | vertexList, | |||
| float * | prevVertexList | |||
| ) |
Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture).
| cudaID | CUDA ID for the object |
| numOfVertices | number of vertices |
| vertexList | list of xyzw vertices |
| prevVertexList | list of previous xyzw vertices |
Definition at line 68 of file TAPsCUDA_Def.cu.
00074 { 00075 // Here Suture Model uses Vertex List Data 00076 // However, it has implicit connection (each vertex connect to the previous and next vertices). 00077 // So the connection list is set to zero. 00078 00079 // Home vertex is unused for now. Plan is to use home vertex for sticking suture to a surface. 00080 00081 // Assign CUDA ID to the object 00082 // The object has to use cudaID to communicate with the CUDA. 00083 cudaID = TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool; 00084 00085 // Allocate CUDA data for the object. 00086 TAPs_CUDA_CLASS_DATA_Vertex_List * newData = new TAPs_CUDA_CLASS_DATA_Vertex_List( numOfVertices, false, 0, true ); 00087 AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List( newData ); 00088 00089 // Size of memory for (xyzw) vertices 00090 int size = numOfVertices * 4 * sizeof(float); 00091 00092 // Copy Vertex List from host data to device data 00093 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) ); 00094 // Copy Previous Vertex List from host data to device data 00095 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) ); 00096 // Copy Home Vertex List from host data to device data 00097 //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) ); 00098 // Copy Vertex Connection List from host data to device data 00100 00101 return true; 00102 }
| bool TAPsCUDA__InitailizeDataForSutureModel_ADVSIM | ( | unsigned int & | cudaID, | |
| unsigned int | numOfVertices, | |||
| float * | vertexList, | |||
| float * | prevVertexList, | |||
| unsigned int * | simFlagsList, | |||
| float * | posConstraintList | |||
| ) |
Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture).
Initialize CUDA Data for a Suture Model (ModelStrand & ModelSuture) with TAPs_ADVANCED_SIMULATION
| cudaID | CUDA ID for the object |
| numOfVertices | number of vertices |
| vertexList | list of xyzw vertices |
| prevVertexList | list of previous xyzw vertices |
| simFlagsList | list of simulation flags |
| posConstraintList | list of position constraints |
Definition at line 106 of file TAPsCUDA_Def.cu.
00114 { 00115 // Here Suture Model uses Vertex List Data 00116 // However, it has implicit connection (each vertex connect to the previous and next vertices). 00117 // So the connection list is set to zero. 00118 00119 // Home vertex is unused for now. Plan is to use home vertex for sticking suture to a surface. 00120 00121 // Assign CUDA ID to the object 00122 // The object has to use cudaID to communicate with the CUDA. 00123 cudaID = TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool; 00124 00125 // Allocate CUDA data for the object. 00126 TAPs_CUDA_CLASS_DATA_Vertex_List * newData = new TAPs_CUDA_CLASS_DATA_Vertex_List( numOfVertices, false, 0, true ); 00127 AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List( newData ); 00128 00129 // Size of memory for (xyzw) vertices 00130 int size = numOfVertices * 4 * sizeof(float); 00131 00132 // Copy Vertex List from host data to device data 00133 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) ); 00134 // Copy Previous Vertex List from host data to device data 00135 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) ); 00136 // Copy Home Vertex List from host data to device data 00137 //CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) ); 00138 // Copy Vertex Connection List from host data to device data 00139 //CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) ); 00140 // Copy Simulation Flags List from host data to device data 00141 CUDA_SAFE_CALL( cudaMemcpy( newData->GetSimFlagsList(), simFlagsList, numOfVertices*sizeof(unsigned int), cudaMemcpyHostToDevice ) ); 00142 // Copy Position Constraint List from host data to device data 00143 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPosConstraintList(), posConstraintList, size, cudaMemcpyHostToDevice ) ); 00144 00145 return true; 00146 }
| bool TAPsCUDA__InitailizeDataForVertexList | ( | unsigned int & | cudaID, | |
| unsigned int | numOfVertices, | |||
| unsigned int | max_connection_size, | |||
| float * | vertexList, | |||
| float * | prevVertexList, | |||
| float * | homeVertexList, | |||
| int * | vertexConnectionList | |||
| ) |
Initialize CUDA Data for a Mesh Model by Using Vertex List.
| cudaID | CUDA ID for the object |
| numOfVertices | number of vertices |
| max_connection_size | maximum connection size |
| vertexList | list of xyz vertices |
| prevVertexList | list of previous xyzw vertices |
| homeVertexList | list of xyz home vertices |
| vertexConnectionList | vertex connection (1D array of 2D array -- vertex id and vertex connections) |
Definition at line 30 of file TAPsCUDA_Def.cu.
00039 { 00040 //printf( "TAPsCUDA__InitailizeDataForVertexList\n" ); 00041 //fflush( stdout ); 00042 00043 // Assign CUDA ID to the object 00044 // The object has to use cudaID to communicate with the CUDA. 00045 cudaID = TAPs_CUDA_DATA_GlobalPool.SizeOfGlobal_Pool; 00046 00047 // Allocate CUDA data for the object. 00048 TAPs_CUDA_CLASS_DATA_Vertex_List * newData = new TAPs_CUDA_CLASS_DATA_Vertex_List( numOfVertices, true, max_connection_size, false ); 00049 AddToGlobal_Pool_Of_TAPs_CUDA_CLASS_DATA_Vertex_List( newData ); 00050 00051 // Size of memory for (xyzw) vertices 00052 int size = numOfVertices * 4 * sizeof(float); 00053 00054 // Copy Vertex List from host data to device data 00055 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexList(), vertexList, size, cudaMemcpyHostToDevice ) ); 00056 // Copy Previous Vertex List from host data to device data 00057 CUDA_SAFE_CALL( cudaMemcpy( newData->GetPrevVertexList(), prevVertexList, size, cudaMemcpyHostToDevice ) ); 00058 // Copy Home Vertex List from host data to device data 00059 CUDA_SAFE_CALL( cudaMemcpy( newData->GetHomeVertexList(), homeVertexList, size, cudaMemcpyHostToDevice ) ); 00060 // Copy Vertex Connection List from host data to device data 00061 CUDA_SAFE_CALL( cudaMemcpy( newData->GetVertexConnectionList(), vertexConnectionList, numOfVertices*max_connection_size*sizeof(int), cudaMemcpyHostToDevice ) ); 00062 00063 return true; 00064 }
| texture<float4, 1, cudaReadModeElementType> CudaTexHomeVertexList |
| texture<float4, 1, cudaReadModeElementType> CudaTexPosConstraintList |
texture for list of position constraints (xyz) plus force ratio (w)
Definition at line 241 of file TAPsCUDA.cu.
| texture<float4, 1, cudaReadModeElementType> CudaTexPrevVertexList |
| texture<uint1, 1, cudaReadModeElementType> CudaTexSimFlagsList |
| texture<int1, 1, cudaReadModeElementType> CudaTexVertexConnectionList |
texture for list of vertex connections (x:=vertex#, y:=rest length, z:=stiffness, w:=damper)
Definition at line 237 of file TAPsCUDA.cu.
| texture<float4, 1, cudaReadModeElementType> CudaTexVertexList |
For HETriMeshOneModelMultiParts AdvSim Function.
CUDA texture dimension supports only 1-, 2-, and 4-component vector types, but not 3.
Use texture from CUDA Linear Memory by calling cudaBindTexture(...). CUDA Linear Memory is allocated by calling cudaMalloc(...). To enter or change the data in the Linear Memory, must use cudaMemcpy(...). and free by calling cudaFree(...).
Use texture from CUDA Arrays by calling cudaBindTextureToArray(...). CUDA Array is allocated by calling cudaMallocArray(...). To enter or change the data in the Array, must use cudaMemcpyToArray(...). and free by calling cudaFreeArray(...).
The texture reference has to be global, otherwise NVCC will generate error! Texture References texture for list of vertices
Definition at line 234 of file TAPsCUDA.cu.
1.5.6