PxCudaContextManager
Defined in include/cudamanager/PxCudaContextManager.h
- 
class PxCudaContextManager
- Manages thread locks and task scheduling for a CUDA context. - A PxCudaContextManager manages access to a single CUDA context, allowing it to be shared between multiple scenes. The context must be acquired from the manager before using any CUDA APIs unless stated otherwise. - The PxCudaContextManager is based on the CUDA driver API and explicitly does not support the CUDA runtime API (a.k.a. CUDART). - Public Functions - 
template<typename T>
 inline void clearDeviceBufferAsync(T *deviceBuffer, PxU32 numElements, CUstream stream, PxI32 value = 0)
- Schedules a clear operation for a device memory buffer on the specified stream. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void copyDToH(T *hostBuffer, const T *deviceBuffer, PxU32 numElements)
- Copies a device buffer to the host. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void copyHToD(T *deviceBuffer, const T *hostBuffer, PxU32 numElements)
- Copies a host buffer to the device. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void copyDToHAsync(T *hostBuffer, const T *deviceBuffer, PxU32 numElements, CUstream stream)
- Schedules a device-to-host copy operation on the specified stream. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void copyHToDAsync(T *deviceBuffer, const T *hostBuffer, PxU32 numElements, CUstream stream)
- Schedules a host-to-device copy operation on the specified stream. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void copyDToDAsync(T *dstDeviceBuffer, const T *srcDeviceBuffer, PxU32 numElements, CUstream stream)
- Schedules a device-to-device copy operation on the specified stream. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void memsetAsync(T *dstDeviceBuffer, const T &value, PxU32 numElements, CUstream stream)
- Schedules a memset operation on the device on the specified stream. - Only supported for 1-byte or 4-byte data types. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void allocDeviceBuffer(T *&deviceBuffer, PxU32 numElements, const char *filename = __FILE__, PxI32 line = __LINE__)
- Allocates a device buffer. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline T *allocDeviceBuffer(PxU32 numElements, const char *filename = __FILE__, PxI32 line = __LINE__)
- Allocates a device buffer and returns the pointer to the memory. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void freeDeviceBuffer(T *&deviceBuffer)
- Frees a device buffer. - The CUDA context is acquired automatically. 
 - 
template<typename T>
 inline void allocPinnedHostBuffer(T *&pinnedHostBuffer, PxU32 numElements, const char *filename = __FILE__, PxI32 line = __LINE__)
- Allocates a pinned host buffer. - A pinned host buffer can be used on the GPU after getting a mapped device pointer from the pinned host buffer pointer; see getMappedDevicePtr. The CUDA context is acquired automatically. - See also getMappedDevicePtr 
 - 
template<typename T>
 inline T *allocPinnedHostBuffer(PxU32 numElements, const char *filename = __FILE__, PxI32 line = __LINE__)
- Allocates a pinned host buffer and returns the pointer to the memory. - A pinned host buffer can be used on the GPU after getting a mapped device pointer from the pinned host buffer pointer; see getMappedDevicePtr. The CUDA context is acquired automatically. - See also getMappedDevicePtr 
 - 
template<typename T>
 inline void freePinnedHostBuffer(T *&pinnedHostBuffer)
- Frees a pinned host buffer. - The CUDA context is acquired automatically. 
 - 
virtual CUdeviceptr getMappedDevicePtr(void *pinnedHostBuffer) = 0
- Gets a mapped pointer from a pinned host buffer that can be used in CUDA kernels directly. - Data access performance with a mapped pinned host pointer will be slower than using a device pointer directly, but the changes made in the kernel will be available on the host immediately. The CUDA context is acquired automatically. 
 - 
virtual void acquireContext() = 0
- Acquire the CUDA context for the current thread. - Acquisitions are allowed to be recursive within a single thread. You can acquire the context multiple times so long as you release it the same number of times. - The context must be acquired before using most CUDA functions. 
 - 
virtual void releaseContext() = 0
- Release the CUDA context from the current thread. - The CUDA context should be released as soon as practically possible, to allow other CPU threads to work efficiently. 
 - 
virtual PxCudaContext *getCudaContext() = 0
- Return the CudaContext. 
 - 
virtual bool contextIsValid() const = 0
- Context manager has a valid CUDA context. - This method should be called after creating a PxCudaContextManager, especially if the manager was responsible for allocating its own CUDA context (desc.ctx == NULL). 
 - 
virtual bool supportsArchSM10() const = 0
- G80. 
 - 
virtual bool supportsArchSM11() const = 0
- G92. 
 - 
virtual bool supportsArchSM12() const = 0
- GT200. 
 - 
virtual bool supportsArchSM13() const = 0
- GT260. 
 - 
virtual bool supportsArchSM20() const = 0
- GF100. 
 - 
virtual bool supportsArchSM30() const = 0
- GK100. 
 - 
virtual bool supportsArchSM35() const = 0
- GK110. 
 - 
virtual bool supportsArchSM50() const = 0
- GM100. 
 - 
virtual bool supportsArchSM52() const = 0
- GM200. 
 - 
virtual bool supportsArchSM60() const = 0
- GP100. 
 - 
virtual bool isIntegrated() const = 0
- true if GPU is an integrated (MCP) part 
 - 
virtual bool canMapHostMemory() const = 0
- true if GPU can map host memory to GPU (0-copy) 
 - 
virtual int getDriverVersion() const = 0
- returns cached value of cuDriverGetVersion() 
 - 
virtual size_t getDeviceTotalMemBytes() const = 0
- returns cached value of device memory size 
 - 
virtual int getMultiprocessorCount() const = 0
- returns cached value of SM unit count 
 - 
virtual unsigned int getClockRate() const = 0
- returns cached value of SM clock frequency 
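 - 
virtual int getSharedMemPerBlock() const = 0
- returns total amount of shared memory available per block in bytes 
 - 
virtual int getSharedMemPerMultiprocessor() const = 0
- returns total amount of shared memory available per multiprocessor in bytes 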
 - 
virtual unsigned int getMaxThreadsPerBlock() const = 0
- returns the maximum number of threads per block 
 - 
virtual const char *getDeviceName() const = 0
- returns device name retrieved from driver 
 - 
virtual void setUsingConcurrentStreams(bool) = 0
- turn on/off using concurrent streams for GPU work 
 - 
virtual bool getUsingConcurrentStreams() const = 0
- true if GPU work can run in concurrent streams 
 - 
virtual void getDeviceMemoryInfo(size_t &free, size_t &total) const = 0
- get currently available and total memory 
 - 
virtual int usingDedicatedGPU() const = 0
- Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel. - Note - If using CUDA Interop, this will always return false - Returns
- 1 if there is a dedicated GPU, 0 if there is NOT a dedicated GPU, -1 if the routine is not implemented 
 
 - 
virtual CUmodule *getCuModules() = 0
- Get the CUDA modules that have been loaded into this context on construction. - Returns
- Pointer to the cuda modules 
 
 - 
virtual void release() = 0
- Release the PxCudaContextManager. - If the PxCudaContextManager created the CUDA context it was responsible for, it also frees that context. - Do not release the PxCudaContextManager if there are any scenes using it. Those scenes must be released first. 
 - Protected Functions - 
virtual void *allocDeviceBufferInternal(PxU32 numBytes, const char *filename = NULL, PxI32 line = -1) = 0
 - 
virtual void *allocPinnedHostBufferInternal(PxU32 numBytes, const char *filename = NULL, PxI32 line = -1) = 0
 - 
virtual void freeDeviceBufferInternal(void *deviceBuffer) = 0
 - 
virtual void freePinnedHostBufferInternal(void *pinnedHostBuffer) = 0
 - 
virtual void clearDeviceBufferAsyncInternal(void *deviceBuffer, PxU32 numBytes, CUstream stream, PxI32 value) = 0
 - 
virtual void copyDToHAsyncInternal(void *hostBuffer, const void *deviceBuffer, PxU32 numBytes, CUstream stream) = 0
 - 
virtual void copyHToDAsyncInternal(void *deviceBuffer, const void *hostBuffer, PxU32 numBytes, CUstream stream) = 0
 - 
virtual void copyDToDAsyncInternal(void *dstDeviceBuffer, const void *srcDeviceBuffer, PxU32 numBytes, CUstream stream) = 0
 
- 
template<typename T>