add xinhe runtime code
Change-Id: I3580a997dfa9f53df3cc686055b335da1fb1b66b
This commit is contained in:
parent
6e786339be
commit
026f284ba4
5
src/XINHE_Runtime/XINHE_Runtime.md
Normal file
5
src/XINHE_Runtime/XINHE_Runtime.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# CMCC -- XINHE Runtime
|
||||||
|
|
||||||
|
XINHE Runtime is a cross-architecture runtime system for multi-vendor and multi-type architectures. XINHE Runtime discovers available functionality, manages multiple diverse programming
|
||||||
|
systems (e.g., CUDA, HIP, Level Zero, DTK, Vasti) in the same application, represents data dependencies, orchestrates data movement proactively, and allows configurable work schedulers for diverse multi-vendor devices.
|
||||||
|
|
146
src/XINHE_Runtime/include/HYPERCROSS/hycr_runtime_api.h
Normal file
146
src/XINHE_Runtime/include/HYPERCROSS/hycr_runtime_api.h
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
#ifndef HYCR_INCLUDE_HYCR_HYCR_RUNTIME_H
|
||||||
|
#define HYCR_INCLUDE_HYCR_HYCR_RUNTIME_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#else
|
||||||
|
#include <stdbool.h> /* C builds get the standard bool; a private `bool` typedef collides with <stdbool.h> and with the C23 keyword */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define HYCR_MAX_NPLATFORMS 64
|
||||||
|
/* Fully parenthesized so the value stays 255 when the macro is used inside a larger expression. */
#define HYCR_MAX_NDEVS ((1 << 8) - 1)
|
||||||
|
|
||||||
|
#define hycr_default (1 << 5)
|
||||||
|
#define hycr_cpu (1 << 6)
|
||||||
|
#define hycr_gpu_nvidia (1 << 7)
|
||||||
|
#define hycr_gpu_amd (1 << 8)
|
||||||
|
#define hycr_gpu_intel (1 << 9)
|
||||||
|
#define hycr_gpu_hygon (1 << 10)
|
||||||
|
#define hycr_gpu_iltar (1 << 11)
|
||||||
|
#define hycr_ogpu (hycr_gpu_nvidia | hycr_gpu_amd | hycr_gpu_intel | hycr_gpu_hygon | hycr_gpu_iltar)
|
||||||
|
#define hycr_npu_ascend (1 << 13)
|
||||||
|
#define hycr_dla_vasti (1 << 14)
|
||||||
|
#define hycr_dla_enflame (1 << 15)
|
||||||
|
|
||||||
|
|
||||||
|
#define hycr_cuda 1
|
||||||
|
#define hycr_hip 3
|
||||||
|
#define hycr_levelzero 4
|
||||||
|
#define hycr_dtk 5
|
||||||
|
#define hycr_ixc 6
|
||||||
|
#define hycr_cann 7
|
||||||
|
#define hycr_vasr 8
|
||||||
|
|
||||||
|
#define hycr_r -1
|
||||||
|
#define hycr_w -2
|
||||||
|
#define hycr_rw -3
|
||||||
|
#define hycr_xr -4
|
||||||
|
#define hycr_xw -5
|
||||||
|
#define hycr_xrw -6
|
||||||
|
|
||||||
|
#define hycr_int (1 << 0)
|
||||||
|
#define hycr_long (1 << 1)
|
||||||
|
#define hycr_float (1 << 2)
|
||||||
|
#define hycr_double (1 << 3)
|
||||||
|
|
||||||
|
#define hycr_normal (1 << 10)
|
||||||
|
#define hycr_reduction (1 << 11)
|
||||||
|
#define hycr_sum ((1 << 12) | hycr_reduction)
|
||||||
|
#define hycr_max ((1 << 13) | hycr_reduction)
|
||||||
|
#define hycr_min ((1 << 14) | hycr_reduction)
|
||||||
|
|
||||||
|
#define hycr_platform 0x3401
|
||||||
|
#define hycr_vendor 0x3402
|
||||||
|
#define hycr_name 0x3403
|
||||||
|
#define hycr_type 0x3404
|
||||||
|
|
||||||
|
/*
 * Command-related selector constants.
 * NOTE(review): presumably used as `param` values for command-count queries -- confirm against the implementation.
 *
 * The unmatched `#endif` that used to follow this group has been removed: no
 * `#if` is open at this point other than the include guard, which is closed
 * by the final `#endif` at the end of this header.
 */
#define hycr_ncmds 1
#define hycr_ncmds_kernel 2
#define hycr_ncmds_memcpy 3
#define hycr_cmds 4
|
||||||
|
|
||||||
|
typedef struct _hycr_task* hycr_task;
|
||||||
|
typedef struct _hycr_mem* hycr_mem;
|
||||||
|
typedef struct _hycr_kernel* hycr_kernel;
|
||||||
|
typedef struct _hycr_graph* hycr_graph;
|
||||||
|
|
||||||
|
typedef int (*hycr_host_task)(void* params, const int* device);
|
||||||
|
typedef int (*command_handler)(void* params, void* device);
|
||||||
|
typedef int (*hook_task)(void* task);
|
||||||
|
typedef int (*hook_command)(void* command);
|
||||||
|
|
||||||
|
typedef int (*hycr_selector_kernel)(hycr_task task, void* params, char* kernel_name);
|
||||||
|
|
||||||
|
extern int hycr_init(int* argc, char*** argv, int sync);
|
||||||
|
/* Explicit (void): in C an empty parameter list is not a prototype and disables argument checking. */
extern int hycr_finalize(void);
|
||||||
|
|
||||||
|
|
||||||
|
extern int hycr_env_set(const char* key, const char* value);
|
||||||
|
extern int hycr_env_get(const char* key, char** value, size_t* vallen);
|
||||||
|
|
||||||
|
extern int hycr_platform_count(int* nplatforms);
|
||||||
|
extern int hycr_platform_info(int platform, int param, void* value, size_t* size);
|
||||||
|
|
||||||
|
|
||||||
|
extern int hycr_device_count(int* ndevs);
|
||||||
|
extern int hycr_device_info(int device, int param, void* value, size_t* size);
|
||||||
|
extern int hycr_device_set_default(int device);
|
||||||
|
extern int hycr_device_get_default(int* device);
|
||||||
|
extern int hycr_device_synchronize(int ndevs, int* devices);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
extern int hycr_kernel_create(const char* name, hycr_kernel* kernel);
|
||||||
|
extern int hycr_kernel_get(const char* name, hycr_kernel* kernel);
|
||||||
|
extern int hycr_kernel_setarg(hycr_kernel kernel, int idx, size_t size, void* value);
|
||||||
|
extern int hycr_kernel_setmem(hycr_kernel kernel, int idx, hycr_mem mem, size_t mode);
|
||||||
|
extern int hycr_kernel_setmem_off(hycr_kernel kernel, int idx, hycr_mem mem, size_t off, size_t mode);
|
||||||
|
extern int hycr_kernel_setmap(hycr_kernel kernel, int idx, void* host, size_t mode);
|
||||||
|
extern int hycr_kernel_release(hycr_kernel kernel);
|
||||||
|
|
||||||
|
extern int hycr_create(hycr_task* task);
|
||||||
|
extern int hycr_create_name(const char* name, hycr_task* task);
|
||||||
|
extern int hycr_depend(hycr_task task, int ntasks, hycr_task* tasks);
|
||||||
|
extern int hycr_malloc(hycr_task task, hycr_mem mem);
|
||||||
|
extern int hycr_cmd_reset_mem(hycr_task task, hycr_mem mem, uint8_t reset);
|
||||||
|
extern int hycr_h2d(hycr_task task, hycr_mem mem, size_t off, size_t size, void* host);
|
||||||
|
extern int hycr_h2d_offsets(hycr_task task, hycr_mem mem, size_t *off, size_t *host_sizes, size_t *dev_sizes, size_t elem_size, int dim, void* host);
|
||||||
|
extern int hycr_d2h(hycr_task task, hycr_mem mem, size_t off, size_t size, void* host);
|
||||||
|
extern int hycr_d2h_offsets(hycr_task task, hycr_mem mem, size_t *off, size_t *host_sizes, size_t *dev_sizes, size_t elem_size, int dim, void* host);
|
||||||
|
extern int hycr_dmem_flush_out(hycr_task task, hycr_mem mem);
|
||||||
|
extern int hycr_h2d_full(hycr_task task, hycr_mem mem, void* host);
|
||||||
|
extern int hycr_d2h_full(hycr_task task, hycr_mem mem, void* host);
|
||||||
|
|
||||||
|
|
||||||
|
extern int hycr_arch_kernel_object(hycr_task task, hycr_kernel kernel, int dim, size_t* off, size_t* gws, size_t* lws);
|
||||||
|
extern int hycr_arch_kernel_selector(hycr_task task, hycr_selector_kernel func, void* params, size_t params_size);
|
||||||
|
extern int hycr_arch_submit(hycr_task task, int device, const char* opt, int sync);
|
||||||
|
extern int hycr_arch_release(hycr_task task);
|
||||||
|
|
||||||
|
extern int hycr_mem_create(size_t size, hycr_mem* mem);
|
||||||
|
extern int hycr_mem_init_reset(hycr_mem mem, int reset);
|
||||||
|
/* NOTE(review): this header already declares hycr_mem_create(size_t size, hycr_mem* mem) a few lines above. A second declaration with a different parameter list is a conflicting-types error in C (and under extern "C"); one of the two presumably needs a distinct name -- confirm against the implementation. */
extern int hycr_mem_create(hycr_mem* mem, void *host, size_t size);
|
||||||
|
extern int hycr_mem_update(hycr_mem mem, void *host);
|
||||||
|
extern int hycr_mem_create_region(hycr_mem* mem, hycr_mem root_mem, int region);
|
||||||
|
extern int hycr_mem_enable_outer_dim_regions(hycr_mem mem);
|
||||||
|
extern int hycr_mem_create_tile(hycr_mem* mem, void *host, size_t *off, size_t *host_size, size_t *dev_size, size_t elem_size, int dim);
|
||||||
|
extern int hycr_mem_arch(hycr_mem mem, int device, void** arch);
|
||||||
|
extern int hycr_mem_reduce(hycr_mem mem, int mode, int type);
|
||||||
|
extern int hycr_mem_release(hycr_mem mem);
|
||||||
|
|
||||||
|
|
||||||
|
/* Explicit (void): in C an empty parameter list is not a prototype and disables argument checking. */
extern int hycr_record_start(void);
|
||||||
|
/* Explicit (void): in C an empty parameter list is not a prototype and disables argument checking. */
extern int hycr_record_stop(void);
|
||||||
|
|
||||||
|
extern int hycr_timer_now(double* time);
|
||||||
|
/* Explicit (void): in C an empty parameter list is not a prototype and disables argument checking. */
extern void hycr_disable_consistency_check(void);
|
||||||
|
/* Explicit (void): in C an empty parameter list is not a prototype and disables argument checking. */
extern void hycr_enable_consistency_check(void);
|
||||||
|
|
||||||
|
|
||||||
|
/* Close the extern "C" block that the top of this header opens for C++ translation units. */
#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* HYCR_INCLUDE_HYCR_HYCR_RUNTIME_H */
|
||||||
|
|
123
src/XINHE_Runtime/src/runtime/Device.h
Normal file
123
src/XINHE_Runtime/src/runtime/Device.h
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
#ifndef HYCR_SRC_RT_DEVICE_H
|
||||||
|
#define HYCR_SRC_RT_DEVICE_H
|
||||||
|
|
||||||
|
#include "Debug.h"
|
||||||
|
#include "Config.h"
|
||||||
|
#include "Timer.h"
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#ifndef ASYNC_STREAMING
|
||||||
|
#define SYNC_EXECUTION
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace hycr {
|
||||||
|
namespace runtime {
|
||||||
|
|
||||||
|
class Device {
|
||||||
|
public:
|
||||||
|
|
||||||
|
Device(int devs, int platform);
|
||||||
|
virtual ~Device();
|
||||||
|
|
||||||
|
virtual void TaskPre(Task* task) { return; }
|
||||||
|
virtual void TaskPost(Task* task) { return; }
|
||||||
|
|
||||||
|
void Execute(Task* task);
|
||||||
|
|
||||||
|
void ExecuteInit(Command* cmd);
|
||||||
|
virtual void ExecuteKernel(Command* cmd);
|
||||||
|
void ExecuteMalloc(Command* cmd);
|
||||||
|
|
||||||
|
void InvokeDMemInDataTransfer(Task *task, Command *cmd, DMemType *mem);
|
||||||
|
void ExecuteMemResetInput(Task *task, Command* cmd);
|
||||||
|
void ExecuteMemIn(Task *task, Command* cmd);
|
||||||
|
void ExecuteMemInDMemIn(Task *task, Command* cmd, DataMem *mem);
|
||||||
|
void ExecuteMemInDMemRegionIn(Task *task, Command* cmd, DataMemRegion *mem);
|
||||||
|
void ExecuteMemOut(Task *task, Command* cmd);
|
||||||
|
void ExecuteMemFlushOut(Command* cmd);
|
||||||
|
|
||||||
|
void ExecuteH2D(Command* cmd);
|
||||||
|
void ExecuteH2DNP(Command* cmd);
|
||||||
|
void ExecuteD2H(Command* cmd);
|
||||||
|
void ExecuteMap(Command* cmd);
|
||||||
|
void ExecuteReleaseMem(Command* cmd);
|
||||||
|
void ExecuteHost(Command* cmd);
|
||||||
|
|
||||||
|
virtual int ResetMemory(BaseMem *mem, uint8_t reset_value)=0;
|
||||||
|
virtual void ResetContext() { }
|
||||||
|
virtual bool IsContextChangeRequired() { return false; }
|
||||||
|
virtual int Compile(char* src) { return HYCR_SUCCESS; }
|
||||||
|
virtual int Init() = 0;
|
||||||
|
virtual int BuildProgram(char* path) { return HYCR_SUCCESS; }
|
||||||
|
virtual int MemAlloc(void** mem, size_t size, bool reset=false) = 0;
|
||||||
|
virtual int MemFree(void* mem) = 0;
|
||||||
|
virtual int MemD2D(Task *task, BaseMem *mem, void *dst, void *src, size_t size) { _error("Device:%d:%s doesn't support MemD2D", devs_, name()); return HYCR_ERROR; }
|
||||||
|
virtual int MemH2D(Task *task, BaseMem* mem, size_t *off, size_t *host_sizes, size_t *dev_sizes, size_t elem_size, int dim, size_t size, void* host, const char *tag="") = 0;
|
||||||
|
virtual int MemD2H(Task *task, BaseMem* mem, size_t *off, size_t *host_sizes, size_t *dev_sizes, size_t elem_size, int dim, size_t size, void* host, const char *tag="") = 0;
|
||||||
|
virtual int KernelGet(Kernel *kernel, void** kernel_bin, const char* name) = 0;
|
||||||
|
virtual int KernelLaunchInit(Kernel* kernel) { return HYCR_SUCCESS; }
|
||||||
|
virtual int KernelSetArg(Kernel* kernel, int idx, int kindex, size_t size, void* value) = 0;
|
||||||
|
virtual int KernelSetMem(Kernel* kernel, int idx, int kindex, BaseMem* mem, size_t off) = 0;
|
||||||
|
virtual int KernelLaunch(Kernel* kernel, int dim, size_t* off, size_t* gws, size_t* lws) = 0;
|
||||||
|
virtual int Synchronize() = 0;
|
||||||
|
virtual int AddCallback(Task* task) = 0;
|
||||||
|
virtual int Custom(int tag, char* params) { return HYCR_SUCCESS; }
|
||||||
|
virtual int RecreateContext() { return HYCR_ERROR; }
|
||||||
|
virtual void SetPeerDevices(int *peers, int count) { }
|
||||||
|
virtual const char* kernel_src() { return " "; }
|
||||||
|
virtual const char* kernel_bin() { return " "; }
|
||||||
|
|
||||||
|
void set_shared_memory_buffers(bool flag=true) { shared_memory_buffers_ = flag; }
|
||||||
|
bool is_shared_memory_buffers() { return shared_memory_buffers_ && can_share_host_memory_; }
|
||||||
|
int platform() { return platform_; }
|
||||||
|
int devs() { return devs_; }
|
||||||
|
int type() { return type_; }
|
||||||
|
int model() { return model_; }
|
||||||
|
char* vendor() { return vendor_; }
|
||||||
|
char* name() { return name_; }
|
||||||
|
bool busy() { return busy_; }
|
||||||
|
bool idle() { return !busy_; }
|
||||||
|
bool enable() { return enable_; }
|
||||||
|
void enableD2D() { is_d2d_possible_ = true; }
|
||||||
|
bool isD2DEnabled() { return is_d2d_possible_; }
|
||||||
|
int ok() { return errid_; }
|
||||||
|
void set_worker(Worker* worker) { worker_ = worker; }
|
||||||
|
Worker* worker() { return worker_; }
|
||||||
|
double Now() { return timer_->Now(); }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
protected:
|
||||||
|
int devs_;
|
||||||
|
int platform_;
|
||||||
|
int type_;
|
||||||
|
int model_;
|
||||||
|
char vendor_[128];
|
||||||
|
char name_[256];
|
||||||
|
char version_[64];
|
||||||
|
int driver_version_;
|
||||||
|
size_t max_compute_units_;
|
||||||
|
size_t max_work_group_size_;
|
||||||
|
size_t max_work_item_sizes_[3];
|
||||||
|
int max_block_dims_[3];
|
||||||
|
int nqueues_;
|
||||||
|
int q_;
|
||||||
|
int errid_;
|
||||||
|
|
||||||
|
char kernel_path_[256];
|
||||||
|
|
||||||
|
bool busy_;
|
||||||
|
bool enable_;
|
||||||
|
bool shared_memory_buffers_;
|
||||||
|
bool can_share_host_memory_;
|
||||||
|
bool is_d2d_possible_;
|
||||||
|
|
||||||
|
std::map<int, command_handler> cmd_handlers_;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} /* namespace runtime */
|
||||||
|
} /* namespace hycr */
|
||||||
|
|
||||||
|
#endif /* HYCR_SRC_RT_DEVICE_H */
|
137
src/XINHE_Runtime/src/runtime/DeviceCUDA.cpp
Normal file
137
src/XINHE_Runtime/src/runtime/DeviceCUDA.cpp
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
#include "DeviceCUDA.h"
|
||||||
|
|
||||||
|
namespace hycr {
|
||||||
|
namespace runtime {
|
||||||
|
|
||||||
|
/*
 * Builds the wrapper for one CUDA device.
 *
 * Stores the loader handles and the CUDA device ordinal, fills in the
 * vendor/name/version strings, and queries the device limits through the
 * CUDA driver API. Every driver call is followed by _cuerror() so that a
 * failure is reported instead of being silently overwritten by the next call.
 */
DeviceCUDA::DeviceCUDA(CUDA* ld, Host2CUDA *host2cuda_ld, CUdevice cudev, int devs, int platform) : Device(devs, platform) {
  ld_ = ld;
  host2cuda_ld_ = host2cuda_ld;
  peers_count_ = 0;
  max_arg_idx_ = 0;
  ngarbage_ = 0;
  shared_mem_bytes_ = 0;
  dev_ = cudev;

  strcpy(vendor_, "NVIDIA");
  enableD2D();
  err_ = ld_->cuDeviceGetName(name_, sizeof(name_), dev_);
  _cuerror(err_);
  /* The public header names the NVIDIA device-type flag hycr_gpu_nvidia. */
  type_ = hycr_gpu_nvidia;
  model_ = hycr_cuda;
  err_ = ld_->cuDriverGetVersion(&driver_version_);
  _cuerror(err_);
  snprintf(version_, sizeof(version_), "NVIDIA CUDA %d", driver_version_);

  /* cuDeviceGetAttribute(int* pi, CUdevice_attribute attrib, CUdevice dev):
   * the first argument receives the queried value. */
  int threads_per_block = 0, multiprocessors = 0;
  int block_x = 0, block_y = 0, block_z = 0;
  int grid_x = 0, grid_y = 0, grid_z = 0;
  int concurrent_kernels = 0, async_engines = 0;

  err_ = ld_->cuDeviceGetAttribute(&threads_per_block, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev_);
  _cuerror(err_);
  err_ = ld_->cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev_);
  _cuerror(err_);
  err_ = ld_->cuDeviceGetAttribute(&block_x, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, dev_);
  _cuerror(err_);
  err_ = ld_->cuDeviceGetAttribute(&block_y, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, dev_);
  _cuerror(err_);
  err_ = ld_->cuDeviceGetAttribute(&block_z, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, dev_);
  _cuerror(err_);
  err_ = ld_->cuDeviceGetAttribute(&grid_x, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, dev_);
  _cuerror(err_);
  err_ = ld_->cuDeviceGetAttribute(&grid_y, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, dev_);
  _cuerror(err_);
  err_ = ld_->cuDeviceGetAttribute(&grid_z, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, dev_);
  _cuerror(err_);
  /* The next two values have no matching member in Device; they are queried
   * (as before) but not stored. */
  err_ = ld_->cuDeviceGetAttribute(&concurrent_kernels, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev_);
  _cuerror(err_);
  err_ = ld_->cuDeviceGetAttribute(&async_engines, CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, dev_);
  _cuerror(err_);

  max_work_group_size_ = (size_t) threads_per_block;
  max_compute_units_ = (size_t) multiprocessors;
  max_block_dims_[0] = block_x;
  max_block_dims_[1] = block_y;
  max_block_dims_[2] = block_z;
  /* NOTE(review): per-dimension work-item limit taken as block dim x grid dim -- confirm this is the intended meaning of max_work_item_sizes_. */
  max_work_item_sizes_[0] = (size_t) block_x * (size_t) grid_x;
  max_work_item_sizes_[1] = (size_t) block_y * (size_t) grid_y;
  max_work_item_sizes_[2] = (size_t) block_z * (size_t) grid_z;
}
|
||||||
|
|
||||||
|
/*
 * Invokes the two optional host2cuda finalizers exposed by the loader.
 * Each entry is tested before the call, so an entry that evaluates to false
 * is skipped. The per-device variant receives this device's CUDA ordinal.
 */
DeviceCUDA::~DeviceCUDA() {
  if (ld_->hycr_host2cuda_finalize)
  {
    ld_->hycr_host2cuda_finalize();
  }

  if (ld_->hycr_host2cuda_finalize_handles)
  {
    ld_->hycr_host2cuda_finalize_handles(dev_);
  }
}
|
||||||
|
|
||||||
|
int DeviceCUDA::Compile(char* src) {
|
||||||
|
char cmd[1024];
|
||||||
|
memset(cmd, 0, 256);
|
||||||
|
sprintf(cmd, "nvcc -ptx %s -o %s", src, kernel_path_);
|
||||||
|
if (system(cmd) != EXIT_SUCCESS) {
|
||||||
|
_error("cmd[%s]", cmd);
|
||||||
|
worker_->platform()->IncrementErrorCount();
|
||||||
|
return HYCR_ERROR;
|
||||||
|
}
|
||||||
|
return HYCR_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int DeviceCUDA::Init() {
|
||||||
|
err_ = ld_->cudaSetDevice(dev_);
|
||||||
|
err_ = ld_->cuCtxCreate(&ctx_, CU_CTX_SCHED_AUTO, dev_);
|
||||||
|
EnablePeerAccess();
|
||||||
|
_cuerror(err_);
|
||||||
|
|
||||||
|
for (int i = 0; i < nqueues_; i++) {
|
||||||
|
err_ = ld_->cuStreamCreate(streams_ + i, CU_STREAM_DEFAULT);
|
||||||
|
_cuerror(err_);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* path = kernel_path_;
|
||||||
|
char* src = NULL;
|
||||||
|
size_t srclen = 0;
|
||||||
|
if (Utils::ReadFile(path, &src, &srclen) == HYCR_ERROR) {
|
||||||
|
return HYCR_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
err_ = ld_->cuModuleLoad(&module_, path);
|
||||||
|
if (err_ != CUDA_SUCCESS) {
|
||||||
|
_cuerror(err_);
|
||||||
|
if (src) free(src);
|
||||||
|
platform()->IncrementErrorCount();
|
||||||
|
return HYCR_ERROR;
|
||||||
|
}
|
||||||
|
if (src) free(src);
|
||||||
|
return HYCR_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Fills the device buffer behind `mem` (mem->arch(this), mem->size() bytes)
 * with `reset_value` via cudaMemset.
 * Returns HYCR_SUCCESS on CUDA_SUCCESS; otherwise increments the platform
 * error count and returns HYCR_ERROR.
 */
int DeviceCUDA::ResetMemory(BaseMem *mem, uint8_t reset_value) {
  err_ = ld_->cudaMemset(mem->arch(this), reset_value, mem->size());
  _cuerror(err_);

  if (err_ == CUDA_SUCCESS) {
    return HYCR_SUCCESS;
  }

  worker_->platform()->IncrementErrorCount();
  return HYCR_ERROR;
}
|
||||||
|
|
||||||
|
int DeviceCUDA::MemAlloc(void** mem, size_t size, bool reset) {
|
||||||
|
CUdeviceptr* cumem = (CUdeviceptr*) mem;
|
||||||
|
err_ = ld_->cuMemAlloc(cumem, size);
|
||||||
|
|
||||||
|
if (reset) ld_->cudaMemset(*mem, 0, size);
|
||||||
|
if (err_ != CUDA_SUCCESS){
|
||||||
|
worker_->platform()->IncrementErrorCount();
|
||||||
|
return HYCR_ERROR;
|
||||||
|
}
|
||||||
|
return HYCR_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int DeviceCUDA::MemFree(void* mem) {
|
||||||
|
CUdeviceptr cumem = (CUdeviceptr) mem;
|
||||||
|
if (ngarbage_ >= HYCR_MAX_GABAGES) _error("ngarbage[%d]", ngarbage_);
|
||||||
|
else garbage_[ngarbage_++] = cumem;
|
||||||
|
/*
|
||||||
|
_trace("dptr[%p]", cumem);
|
||||||
|
err_ = ld_->cuMemFree(cumem);
|
||||||
|
_cuerror(err_);
|
||||||
|
*/
|
||||||
|
return HYCR_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
int DeviceCUDA::Synchronize() {
|
||||||
|
err_ = ld_->cuCtxSynchronize();
|
||||||
|
_cuerror(err_);
|
||||||
|
if (err_ != CUDA_SUCCESS){
|
||||||
|
worker_->platform()->IncrementErrorCount();
|
||||||
|
return HYCR_ERROR;
|
||||||
|
}
|
||||||
|
return HYCR_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} /* namespace runtime */
|
||||||
|
} /* namespace hycr */
|
||||||
|
|
74
src/XINHE_Runtime/src/runtime/DeviceCUDA.h
Normal file
74
src/XINHE_Runtime/src/runtime/DeviceCUDA.h
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
#ifndef HYCR_SRC_RT_DEVICE_CUDA_H
|
||||||
|
#define HYCR_SRC_RT_DEVICE_CUDA_H
|
||||||
|
|
||||||
|
#include "Device.h"
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#define HYCR_MAX_GABAGES 256
|
||||||
|
|
||||||
|
namespace hycr {
|
||||||
|
namespace runtime {
|
||||||
|
|
||||||
|
class DeviceCUDA : public Device {
|
||||||
|
public:
|
||||||
|
DeviceCUDA(CUDA* ld, Host2CUDA *host2cuda_ld, CUdevice cudev, int devs, int platform);
|
||||||
|
~DeviceCUDA();
|
||||||
|
|
||||||
|
int Compile(char* src);
|
||||||
|
int Init();
|
||||||
|
int ResetMemory(BaseMem *mem, uint8_t reset_value);
|
||||||
|
int MemAlloc(void** mem, size_t size, bool reset=false);
|
||||||
|
int MemFree(void* mem);
|
||||||
|
void EnablePeerAccess();
|
||||||
|
void SetPeerDevices(int *peers, int count);
|
||||||
|
void MemCpy3D(CUdeviceptr dev, uint8_t *host, size_t *off,
|
||||||
|
size_t *dev_sizes, size_t *host_sizes,
|
||||||
|
size_t elem_size, bool host_2_dev);
|
||||||
|
int MemD2D(Task *task, BaseMem *mem, void *dst, void *src, size_t size);
|
||||||
|
int MemH2D(Task *task, BaseMem* mem, size_t *off, size_t *host_sizes, size_t *dev_sizes, size_t elem_size, int dim, size_t size, void* host, const char *tag="");
|
||||||
|
int MemD2H(Task *task, BaseMem* mem, size_t *off, size_t *host_sizes, size_t *dev_sizes, size_t elem_size, int dim, size_t size, void* host, const char *tag="");
|
||||||
|
int KernelGet(Kernel *kernel, void** kernel_bin, const char* name);
|
||||||
|
int KernelLaunchInit(Kernel* kernel);
|
||||||
|
int KernelSetArg(Kernel* kernel, int idx, int kindex, size_t size, void* value);
|
||||||
|
int KernelSetMem(Kernel* kernel, int idx, int kindex, BaseMem* mem, size_t off);
|
||||||
|
int KernelLaunch(Kernel* kernel, int dim, size_t* off, size_t* gws, size_t* lws);
|
||||||
|
int Synchronize();
|
||||||
|
int AddCallback(Task* task);
|
||||||
|
int Custom(int tag, char* params);
|
||||||
|
|
||||||
|
const char* kernel_src() { return "KERNEL_SRC_CUDA"; }
|
||||||
|
|
||||||
|
virtual void TaskPre(Task* task);
|
||||||
|
|
||||||
|
int cudev() { return dev_; }
|
||||||
|
void ResetContext();
|
||||||
|
bool IsContextChangeRequired();
|
||||||
|
|
||||||
|
private:
|
||||||
|
static void Callback(CUstream stream, CUresult status, void* data);
|
||||||
|
void ClearGarbage();
|
||||||
|
|
||||||
|
private:
|
||||||
|
LoaderCUDA* ld_;
|
||||||
|
LoaderHost2CUDA* host2cuda_ld_;
|
||||||
|
CUdevice dev_;
|
||||||
|
CUdevice peers_[HYCR_MAX_NDEVS];
|
||||||
|
int peers_count_;
|
||||||
|
CUcontext ctx_;
|
||||||
|
CUstream streams_[HYCR_MAX_DEVICE_NQUEUES];
|
||||||
|
CUmodule module_;
|
||||||
|
CUresult err_;
|
||||||
|
unsigned int shared_mem_bytes_;
|
||||||
|
unsigned int shared_mem_offs_[HYCR_MAX_KERNEL_NARGS];
|
||||||
|
void* params_[HYCR_MAX_KERNEL_NARGS];
|
||||||
|
int max_arg_idx_;
|
||||||
|
CUdeviceptr garbage_[HYCR_MAX_GABAGES];
|
||||||
|
int ngarbage_;
|
||||||
|
std::map<CUfunction, CUfunction> kernels_offs_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} /* namespace runtime */
|
||||||
|
} /* namespace hycr */
|
||||||
|
|
||||||
|
#endif /* HYCR_SRC_RT_DEVICE_CUDA_H */
|
||||||
|
|
Loading…
Reference in New Issue
Block a user