diff --git a/libethash-cuda/CUDAMiner.cpp b/libethash-cuda/CUDAMiner.cpp index 5a315d9c54..e716cea624 100644 --- a/libethash-cuda/CUDAMiner.cpp +++ b/libethash-cuda/CUDAMiner.cpp @@ -113,75 +113,89 @@ void CUDAMiner::report(uint64_t _nonce) void CUDAMiner::kickOff() { m_hook->reset(); - startWorking(); } -void CUDAMiner::workLoop() +bool CUDAMiner::init(const h256& seed) { // take local copy of work since it may end up being overwritten by kickOff/pause. try { - WorkPackage w = work(); - if (!w) - return; + unsigned device = s_devices[index] > -1 ? s_devices[index] : index; - if (!m_miner || m_minerSeed != w.seed) - { - unsigned device = s_devices[index] > -1 ? s_devices[index] : index; + cnote << "Initialising miner..."; + m_minerSeed = seed; + + if(!m_miner) + m_miner = new ethash_cuda_miner; - if (s_dagLoadMode == DAG_LOAD_MODE_SEQUENTIAL) + EthashAux::LightType light; + light = EthashAux::light(seed); + bytesConstRef lightData = light->data(); + + m_miner->init(light->light, lightData.data(), lightData.size(), + device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory); + s_dagLoadIndex++; + + if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE) + { + if (s_dagLoadIndex >= s_numInstances && s_dagInHostMemory) { - while (s_dagLoadIndex < index) { - this_thread::sleep_for(chrono::seconds(1)); - } + // all devices have loaded DAG, we can free now + delete[] s_dagInHostMemory; + s_dagInHostMemory = NULL; + cout << "Freeing DAG from host" << endl; } - else if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE) + } + return true; + } + catch (std::runtime_error const& _e) + { + delete m_miner; + m_miner = nullptr; + cwarn << "Error CUDA mining: " << _e.what(); + return false; + } +} + +void CUDAMiner::workLoop() +{ + WorkPackage current; + current.header = h256{1u}; + current.seed = h256{1u}; + try + { + while(true) + { + const WorkPackage w = work(); + + if(!m_miner || current.header != w.header || current.seed != w.seed) { - if (device != s_dagCreateDevice) + if(!w || w.header == h256()) { - // wait until DAG is created on selected device - while (s_dagInHostMemory == NULL) { - this_thread::sleep_for(chrono::seconds(1)); - } + cnote << "No work. Pause for 3 s."; + std::this_thread::sleep_for(std::chrono::seconds(3)); + continue; } - else + + //cnote << "set work; seed: " << "#" + w.seed.hex().substr(0, 8) + ", target: " << "#" + w.boundary.hex().substr(0, 12); + if (!m_miner || current.seed != w.seed) { - // reset load index - s_dagLoadIndex = 0; + if(!init(w.seed)) + break; } + current = w; } - - cnote << "Initialising miner..."; - m_minerSeed = w.seed; - - delete m_miner; - m_miner = new ethash_cuda_miner; - - EthashAux::LightType light; - light = EthashAux::light(w.seed); - //bytesConstRef dagData = dag->data(); - bytesConstRef lightData = light->data(); - - m_miner->init(light->light, lightData.data(), lightData.size(), device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory); - s_dagLoadIndex++; - - if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE) + uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)current.boundary >> 192); + uint64_t startN = current.startNonce; + if (current.exSizeBits >= 0) + startN = current.startNonce | ((uint64_t)index << (64 - 4 - current.exSizeBits)); // this can support up to 16 devices + m_miner->search(current.header.data(), upper64OfBoundary, *m_hook, (current.exSizeBits >= 0), startN); + + // Check if we should stop. + if (shouldStop()) { - if (s_dagLoadIndex >= s_numInstances && s_dagInHostMemory) - { - // all devices have loaded DAG, we can free now - delete[] s_dagInHostMemory; - s_dagInHostMemory = NULL; - - cout << "Freeing DAG from host" << endl; - } + break; } } - - uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192); - uint64_t startN = w.startNonce; - if (w.exSizeBits >= 0) - startN = w.startNonce | ((uint64_t)index << (64 - 4 - w.exSizeBits)); // this can support up to 16 devices - m_miner->search(w.header.data(), upper64OfBoundary, *m_hook, (w.exSizeBits >= 0), startN); } catch (std::runtime_error const& _e) { @@ -194,7 +208,6 @@ void CUDAMiner::workLoop() void CUDAMiner::pause() { m_hook->abort(); - stopWorking(); } std::string CUDAMiner::platformInfo() diff --git a/libethash-cuda/CUDAMiner.h b/libethash-cuda/CUDAMiner.h index 690a3ad47f..3e0fce5fd2 100644 --- a/libethash-cuda/CUDAMiner.h +++ b/libethash-cuda/CUDAMiner.h @@ -76,6 +76,9 @@ class EthashCUDAHook; private: void workLoop() override; void report(uint64_t _nonce); + void initDevice(WorkPackage w); + + bool init(const h256& seed); EthashCUDAHook* m_hook = nullptr; ethash_cuda_miner* m_miner = nullptr; diff --git a/libethash-cuda/ethash_cuda_miner.cpp b/libethash-cuda/ethash_cuda_miner.cpp index d431a4cf72..162287c590 100644 --- a/libethash-cuda/ethash_cuda_miner.cpp +++ b/libethash-cuda/ethash_cuda_miner.cpp @@ -63,6 +63,7 @@ ethash_cuda_miner::search_hook::~search_hook() {} ethash_cuda_miner::ethash_cuda_miner() { + m_light.resize(getNumDevices()); } std::string ethash_cuda_miner::platform_info(unsigned _deviceId) @@ -204,14 +205,13 @@ bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, u { try { - int device_count = getNumDevices(); + unsigned device_count = getNumDevices(); if (device_count == 0) return false; // use selected device - m_device_num = std::min((int)_deviceId, device_count - 1); - + m_device_num = _deviceId < device_count -1 ? _deviceId : device_count - 1; nvmlh = wrap_nvml_create(); cudaDeviceProp device_props; @@ -219,11 +219,6 @@ bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, u cudalog << "Using device: " << device_props.name << " (Compute " + to_string(device_props.major) + "." + to_string(device_props.minor) + ")"; - CUDA_SAFE_CALL(cudaSetDevice(m_device_num)); - CUDA_SAFE_CALL(cudaDeviceReset()); - CUDA_SAFE_CALL(cudaSetDeviceFlags(s_scheduleFlag)); - CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); - m_search_buf = new volatile uint32_t *[s_numStreams]; m_streams = new cudaStream_t[s_numStreams]; @@ -231,55 +226,88 @@ bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, u uint32_t dagSize128 = (unsigned)(dagSize / ETHASH_MIX_BYTES); uint32_t lightSize64 = (unsigned)(_lightSize / sizeof(node)); + + + CUDA_SAFE_CALL(cudaSetDevice(m_device_num)); + cudalog << "Set Device to current"; + if(dagSize128 != m_dag_size || !m_dag) + { + //We need to reset the device and recreate the dag + cudalog << "Resetting device"; + CUDA_SAFE_CALL(cudaDeviceReset()); + CUDA_SAFE_CALL(cudaSetDeviceFlags(s_scheduleFlag)); + CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1)); + //We need to reset the light and the Dag for the following code to reallocate + //since cudaDeviceReset() free's all previous allocated memory + m_light[m_device_num] = nullptr; + m_dag = nullptr; + } // create buffer for cache - hash64_t * light = NULL; + hash128_t * dag = m_dag; + hash64_t * light = m_light[m_device_num]; - if (!hostDAG) - { + if(!light){ + cudalog << "Allocating light with size: " << _lightSize; CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast(&light), _lightSize)); - // copy dag cache to CPU. - CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast(light), _lightData, _lightSize, cudaMemcpyHostToDevice)); } - - // create buffer for dag - hash128_t * dag; - CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast(&dag), dagSize)); + // copy lightData to device + CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast(light), _lightData, _lightSize, cudaMemcpyHostToDevice)); + m_light[m_device_num] = light; - // create mining buffers - for (unsigned i = 0; i != s_numStreams; ++i) + if(dagSize128 != m_dag_size || !dag) // create buffer for dag + CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast(&dag), dagSize)); + + set_constants(dag, dagSize128, light, lightSize64); //in ethash_cuda_miner_kernel.cu + + if(dagSize128 != m_dag_size || !dag) { - CUDA_SAFE_CALL(cudaMallocHost(&m_search_buf[i], SEARCH_RESULT_BUFFER_SIZE * sizeof(uint32_t))); - CUDA_SAFE_CALL(cudaStreamCreate(&m_streams[i])); - } - set_constants(dag, dagSize128, light, lightSize64); - memset(&m_current_header, 0, sizeof(hash32_t)); - m_current_target = 0; - m_current_nonce = 0; - m_current_index = 0; - - m_sharedBytes = device_props.major * 100 < SHUFFLE_MIN_VER ? (64 * s_blockSize) / 8 : 0 ; + // create mining buffers + cudalog << "Generating mining buffers"; //TODO whats up with this? + for (unsigned i = 0; i != s_numStreams; ++i) + { + CUDA_SAFE_CALL(cudaMallocHost(&m_search_buf[i], SEARCH_RESULT_BUFFER_SIZE * sizeof(uint32_t))); + CUDA_SAFE_CALL(cudaStreamCreate(&m_streams[i])); + } + + memset(&m_current_header, 0, sizeof(hash32_t)); + m_current_target = 0; + m_current_nonce = 0; + m_current_index = 0; - if (!hostDAG) - { - cudalog << "Generating DAG for GPU #" << m_device_num; - ethash_generate_dag(dagSize, s_gridSize, s_blockSize, m_streams[0], m_device_num); + m_sharedBytes = device_props.major * 100 < SHUFFLE_MIN_VER ? (64 * s_blockSize) / 8 : 0 ; - if (_cpyToHost) + if (!hostDAG) { - uint8_t* memoryDAG = new uint8_t[dagSize]; - cudalog << "Copying DAG from GPU #" << m_device_num << " to host"; - CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast(memoryDAG), dag, dagSize, cudaMemcpyDeviceToHost)); - - hostDAG = memoryDAG; + if(m_device_num == 0 || !_cpyToHost){ //if !cpyToHost -> All devices shall generate their DAG + cudalog << "Generating DAG for GPU #" << m_device_num << " with dagSize: " + << dagSize <<" gridSize: " << s_gridSize << " &m_streams[0]: " << &m_streams[0]; + ethash_generate_dag(dagSize, s_gridSize, s_blockSize, m_streams[0], m_device_num); + + if (_cpyToHost) + { + uint8_t* memoryDAG = new uint8_t[dagSize]; + cudalog << "Copying DAG from GPU #" << m_device_num << " to host"; + CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast(memoryDAG), dag, dagSize, cudaMemcpyDeviceToHost)); + + hostDAG = memoryDAG; + } + }else{ + while(!hostDAG) + this_thread::sleep_for(chrono::milliseconds(100)); + goto cpyDag; + } + } + else + { +cpyDag: + cudalog << "Copying DAG from host to GPU #" << m_device_num; + const void* hdag = (const void*)hostDAG; + CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast(dag), hdag, dagSize, cudaMemcpyHostToDevice)); } } - else - { - cudalog << "Copying DAG from host to GPU #" << m_device_num; - const void* hdag = (const void*)hostDAG; - CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast(dag), hdag, dagSize, cudaMemcpyHostToDevice)); - } - + + m_dag = dag; + m_dag_size = dagSize128; return true; } catch (runtime_error const&) diff --git a/libethash-cuda/ethash_cuda_miner.h b/libethash-cuda/ethash_cuda_miner.h index 6defad3388..e924e8de60 100644 --- a/libethash-cuda/ethash_cuda_miner.h +++ b/libethash-cuda/ethash_cuda_miner.h @@ -58,7 +58,14 @@ class ethash_cuda_miner uint64_t m_starting_nonce; uint64_t m_current_index; uint32_t m_sharedBytes; - int m_device_num; + + ///Constants on GPU + hash128_t* m_dag = nullptr; + std::vector m_light; + uint32_t m_dag_size = -1; + uint32_t m_device_num; + + volatile uint32_t ** m_search_buf; cudaStream_t * m_streams; diff --git a/libethash-cuda/ethash_cuda_miner_kernel.cu b/libethash-cuda/ethash_cuda_miner_kernel.cu index d25cc14abe..5ae6f89649 100644 --- a/libethash-cuda/ethash_cuda_miner_kernel.cu +++ b/libethash-cuda/ethash_cuda_miner_kernel.cu @@ -104,8 +104,9 @@ ethash_calculate_dag_item(uint32_t start) dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], s4); } } - } + + #endif } SHA3_512(dag_node.uint2s);