Skip to content
This repository was archived by the owner on Apr 24, 2022. It is now read-only.

Optimise work updates on CUDA, was Branch#359 #361

Merged
merged 47 commits into from
Dec 28, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
6adfaa8
Started on #359
MariusVanDerWijden Oct 28, 2017
a816f1c
Rewrote CUDAMiner
MariusVanDerWijden Nov 23, 2017
1be1486
minor fix
MariusVanDerWijden Nov 27, 2017
42bd287
rewrote the kernel once again
MariusVanDerWijden Dec 2, 2017
2853d0c
Merge branch 'master' into branch_#359
MariusVanDerWijden Dec 3, 2017
138173d
added support for __shfl_sync since __shfl is deprecated with CUDA 9.0
MariusVanDerWijden Oct 23, 2017
8c7b88e
added support for shfl_sync
MariusVanDerWijden Oct 23, 2017
59a1227
added Bitmask to all shfl_sync operations, to target all warps
MariusVanDerWijden Oct 24, 2017
4b30cfb
Now Use CUDA_VERSION instead of __CUDA_ARCH__
MariusVanDerWijden Oct 24, 2017
5e63e33
Update to fix stratum failover argument handling
mbajkowski Nov 4, 2017
30aa051
updated for backwards compatibility with -FS argument
mbajkowski Nov 8, 2017
1a3a07e
Travis CI: Upgrade Xcode
chfast Nov 23, 2017
a50e679
Extend FAQ in README (#313)
gidoBOSSftw5731 Nov 24, 2017
83be30e
Tracis CI: Upgrade CUDA to 9.0
chfast Nov 24, 2017
6177c3b
fixed dumb error
MariusVanDerWijden Nov 24, 2017
1dbfb19
Started on #359
MariusVanDerWijden Oct 28, 2017
a62bd1f
Rewrote CUDAMiner
MariusVanDerWijden Nov 23, 2017
a7e8dca
minor fix
MariusVanDerWijden Nov 27, 2017
946783c
rewrote the kernel once again
MariusVanDerWijden Dec 2, 2017
157c7be
Merge branch 'master' into branch_#359
MariusVanDerWijden Dec 3, 2017
82d0daf
minor improvements
MariusVanDerWijden Dec 3, 2017
87d1364
minor improvements
MariusVanDerWijden Dec 3, 2017
29064ea
rebase from Master
MariusVanDerWijden Dec 3, 2017
2e0f674
restructured bad code
MariusVanDerWijden Dec 4, 2017
923ff2c
fixed compile errors
MariusVanDerWijden Dec 4, 2017
89eb45a
restructure code, added output to find errors
MariusVanDerWijden Dec 4, 2017
546b675
minor change
MariusVanDerWijden Dec 5, 2017
2c2dedd
fixed another error
MariusVanDerWijden Dec 5, 2017
0852ce2
minor update
MariusVanDerWijden Dec 5, 2017
c710eea
another fix
MariusVanDerWijden Dec 5, 2017
9f2d2f1
rearanged code and fixed a guard
MariusVanDerWijden Dec 6, 2017
81398cf
Restructured Light Allocation
MariusVanDerWijden Dec 8, 2017
45b615a
More output to debug
MariusVanDerWijden Dec 8, 2017
b37dca0
Fixed build error
MariusVanDerWijden Dec 8, 2017
a28bcda
fixed simulation
MariusVanDerWijden Dec 21, 2017
09eb4e3
rewrote guard
MariusVanDerWijden Dec 21, 2017
4ae0e20
fixed dag generation error
MariusVanDerWijden Dec 27, 2017
335cfc2
Merge branch 'branch_#359' of https://github.com/MariusVanDerWijden/e…
MariusVanDerWijden Dec 27, 2017
6a251e6
removed unnecessary print
MariusVanDerWijden Dec 28, 2017
4d60935
Merge branch 'master' into branch_#359
MariusVanDerWijden Dec 28, 2017
274500d
Merge branch 'master' into branch_#359
MariusVanDerWijden Dec 28, 2017
7c0aecf
fixed merge error
MariusVanDerWijden Dec 28, 2017
c77a0ca
removed unnecessary cnote
MariusVanDerWijden Dec 28, 2017
8aba73b
changed requested changes
MariusVanDerWijden Dec 28, 2017
a8facaf
changed hwmon breaking bug
MariusVanDerWijden Dec 28, 2017
5a42eb7
use std::vector instead of raw array
MariusVanDerWijden Dec 28, 2017
6b00137
fixed init of m_light
MariusVanDerWijden Dec 28, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 65 additions & 52 deletions libethash-cuda/CUDAMiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,75 +113,89 @@ void CUDAMiner::report(uint64_t _nonce)
void CUDAMiner::kickOff()
{
m_hook->reset();
startWorking();
}

void CUDAMiner::workLoop()
bool CUDAMiner::init(const h256& seed)
{
// take local copy of work since it may end up being overwritten by kickOff/pause.
try {
WorkPackage w = work();
if (!w)
return;
unsigned device = s_devices[index] > -1 ? s_devices[index] : index;

if (!m_miner || m_minerSeed != w.seed)
{
unsigned device = s_devices[index] > -1 ? s_devices[index] : index;
cnote << "Initialising miner...";
m_minerSeed = seed;

if(!m_miner)
m_miner = new ethash_cuda_miner;

if (s_dagLoadMode == DAG_LOAD_MODE_SEQUENTIAL)
EthashAux::LightType light;
light = EthashAux::light(seed);
bytesConstRef lightData = light->data();

m_miner->init(light->light, lightData.data(), lightData.size(),
device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory);
s_dagLoadIndex++;

if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE)
{
if (s_dagLoadIndex >= s_numInstances && s_dagInHostMemory)
{
while (s_dagLoadIndex < index) {
this_thread::sleep_for(chrono::seconds(1));
}
// all devices have loaded DAG, we can free now
delete[] s_dagInHostMemory;
s_dagInHostMemory = NULL;
cout << "Freeing DAG from host" << endl;
}
else if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE)
}
return true;
}
catch (std::runtime_error const& _e)
{
delete m_miner;
m_miner = nullptr;
cwarn << "Error CUDA mining: " << _e.what();
return false;
}
}

void CUDAMiner::workLoop()
{
WorkPackage current;
current.header = h256{1u};
current.seed = h256{1u};
try
{
while(true)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When this loop ends?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, fixed it

{
const WorkPackage w = work();

if(!m_miner || current.header != w.header || current.seed != w.seed)
{
if (device != s_dagCreateDevice)
if(!w || w.header == h256())
{
// wait until DAG is created on selected device
while (s_dagInHostMemory == NULL) {
this_thread::sleep_for(chrono::seconds(1));
}
cnote << "No work. Pause for 3 s.";
std::this_thread::sleep_for(std::chrono::seconds(3));
continue;
}
else

//cnote << "set work; seed: " << "#" + w.seed.hex().substr(0, 8) + ", target: " << "#" + w.boundary.hex().substr(0, 12);
if (!m_miner || current.seed != w.seed)
{
// reset load index
s_dagLoadIndex = 0;
if(!init(w.seed))
break;
}
current = w;
}

cnote << "Initialising miner...";
m_minerSeed = w.seed;

delete m_miner;
m_miner = new ethash_cuda_miner;

EthashAux::LightType light;
light = EthashAux::light(w.seed);
//bytesConstRef dagData = dag->data();
bytesConstRef lightData = light->data();

m_miner->init(light->light, lightData.data(), lightData.size(), device, (s_dagLoadMode == DAG_LOAD_MODE_SINGLE), s_dagInHostMemory);
s_dagLoadIndex++;

if (s_dagLoadMode == DAG_LOAD_MODE_SINGLE)
uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)current.boundary >> 192);
uint64_t startN = current.startNonce;
if (current.exSizeBits >= 0)
startN = current.startNonce | ((uint64_t)index << (64 - 4 - current.exSizeBits)); // this can support up to 16 devices
m_miner->search(current.header.data(), upper64OfBoundary, *m_hook, (current.exSizeBits >= 0), startN);

// Check if we should stop.
if (shouldStop())
{
if (s_dagLoadIndex >= s_numInstances && s_dagInHostMemory)
{
// all devices have loaded DAG, we can free now
delete[] s_dagInHostMemory;
s_dagInHostMemory = NULL;

cout << "Freeing DAG from host" << endl;
}
break;
}
}

uint64_t upper64OfBoundary = (uint64_t)(u64)((u256)w.boundary >> 192);
uint64_t startN = w.startNonce;
if (w.exSizeBits >= 0)
startN = w.startNonce | ((uint64_t)index << (64 - 4 - w.exSizeBits)); // this can support up to 16 devices
m_miner->search(w.header.data(), upper64OfBoundary, *m_hook, (w.exSizeBits >= 0), startN);
}
catch (std::runtime_error const& _e)
{
Expand All @@ -194,7 +208,6 @@ void CUDAMiner::workLoop()
void CUDAMiner::pause()
{
m_hook->abort();
stopWorking();
}

std::string CUDAMiner::platformInfo()
Expand Down
3 changes: 3 additions & 0 deletions libethash-cuda/CUDAMiner.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class EthashCUDAHook;
private:
void workLoop() override;
void report(uint64_t _nonce);
void initDevice(WorkPackage w);

bool init(const h256& seed);

EthashCUDAHook* m_hook = nullptr;
ethash_cuda_miner* m_miner = nullptr;
Expand Down
120 changes: 74 additions & 46 deletions libethash-cuda/ethash_cuda_miner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ ethash_cuda_miner::search_hook::~search_hook() {}

ethash_cuda_miner::ethash_cuda_miner()
{
m_light.resize(getNumDevices());
}

std::string ethash_cuda_miner::platform_info(unsigned _deviceId)
Expand Down Expand Up @@ -204,82 +205,109 @@ bool ethash_cuda_miner::init(ethash_light_t _light, uint8_t const* _lightData, u
{
try
{
int device_count = getNumDevices();
unsigned device_count = getNumDevices();

if (device_count == 0)
return false;

// use selected device
m_device_num = std::min<int>((int)_deviceId, device_count - 1);

m_device_num = _deviceId < device_count -1 ? _deviceId : device_count - 1;
nvmlh = wrap_nvml_create();

cudaDeviceProp device_props;
CUDA_SAFE_CALL(cudaGetDeviceProperties(&device_props, m_device_num));

cudalog << "Using device: " << device_props.name << " (Compute " + to_string(device_props.major) + "." + to_string(device_props.minor) + ")";

CUDA_SAFE_CALL(cudaSetDevice(m_device_num));
CUDA_SAFE_CALL(cudaDeviceReset());
CUDA_SAFE_CALL(cudaSetDeviceFlags(s_scheduleFlag));
CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));

m_search_buf = new volatile uint32_t *[s_numStreams];
m_streams = new cudaStream_t[s_numStreams];

uint64_t dagSize = ethash_get_datasize(_light->block_number);
uint32_t dagSize128 = (unsigned)(dagSize / ETHASH_MIX_BYTES);
uint32_t lightSize64 = (unsigned)(_lightSize / sizeof(node));



CUDA_SAFE_CALL(cudaSetDevice(m_device_num));
cudalog << "Set Device to current";
if(dagSize128 != m_dag_size || !m_dag)
{
//We need to reset the device and recreate the dag
cudalog << "Resetting device";
CUDA_SAFE_CALL(cudaDeviceReset());
CUDA_SAFE_CALL(cudaSetDeviceFlags(s_scheduleFlag));
CUDA_SAFE_CALL(cudaDeviceSetCacheConfig(cudaFuncCachePreferL1));
//We need to reset the light and the Dag for the following code to reallocate
//since cudaDeviceReset() free's all previous allocated memory
m_light[m_device_num] = nullptr;
m_dag = nullptr;
}
// create buffer for cache
hash64_t * light = NULL;
hash128_t * dag = m_dag;
hash64_t * light = m_light[m_device_num];

if (!hostDAG)
{
if(!light){
cudalog << "Allocating light with size: " << _lightSize;
CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast<void**>(&light), _lightSize));
// copy dag cache to CPU.
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(light), _lightData, _lightSize, cudaMemcpyHostToDevice));
}

// create buffer for dag
hash128_t * dag;
CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast<void**>(&dag), dagSize));
// copy lightData to device
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(light), _lightData, _lightSize, cudaMemcpyHostToDevice));
m_light[m_device_num] = light;

// create mining buffers
for (unsigned i = 0; i != s_numStreams; ++i)
if(dagSize128 != m_dag_size || !dag) // create buffer for dag
CUDA_SAFE_CALL(cudaMalloc(reinterpret_cast<void**>(&dag), dagSize));

set_constants(dag, dagSize128, light, lightSize64); //in ethash_cuda_miner_kernel.cu

if(dagSize128 != m_dag_size || !dag)
{
CUDA_SAFE_CALL(cudaMallocHost(&m_search_buf[i], SEARCH_RESULT_BUFFER_SIZE * sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaStreamCreate(&m_streams[i]));
}
set_constants(dag, dagSize128, light, lightSize64);
memset(&m_current_header, 0, sizeof(hash32_t));
m_current_target = 0;
m_current_nonce = 0;
m_current_index = 0;

m_sharedBytes = device_props.major * 100 < SHUFFLE_MIN_VER ? (64 * s_blockSize) / 8 : 0 ;
// create mining buffers
cudalog << "Generating mining buffers"; //TODO whats up with this?
for (unsigned i = 0; i != s_numStreams; ++i)
{
CUDA_SAFE_CALL(cudaMallocHost(&m_search_buf[i], SEARCH_RESULT_BUFFER_SIZE * sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaStreamCreate(&m_streams[i]));
}

memset(&m_current_header, 0, sizeof(hash32_t));
m_current_target = 0;
m_current_nonce = 0;
m_current_index = 0;

if (!hostDAG)
{
cudalog << "Generating DAG for GPU #" << m_device_num;
ethash_generate_dag(dagSize, s_gridSize, s_blockSize, m_streams[0], m_device_num);
m_sharedBytes = device_props.major * 100 < SHUFFLE_MIN_VER ? (64 * s_blockSize) / 8 : 0 ;

if (_cpyToHost)
if (!hostDAG)
{
uint8_t* memoryDAG = new uint8_t[dagSize];
cudalog << "Copying DAG from GPU #" << m_device_num << " to host";
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(memoryDAG), dag, dagSize, cudaMemcpyDeviceToHost));

hostDAG = memoryDAG;
if(m_device_num == 0 || !_cpyToHost){ //if !cpyToHost -> All devices shall generate their DAG
cudalog << "Generating DAG for GPU #" << m_device_num << " with dagSize: "
<< dagSize <<" gridSize: " << s_gridSize << " &m_streams[0]: " << &m_streams[0];
ethash_generate_dag(dagSize, s_gridSize, s_blockSize, m_streams[0], m_device_num);

if (_cpyToHost)
{
uint8_t* memoryDAG = new uint8_t[dagSize];
cudalog << "Copying DAG from GPU #" << m_device_num << " to host";
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(memoryDAG), dag, dagSize, cudaMemcpyDeviceToHost));

hostDAG = memoryDAG;
}
}else{
while(!hostDAG)
this_thread::sleep_for(chrono::milliseconds(100));
goto cpyDag;
}
}
else
{
cpyDag:
cudalog << "Copying DAG from host to GPU #" << m_device_num;
const void* hdag = (const void*)hostDAG;
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(dag), hdag, dagSize, cudaMemcpyHostToDevice));
}
}
else
{
cudalog << "Copying DAG from host to GPU #" << m_device_num;
const void* hdag = (const void*)hostDAG;
CUDA_SAFE_CALL(cudaMemcpy(reinterpret_cast<void*>(dag), hdag, dagSize, cudaMemcpyHostToDevice));
}


m_dag = dag;
m_dag_size = dagSize128;
return true;
}
catch (runtime_error const&)
Expand Down
9 changes: 8 additions & 1 deletion libethash-cuda/ethash_cuda_miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,14 @@ class ethash_cuda_miner
uint64_t m_starting_nonce;
uint64_t m_current_index;
uint32_t m_sharedBytes;
int m_device_num;

///Constants on GPU
hash128_t* m_dag = nullptr;
std::vector<hash64_t*> m_light;
uint32_t m_dag_size = -1;
uint32_t m_device_num;



volatile uint32_t ** m_search_buf;
cudaStream_t * m_streams;
Expand Down
3 changes: 2 additions & 1 deletion libethash-cuda/ethash_cuda_miner_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ ethash_calculate_dag_item(uint32_t start)
dag_node.uint4s[w] = fnv4(dag_node.uint4s[w], s4);
}
}

}


#endif
}
SHA3_512(dag_node.uint2s);
Expand Down