Skip to content

Commit 61faa15

Browse files
authored
Merge pull request #2 from fancyIX/fancyIX/lyra2II
Use half LDS with help of local memory
2 parents 09a1e20 + 4767c43 commit 61faa15

17 files changed

+1050
-1081
lines changed

Makefile.am

+4-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ ccminer_SOURCES = elist.h miner.h compat.h \
2323
api.cpp hashlog.cpp nvml.cpp stats.cpp sysinfos.cpp cuda.cpp \
2424
nvsettings.cpp \
2525
equi/equi-stratum.cpp equi/equi.cpp equi/blake2/blake2bx.cpp \
26-
equi/equihash.cpp equi/cuda_equi.cu \
2726
allium.cu \
2827
heavy/heavy.cu \
2928
heavy/cuda_blake512.cu heavy/cuda_blake512.h \
@@ -113,8 +112,10 @@ endif
113112
ccminer_LDADD += -lcuda
114113

115114
nvcc_ARCH :=
115+
#nvcc_ARCH += -gencode=arch=compute_86,code=\"sm_86,compute_86\"
116+
nvcc_ARCH += -gencode=arch=compute_75,code=\"sm_75,compute_75\"
116117
nvcc_ARCH += -gencode=arch=compute_61,code=\"sm_61,compute_61\"
117-
#nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
118+
nvcc_ARCH += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
118119
#nvcc_ARCH += -gencode=arch=compute_50,code=\"sm_50,compute_50\"
119120
#nvcc_ARCH += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
120121
#nvcc_ARCH += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
@@ -127,7 +128,7 @@ nvcc_FLAGS += $(JANSSON_INCLUDES) --ptxas-options="-v"
127128
$(NVCC) $(nvcc_FLAGS) --maxrregcount=128 -o $@ -c $<
128129

129130
lyra2/cuda_lyra2.o: lyra2/cuda_lyra2.cu
130-
$(NVCC) $(nvcc_FLAGS) --maxrregcount=255 -o $@ -c $<
131+
$(NVCC) $(nvcc_FLAGS) --maxrregcount=128 -o $@ -c $<
131132

132133
Algo256/blake256.o: Algo256/blake256.cu
133134
$(NVCC) $(nvcc_FLAGS) --maxrregcount=64 -o $@ -c $<

README.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
ccminer-fancyIX 0.1.0 (Jan. 2018) "lyra2v2 and keccak improvements"
2+
ccminer-fancyIX 0.2.0 (Jan. 2018) "lyra2v2 and keccak improvements"
33
---------------------------------------------------------------
44

55
***************************************************************

allium.cu

+24-3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ extern "C" {
1212

1313
static uint64_t* d_hash[MAX_GPUS];
1414
static uint64_t* d_matrix[MAX_GPUS];
15+
static uint64_t* g_pad[MAX_GPUS];
1516

1617
extern void blake256_cpu_init(int thr_id, uint32_t threads);
1718
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
@@ -27,7 +28,9 @@ extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNon
2728
extern void skein256_cpu_init(int thr_id, uint32_t threads);
2829

2930
extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
30-
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
31+
extern void lyra2_cpu_init_high_end(int thr_id, uint32_t threads, uint64_t *g_pad);
32+
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti, bool high_end);
33+
extern void lyra2_cpu_hash_32_fancyIX(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, uint64_t *g_pad, bool gtx750ti, bool high_end);
3134

3235
extern void groestl256_cpu_init(int thr_id, uint32_t threads);
3336
extern void groestl256_cpu_free(int thr_id);
@@ -89,6 +92,7 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
8992
ptarget[7] = 0x0400;
9093

9194
static __thread bool gtx750ti;
95+
static __thread bool high_end;
9296
if (!init[thr_id])
9397
{
9498
int dev_id = device_map[thr_id];
@@ -106,6 +110,15 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
106110
if (strstr(props.name, "750 Ti")) gtx750ti = true;
107111
else gtx750ti = false;
108112

113+
if (strstr(props.name, "1080") ||
114+
strstr(props.name, "1070") ||
115+
strstr(props.name, "2080") ||
116+
strstr(props.name, "2070") ||
117+
strstr(props.name, "3080") ||
118+
strstr(props.name, "3070") ||
119+
strstr(props.name, "3060")) high_end = true;
120+
else high_end = false;
121+
109122
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
110123

111124
blake256_cpu_init(thr_id, throughput);
@@ -119,6 +132,11 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
119132
size_t matrix_sz = device_sm[dev_id] > 500 ? sizeof(uint64_t) * 4 * 4 : sizeof(uint64_t) * 8 * 8 * 3 * 4;
120133
CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput));
121134
lyra2_cpu_init(thr_id, throughput, d_matrix[thr_id]);
135+
if (high_end) {
136+
size_t pad_sz = sizeof(uint64_t) * 8 * 8 * 3 * 4;
137+
CUDA_SAFE_CALL(cudaMalloc(&g_pad[thr_id], pad_sz * throughput));
138+
lyra2_cpu_init_high_end(thr_id, throughput, g_pad[thr_id]);
139+
}
122140
}
123141

124142
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));
@@ -138,11 +156,11 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
138156

139157
blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
140158

141-
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
159+
lyra2_cpu_hash_32_fancyIX(thr_id, throughput, pdata[19], d_hash[thr_id], g_pad[thr_id], gtx750ti, high_end);
142160

143161
cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
144162

145-
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
163+
lyra2_cpu_hash_32_fancyIX(thr_id, throughput, pdata[19], d_hash[thr_id], g_pad[thr_id], gtx750ti, high_end);
146164

147165
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
148166

@@ -204,6 +222,9 @@ extern "C" void free_allium(int thr_id)
204222

205223
cudaFree(d_hash[thr_id]);
206224
cudaFree(d_matrix[thr_id]);
225+
if (g_pad[thr_id] != NULL) {
226+
cudaFree(g_pad[thr_id]);
227+
}
207228

208229
//keccak256_sm3_free(thr_id);
209230
groestl256_cpu_free(thr_id);

bench.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ void algo_free_all(int thr_id)
6060
free_cryptonight(thr_id);
6161
free_decred(thr_id);
6262
free_deep(thr_id);
63-
free_equihash(thr_id);
6463
free_keccak256(thr_id);
6564
free_fresh(thr_id);
6665
free_fugue256(thr_id);
@@ -105,8 +104,6 @@ void algo_free_all(int thr_id)
105104
free_x15(thr_id);
106105
free_x17(thr_id);
107106
free_zr5(thr_id);
108-
free_scrypt(thr_id);
109-
free_scrypt_jane(thr_id);
110107
free_timetravel(thr_id);
111108
free_tribus(thr_id);
112109
free_bitcore(thr_id);

build.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@ rm -f config.status
1313
# CFLAGS="-O2" ./configure
1414
./configure.sh
1515

16-
make -j 4
16+
make -j 8

ccminer.cpp

-11
Original file line numberDiff line numberDiff line change
@@ -2350,9 +2350,6 @@ static void *miner_thread(void *userdata)
23502350
case ALGO_DEEP:
23512351
rc = scanhash_deep(thr_id, &work, max_nonce, &hashes_done);
23522352
break;
2353-
case ALGO_EQUIHASH:
2354-
rc = scanhash_equihash(thr_id, &work, max_nonce, &hashes_done);
2355-
break;
23562353
case ALGO_FRESH:
23572354
rc = scanhash_fresh(thr_id, &work, max_nonce, &hashes_done);
23582355
break;
@@ -2433,14 +2430,6 @@ static void *miner_thread(void *userdata)
24332430
case ALGO_POLYTIMOS:
24342431
rc = scanhash_polytimos(thr_id, &work, max_nonce, &hashes_done);
24352432
break;
2436-
case ALGO_SCRYPT:
2437-
rc = scanhash_scrypt(thr_id, &work, max_nonce, &hashes_done,
2438-
NULL, &tv_start, &tv_end);
2439-
break;
2440-
case ALGO_SCRYPT_JANE:
2441-
rc = scanhash_scrypt_jane(thr_id, &work, max_nonce, &hashes_done,
2442-
NULL, &tv_start, &tv_end);
2443-
break;
24442433
case ALGO_SKEIN:
24452434
rc = scanhash_skeincoin(thr_id, &work, max_nonce, &hashes_done);
24462435
break;

ccminer.vcxproj

+4-33
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
</PropertyGroup>
4040
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
4141
<ImportGroup Label="ExtensionSettings">
42-
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.0.props" />
42+
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.1.props" />
4343
</ImportGroup>
4444
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
4545
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
@@ -155,7 +155,7 @@
155155
<MaxRegCount>80</MaxRegCount>
156156
<PtxAsOptionV>true</PtxAsOptionV>
157157
<Keep>true</Keep>
158-
<CodeGeneration>compute_50,sm_50;compute_52,sm_52;compute_30,sm_30</CodeGeneration>
158+
<CodeGeneration>compute_75,sm_75;compute_61,sm_61;compute_52,sm_52</CodeGeneration>
159159
<AdditionalOptions>--ptxas-options="-O2" --Wno-deprecated-gpu-targets %(AdditionalOptions)</AdditionalOptions>
160160
<Optimization>O2</Optimization>
161161
</CudaCompile>
@@ -198,7 +198,7 @@
198198
<MaxRegCount>80</MaxRegCount>
199199
<PtxAsOptionV>true</PtxAsOptionV>
200200
<Keep>true</Keep>
201-
<CodeGeneration>compute_52,sm_52;compute_50,sm_50;compute_35,sm_35;compute_30,sm_30</CodeGeneration>
201+
<CodeGeneration>compute_75,sm_75;compute_61,sm_61;compute_52,sm_52</CodeGeneration>
202202
<Include>$(NVTOOLSEXT_PATH)\include</Include>
203203
<Optimization>O3</Optimization>
204204
<TargetMachinePlatform>64</TargetMachinePlatform>
@@ -241,7 +241,6 @@
241241
</ClCompile>
242242
<ClCompile Include="equi\equi-stratum.cpp" />
243243
<ClCompile Include="equi\equi.cpp" />
244-
<ClCompile Include="equi\equihash.cpp" />
245244
<ClCompile Include="nvapi.cpp" />
246245
<ClCompile Include="nvsettings.cpp" />
247246
<ClCompile Include="pools.cpp" />
@@ -263,7 +262,6 @@
263262
<ClCompile Include="lyra2\Sponge.c" />
264263
<ClCompile Include="lyra2\Lyra2Z.c" />
265264
<ClInclude Include="equi\eqcuda.hpp" />
266-
<ClInclude Include="equi\equihash.h" />
267265
<ClInclude Include="neoscrypt\neoscrypt.h" />
268266
<ClCompile Include="neoscrypt\neoscrypt.cpp" />
269267
<ClCompile Include="neoscrypt\neoscrypt-cpu.c" />
@@ -294,17 +292,9 @@
294292
<CudaCompile Include="crypto\wildkeccak.cu">
295293
<MaxRegCount>128</MaxRegCount>
296294
</CudaCompile>
297-
<CudaCompile Include="equi\cuda_equi.cu">
298-
<CodeGeneration>compute_52,sm_52;compute_50,sm_50;compute_30,sm_30</CodeGeneration>
299-
<AdditionalOptions> -Xptxas -dlcm=ca -Xptxas -dscm=cs %(AdditionalOptions)</AdditionalOptions>
300-
<MaxRegCount>0</MaxRegCount>
301-
<CodeGeneration Condition="'$(Configuration)|$(Platform)'=='Release|x64'">compute_61,sm_61;compute_52,sm_52;compute_50,sm_50;compute_30,sm_30</CodeGeneration>
302-
</CudaCompile>
303295
<CudaCompile Include="neoscrypt\cuda_neoscrypt.cu">
304296
<MaxRegCount>160</MaxRegCount>
305297
</CudaCompile>
306-
<ClCompile Include="scrypt-jane.cpp" />
307-
<ClCompile Include="scrypt.cpp" />
308298
<ClCompile Include="sia\sia-rpc.cpp" />
309299
<ClCompile Include="skein2.cpp" />
310300
<ClCompile Include="sph\aes_helper.c" />
@@ -411,25 +401,6 @@
411401
<CudaCompile Include="scrypt\keccak.cu" />
412402
<CudaCompile Include="scrypt\sha256.cu" />
413403
<CudaCompile Include="scrypt\salsa_kernel.cu">
414-
<CodeGeneration>compute_30,sm_30</CodeGeneration>
415-
</CudaCompile>
416-
<CudaCompile Include="scrypt\fermi_kernel.cu">
417-
<CodeGeneration>compute_30,sm_30</CodeGeneration>
418-
</CudaCompile>
419-
<CudaCompile Include="scrypt\kepler_kernel.cu">
420-
<CodeGeneration>compute_30,sm_30</CodeGeneration>
421-
</CudaCompile>
422-
<CudaCompile Include="scrypt\nv_kernel.cu">
423-
<CodeGeneration>compute_30,sm_30</CodeGeneration>
424-
</CudaCompile>
425-
<CudaCompile Include="scrypt\nv_kernel2.cu">
426-
<CodeGeneration>compute_35,sm_35;compute_50,sm_50;compute_52,sm_52</CodeGeneration>
427-
</CudaCompile>
428-
<CudaCompile Include="scrypt\test_kernel.cu">
429-
<CodeGeneration>compute_30,sm_30</CodeGeneration>
430-
</CudaCompile>
431-
<CudaCompile Include="scrypt\titan_kernel.cu">
432-
<CodeGeneration>compute_35,sm_35;compute_50,sm_50</CodeGeneration>
433404
</CudaCompile>
434405
<CudaCompile Include="sha256\cuda_sha256d.cu" />
435406
<CudaCompile Include="sha256\sha256d.cu" />
@@ -607,7 +578,7 @@
607578
</ItemGroup>
608579
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
609580
<ImportGroup Label="ExtensionTargets">
610-
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 9.0.targets" />
581+
<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 11.1.targets" />
611582
</ImportGroup>
612583
<!-- Copy the required dlls -->
613584
<Target Name="AfterBuild">

compat/ccminer-config.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@
164164
#define PACKAGE_URL "http://github.com/tpruvot/ccminer"
165165

166166
/* Define to the version of this package. */
167-
#define PACKAGE_VERSION "0.1.0"
167+
#define PACKAGE_VERSION "0.2.0"
168168

169169
/* If using the C implementation of alloca, define if you know the
170170
direction of stack growth for your system; otherwise it will be

configure.ac

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
AC_INIT([ccminer-fancyIX], [0.1.0], [], [ccminer-fancyIX], [http://github.com/tpruvot/ccminer])
1+
AC_INIT([ccminer-fancyIX], [0.2.0], [], [ccminer-fancyIX], [https://github.com/fancyIX/ccminer])
22

33
AC_PREREQ([2.59c])
44
AC_CANONICAL_SYSTEM

0 commit comments

Comments
 (0)