StatMech
Loading...
Searching...
No Matches
transSpin_Compare.cu File Reference

Functions

__global__ void transSpin_gpu (Integer_t *dResPtr, Integer_t const LD, Integer_t const N, Integer_t const dloc, Integer_t const SecDim, Integer_t const *representative)
 
int main (int argc, char **argv)
 

Function Documentation

◆ main()

int main ( int  argc,
char **  argv 
)
20 {
21 if(argc != 3) {
22 std::cerr << "Usage: Bit_Compare(Integer_t const N, Integer_t const dloc)" << std::endl;
23 std::exit(EX_USAGE);
24 }
25 Integer_t N = (Integer_t)std::atoi(argv[1]);
26 Integer_t dloc = (Integer_t)std::atoi(argv[2]);
27
28 magma_init();
29 magma_queue_t queue = NULL;
30 magma_int_t dev = 0;
31 magma_getdevice(&dev);
32 magma_queue_create(dev, &queue);
33
34 Integer_t const momentum = 0;
36 Sector.initialize(N, momentum, dloc);
37 Sector.copyToGPU(queue);
38
39 constexpr Integer_t GPU_UNIT = 32;
40 Integer_t const LDT = magma_roundup(Sector.dim(), GPU_UNIT);
41 matrix_gpu<Integer_t> dRes(LDT, N);
42
43 void (*funcPtr)(Integer_t*, Integer_t const, Integer_t const, Integer_t const, Integer_t const,
44 Integer_t const*);
45 funcPtr = transSpin_gpu;
46 struct cudaFuncAttributes attr;
47 cudaFuncGetAttributes(&attr, funcPtr);
48 Integer_t nThread = (Integer_t)sqrt(attr.maxThreadsPerBlock);
50 if(Sector.dim() > nBlock * nThread) nBlock += 1;
51 GPUconfig conf(dim3(nBlock, nBlock, 1), dim3(nThread, nThread, 1), 0, queue);
52 transSpin_gpuMatrixElementsInSector<<<conf.dimGrid(), conf.dimBlock(), conf.shared(),
53 conf.stream()>>>(dRes.ptr(), dRes.LD(), N, dloc,
54 Sector.dim(), Sector.rep_gpu());
55 cudaDeviceSynchronize();
56
57 matrix<Integer_t> Res(Sector.dim(), N);
58 magma_getmatrix(Sector.dim(), N, sizeof(Integer_t), dRes.ptr(), dRes.LD(), &*Res.begin(),
59 Sector.dim(), queue);
60
61 bool flag;
62 Integer_t itemp;
63 for(Integer_t n = 0; n < Sector.dim(); ++n) {
64 flag = true;
65 for(Integer_t trans = 0; trans < N; ++trans) {
66 if(Res.at(n, trans) != (itemp = transSpin(Sector.representative(n), trans, dloc, N))) {
67 flag = false;
68 std::cerr << "(" << n << "," << trans << ") transSpin(" << itemp
69 << ") != transSpin_gpu(" << Res.at(n, trans) << ")" << std::endl;
70 // break;
71 };
72 }
73 if(flag == false)
74 continue;
75 else
76 std::cout << "(OK, n=" << n << ") transSpin = transSpin_gpu" << std::endl;
77 }
78
79 cudaDeviceSynchronize();
80 return 0;
81}
std::vector< TransSector > Sector(n_max+1)
Definition mytypes.hpp:272
Translation invariant sector of a many-body Hilbert space.
Definition TransSector.hpp:19
struct cudaFuncAttributes attr
Definition getAttributesOfMatrixElementsInSector.cpp:2
Integer_t const nBlock
Definition getAttributesOfMatrixElementsInSector.cpp:5
Integer_t const nThread
Definition getAttributesOfMatrixElementsInSector.cpp:4
MKL_INT Integer_t
Definition mytypes.hpp:359
__global__ void transSpin_gpu(Integer_t *dResPtr, Integer_t const LD, Integer_t const N, Integer_t const dloc, Integer_t const SecDim, Integer_t const *representative)
Definition transSpin_Compare.cu:10

◆ transSpin_gpu()

__global__ void transSpin_gpu ( Integer_t *  dResPtr,
Integer_t const  LD,
Integer_t const  N,
Integer_t const  dloc,
Integer_t const  SecDim,
Integer_t const *  representative 
)
12 {
13 int idx = blockIdx.x * blockDim.x + threadIdx.x;
14 int idy = blockIdx.y * blockDim.y + threadIdx.y;
15
16 if(idx >= SecDim || idy >= N) return;
17 dResPtr[idx + LD * idy] = transSpin(representative[idx], idy, dloc, N);
18}