Functions
__global__ void	transSpin_gpu (Integer_t *dResPtr, Integer_t const LD, Integer_t const N, Integer_t const dloc, Integer_t const SecDim, Integer_t const *representative)

int	main (int argc, char **argv)

Function Documentation

◆ main()

int main	(	int	argc,
		char **	argv
	)

                                {
    if(argc != 3) {
        std::cerr << "Usage: Bit_Compare(Integer_t const N, Integer_t const dloc)" << std::endl;
        std::exit(EX_USAGE);
    }
    Integer_t N    = (Integer_t)std::atoi(argv[1]);
    Integer_t dloc = (Integer_t)std::atoi(argv[2]);
 
    magma_init();
    magma_queue_t queue = NULL;
    magma_int_t   dev   = 0;
    magma_getdevice(&dev);
    magma_queue_create(dev, &queue);
 
    Integer_t const momentum = 0;
    TransSector     Sector;
    Sector.initialize(N, momentum, dloc);
    Sector.copyToGPU(queue);
 
    constexpr Integer_t   GPU_UNIT = 32;
    Integer_t const       LDT      = magma_roundup(Sector.dim(), GPU_UNIT);
    matrix_gpu<Integer_t> dRes(LDT, N);
 
    void (*funcPtr)(Integer_t*, Integer_t const, Integer_t const, Integer_t const, Integer_t const,
                    Integer_t const*);
    funcPtr = transSpin_gpu;
    struct cudaFuncAttributes attr;
    cudaFuncGetAttributes(&attr, funcPtr);
    Integer_t nThread = (Integer_t)sqrt(attr.maxThreadsPerBlock);
    Integer_t nBlock  = (Integer_t)Sector.dim() / nThread;
    if(Sector.dim() > nBlock * nThread) nBlock += 1;
    GPUconfig conf(dim3(nBlock, nBlock, 1), dim3(nThread, nThread, 1), 0, queue);
    transSpin_gpuMatrixElementsInSector<<<conf.dimGrid(), conf.dimBlock(), conf.shared(),
                                          conf.stream()>>>(dRes.ptr(), dRes.LD(), N, dloc,
                                                           Sector.dim(), Sector.rep_gpu());
    cudaDeviceSynchronize();
 
    matrix<Integer_t> Res(Sector.dim(), N);
    magma_getmatrix(Sector.dim(), N, sizeof(Integer_t), dRes.ptr(), dRes.LD(), &*Res.begin(),
                    Sector.dim(), queue);
 
    bool      flag;
    Integer_t itemp;
    for(Integer_t n = 0; n < Sector.dim(); ++n) {
        flag = true;
        for(Integer_t trans = 0; trans < N; ++trans) {
            if(Res.at(n, trans) != (itemp = transSpin(Sector.representative(n), trans, dloc, N))) {
                flag = false;
                std::cerr << "(" << n << "," << trans << ") transSpin(" << itemp
                          << ") != transSpin_gpu(" << Res.at(n, trans) << ")" << std::endl;
                // break;
            };
        }
        if(flag == false)
            continue;
        else
            std::cout << "(OK, n=" << n << ") transSpin = transSpin_gpu" << std::endl;
    }
 
    cudaDeviceSynchronize();
    return 0;
}

◆ transSpin_gpu()

__global__ void transSpin_gpu	(	Integer_t *	dResPtr,
		Integer_t const	LD,
		Integer_t const	N,
		Integer_t const	dloc,
		Integer_t const	SecDim,
		Integer_t const *	representative
	)

                                                               {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    int idy = blockIdx.y * blockDim.y + threadIdx.y;
 
    if(idx >= SecDim || idy >= N) return;
    dResPtr[idx + LD * idy] = transSpin(representative[idx], idy, dloc, N);
}

Functions

Function Documentation

◆ main()

◆ transSpin_gpu()