StatMech
Dynamics_Deviation.cpp File Reference

Functions

__global__ void EnergyStddev_kernel (Integer_t dim, Real_t *res_d, Complex_t *Hamiltonian_d, Integer_t LDT)
 
void EnergyStddev (std::vector< double > &res, matrix_gpu< Complex_t > const &Hamiltonian_d, TransSector const &Sector, GPUconfig const &GPUconf)
 
void __global__ StatePreparation (double time, Integer_t Nstates, Integer_t *dIndex, Integer_t dim_sub, Complex_t *dState, Integer_t LDS, Complex_t *dh_tot, Integer_t LDH, double *dEigenEnergy)
 
void __global__ InfiniteTimeAverage (Integer_t const SizeDegen, Integer_t const *dDegeneracy, Integer_t const dim_sub, Complex_t *dOp_tot, Integer_t const LDH)
 
void EnergyStddev (std::vector< double > &res, matrix< Complex_t > const &Hamiltonian, TransSector const &Sector, void *GPUconf=nullptr)
 
void StatePreparation (double time, std::vector< Integer_t > &Index, Integer_t dim_sub, matrix< Complex_t > &State, matrix< Complex_t > &h_tot, std::vector< double > &eigenEnergy)
 
void InfiniteTimeAverage (std::vector< Integer_t > const &Degeneracy, Integer_t const dim_sub, matrix< Complex_t > &Op_tot)
 
int main (int argc, char **argv)
 

Function Documentation

◆ EnergyStddev() [1/2]

void EnergyStddev ( std::vector< double > &  res,
matrix< Complex_t > const &  Hamiltonian,
TransSector const &  Sector,
void *  GPUconf = nullptr 
)
{
    Integer_t dim = SectorDimension(Sector);
    res.resize(dim);
    std::fill(res.begin(), res.end(), 0.0);

#pragma omp parallel for
    for(size_t j = 0; j < dim; j++) {
        for(size_t k = 0; k < j; k++)
            res.at(j) += real(conj(Hamiltonian.at(j, k)) * Hamiltonian.at(j, k));
        for(size_t k = j + 1; k < dim; k++)
            res.at(j) += real(conj(Hamiltonian.at(j, k)) * Hamiltonian.at(j, k));
        res.at(j) = sqrt(res.at(j));
    }
}
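For Hermitian input this computes, for every basis state |j>, the off-diagonal row sum sigma_j = sqrt(sum_{k != j} |H_jk|^2), which equals the energy standard deviation sqrt(<j|H^2|j> - <j|H|j>^2) of that state. A minimal self-contained sketch of the same computation, assuming a plain column-major buffer in place of the project's matrix class:

#include <cmath>
#include <complex>
#include <vector>

// Energy standard deviation of each basis state of a Hermitian matrix H,
// stored column-major with leading dimension dim (sketch, not project code).
std::vector<double> energy_stddev(std::vector<std::complex<double>> const& H, std::size_t dim) {
    std::vector<double> res(dim, 0.0);
    for(std::size_t j = 0; j < dim; ++j) {
        for(std::size_t k = 0; k < dim; ++k) {
            if(k == j) continue;                 // diagonal term cancels against <j|H|j>^2
            res[j] += std::norm(H[j + dim * k]); // |H_jk|^2
        }
        res[j] = std::sqrt(res[j]);
    }
    return res;
}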

◆ EnergyStddev() [2/2]

void EnergyStddev ( std::vector< double > &  res,
matrix_gpu< Complex_t > const &  Hamiltonian_d,
TransSector const &  Sector,
GPUconfig const &  GPUconf 
)
{
    Integer_t dim = SectorDimension(Sector);
    res.resize(dim);

    matrix_gpu<Real_t> Intermediate_d(Hamiltonian_d.LD(), dim);
    EnergyStddev_kernel<<<GPUconf.dimGrid(), GPUconf.dimBlock(), GPUconf.shared(),
                          GPUconf.stream()>>>(dim, Intermediate_d.ptr(), Hamiltonian_d.ptr(),
                                              Hamiltonian_d.LD());

    std::vector<Real_t> ones(dim, 1.0);
    matrix_gpu<Real_t> ones_d(dim, 1);
    matrix_gpu<Real_t> res_d(dim, 1);
    magma_setvector(dim, sizeof(Real_t), &*ones.begin(), 1, ones_d.ptr(), 1, GPUconf.queue());
    gemv(MagmaNoTrans, dim, dim, Intermediate_d, ones_d, res_d, GPUconf.queue());
    magma_getvector(dim, sizeof(Real_t), res_d.ptr(), 1, &*ones.begin(), 1, GPUconf.queue());
#pragma omp parallel for
    for(size_t j = 0; j < dim; j++) res[j] = (double)sqrt(ones[j]);
}
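The GPU overload needs no hand-written reduction: EnergyStddev_kernel first writes |H_jk|^2 (with a zeroed diagonal) into Intermediate_d, and the gemv against a vector of ones then produces the row sums sum_k |H_jk|^2 in a single BLAS call. A sketch of that reduction trick on the CPU, with hypothetical plain buffers:

#include <cstddef>
#include <vector>

// Row sums of a dim x dim column-major matrix M computed as y = M * ones,
// the same reduction the gemv(MagmaNoTrans, ...) call performs on the GPU.
std::vector<double> row_sums(std::vector<double> const& M, std::size_t dim) {
    std::vector<double> y(dim, 0.0);
    for(std::size_t k = 0; k < dim; ++k)     // columns
        for(std::size_t j = 0; j < dim; ++j) // rows
            y[j] += M[j + dim * k];          // accumulate (M * ones)_j
    return y;
}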

◆ EnergyStddev_kernel()

__global__ void EnergyStddev_kernel ( Integer_t  dim,
Real_t *  res_d,
Complex_t *  Hamiltonian_d,
Integer_t  LDT 
)
{
    int const idx = blockIdx.x * blockDim.x + threadIdx.x;
    int const idy = blockIdx.y * blockDim.y + threadIdx.y;
    if(idx >= dim || idy >= dim) return;
    if(idx == idy) {
        res_d[idx + LDT * idy] = 0;
        return;
    }
    res_d[idx + LDT * idy]
        = real(conj(Hamiltonian_d[idx + LDT * idy]) * Hamiltonian_d[idx + LDT * idy]);
}
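The kernel assigns one thread per matrix element on a two-dimensional grid and bounds-checks both indices against dim. A hedged launch sketch; the 32-thread block edge and the helper name are assumptions, since the project carries its actual configuration in GPUconfig:

// Hypothetical helper: launch EnergyStddev_kernel over a dim x dim matrix.
void launch_energy_stddev(Integer_t dim, Real_t* res_d, Complex_t* Hamiltonian_d,
                          Integer_t LDT, cudaStream_t stream) {
    int const nThread = 32;                            // block edge (assumption)
    int const nBlock  = (dim + nThread - 1) / nThread; // ceil(dim / nThread)
    dim3 const block(nThread, nThread, 1);
    dim3 const grid(nBlock, nBlock, 1);
    EnergyStddev_kernel<<<grid, block, 0, stream>>>(dim, res_d, Hamiltonian_d, LDT);
}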

◆ InfiniteTimeAverage() [1/2]

void __global__ InfiniteTimeAverage ( Integer_t const  SizeDegen,
Integer_t const *  dDegeneracy,
Integer_t const  dim_sub,
Complex_t *  dOp_tot,
Integer_t const  LDH 
)
{
    int const idx = blockIdx.x * blockDim.x + threadIdx.x;
    int const idy = blockIdx.y * blockDim.y + threadIdx.y;
    if(idx >= dim_sub || idy >= dim_sub) return;
    int j, k;
    for(j = 0; j < SizeDegen && dDegeneracy[j] <= idx; ++j) {};
    --j;
    for(k = 0; k < SizeDegen && dDegeneracy[k] <= idy; ++k) {};
    --k;
    if(j != k) dOp_tot[idx + LDH * idy] = dComplexZero<>;
}
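dDegeneracy encodes block boundaries: entry b is the first eigenindex of (quasi-)degenerate block b, terminated by a sentinel equal to dim_sub. The two linear scans map idx and idy to their blocks, and every element coupling two different blocks is zeroed. A small illustration of the lookup, assuming the same boundary convention:

// With boundaries {0, 3, 5, 8} (sentinel 8 = dim_sub), eigenindices map to
// blocks as: 0,1,2 -> block 0;  3,4 -> block 1;  5,6,7 -> block 2.
__host__ __device__ int block_of(int idx, int const* boundaries, int size) {
    int b = 0;
    while(b < size && boundaries[b] <= idx) ++b; // same scan as in the kernel
    return b - 1;
}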

◆ InfiniteTimeAverage() [2/2]

void InfiniteTimeAverage ( std::vector< Integer_t > const &  Degeneracy,
Integer_t const  dim_sub,
matrix< Complex_t > &  Op_tot 
)
{
    // #pragma omp parallel for
    for(size_t idx = 0; idx < dim_sub; ++idx) {
        for(size_t idy = 0; idy < idx; ++idy) {
            int j, k;
            // Locate the (quasi-)degenerate block containing each eigenindex.
            for(j = 0; j < Degeneracy.size() && Degeneracy[j] <= idx; ++j) {};
            --j;
            for(k = 0; k < Degeneracy.size() && Degeneracy[k] <= idy; ++k) {};
            --k;
            if(j != k) {
                Op_tot.at(idx, idy) = ComplexZero<>;
                Op_tot.at(idy, idx) = ComplexZero<>;
            }
            // Debug output: report the off-diagonal pairs that survive the average.
            else { std::cout << idx << " " << idy << " " << j << " " << k << std::endl; }
        }
    }
}
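This is the dephasing (diagonal-ensemble) projection: in the infinite-time average only matrix elements between degenerate eigenstates survive,

\[
  \overline{\langle\psi|A(t)|\psi\rangle}
  = \lim_{T\to\infty}\frac{1}{T}\int_0^T \langle\psi(t)|A|\psi(t)\rangle\,dt
  = \sum_{j,k\,:\,E_j=E_k} \overline{c_j}\,c_k\,A_{jk},
\]

since every other term carries a phase e^{i(E_j - E_k)t} that averages to zero over long times.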

◆ main()

int main ( int  argc,
char **  argv 
)
{
    double const dE = (argc >= Nargs_base + 1) ? std::atof(argv[Nargs_base]) : 0.02;

    Integer_t dim_sub;          // dimension of the Hilbert-space sector
    Integer_t info, failed = 0; // counters
    double gE, energyRange, OpRange, OpMin, sum;
    constexpr double precision = 1.0E-7;

    if(!Initialize(argc, argv, Nargs_common)) {
        std::cerr << "Error: Initialization failed." << std::endl;
        std::exit(EX_USAGE);
    }
    debug_print("# Successfully initialized.");

    //******************** Check for the directory structure ********************
    debug_print("# Checking for the directory structure.");
    std::ofstream OutFs;
    //******************** (END) Check for the directory structure ********************

    //***************************************************************************
    //******************** Allocation & Initialization **************************
    //***************************************************************************
#ifdef GPU
    magma_init();
    magma_queue_t queue = NULL;
    magma_int_t dev = 0;
    magma_getdevice(&dev);
    magma_queue_create(dev, &queue);
#else
    void* GPUconf = nullptr;
#endif

    //******************** Translation invariance ********************
    debug_print("# Calculating translation-invariant sectors.");
    std::vector<TransSector> Sector(n_max + 1);
    //******************** (END) Translation invariance ********************

    //********** Allocate CPU memories **********//
    debug_print("# Allocating CPU memories.");
    double time;
    constexpr double Tmax = 100000;
    constexpr Integer_t Nstep = 1000 + 1;
    Integer_t const dim_max = SectorDimension(Sector[n_max]);
    Integer_t NdataInShell, Id;
    double MCAverage, shellWidth;

    std::vector<Integer_t> Index(dim_max);
    std::vector<Integer_t> Degeneracy(dim_max);
    std::vector<double> eigenEnergy(dim_max);
    std::vector<double> EXPvalue(dim_max);
    std::vector<double> energyExpValue(dim_max);
    std::vector<double> energyStddev(dim_max);
    std::vector<Complex_t> ComplexVector_temp(dim_max);
    matrix<Complex_t> h(dloc_h, dloc_h);
    matrix<Complex_t> loc(dloc_op, dloc_op);
    matrix<Complex_t> Dynamics(dim_max, Nstep + 1);
#ifndef GPU
    matrix<Complex_t> h_tot(dim_max, dim_max);
    matrix<Complex_t> loc_tot(dim_max, dim_max);
    matrix<Complex_t> State(dim_max, dim_max);
    #define dh h
    #define dloc loc
    #define dh_tot h_tot
    #define dloc_tot loc_tot
    #define dState State
#endif
    //********** (END) Allocate CPU memories **********//

#ifdef GPU
    //********** Allocate GPU memories **********//
    debug_print("# Allocating GPU memories.");
    constexpr Integer_t GPU_UNIT = 32;
    Integer_t const LDT = magma_roundup(dim_max, GPU_UNIT);
    std::vector<Complex_t> tempVector(dim_max);
    matrix_gpu<Complex_t> dh(dloc_h, dloc_h);
    matrix_gpu<Complex_t> dloc(dloc_op, dloc_op);
    matrix_gpu<Complex_t> dh_tot(LDT, dim_max);
    matrix_gpu<Complex_t> dloc_tot(LDT, dim_max);
    matrix_gpu<Complex_t> dState(LDT, dim_max);
    matrix_gpu<Integer_t> dIndex(dim_max, 1);
    matrix_gpu<Integer_t> dDegeneracy(dim_max + 1, 1);
    matrix_gpu<double> dEigenEnergy(dim_max, 1);
    matrix_gpu<Complex_t> dDynamics(LDT, Nstep + 1);
    matrix_gpu<Complex_t> dComplexMatrix_temp1(LDT, dim_max);
    matrix_gpu<Complex_t> dComplexMatrix_temp2(LDT, dim_max);
    //********** (END) Allocate GPU memories **********//

    //********** Determine GPU configuration **********//
    GPUconfig GPUconf(dim3(nBlock, nBlock, 1), dim3(nThread, nThread, 1), 0, queue);
    //********** (END) Determine GPU configuration **********//
#endif // #ifdef GPU

    double start, t_int, end, temp_t;
    double T_diag = 0, T_post = 0, T_pre = 0;
    init_genrand(SEED);
    start = getETtime();
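    // Draw and discard the first repMin random samples so that the RNG state at
    // sample No. repMin matches a run that started from sample 0.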
    for(Integer_t repetition = 0; repetition < repMin; ++repetition) {
        generateLocal_h(h, dloc_h, -1);
        generateLocal_op(loc, dloc_op, -1);
    }
    end = getETtime();
    std::cout << "(init_genrand): time=" << std::fixed << (end - start) << std::endl;
    //***************************************************************************
    //******************** (END) Allocation & Initialization ********************
    //***************************************************************************

    start = getETtime();
    end = start;
    for(Integer_t repetition = repMin; repetition <= repMax; ++repetition) {
        // Draw the local Hamiltonian and the local observable at random **************//
        generateLocal_h(h, dloc_h, -1);
        generateLocal_op(loc, dloc_op, -1);

        std::cout << "# rep=" << repetition << std::endl;
        h.print(dloc_h, dloc_h);
        loc.print(dloc_op, dloc_op);

        debug_print("# Setting matrix to GPU.");
#ifdef GPU
        magma_setmatrix(dloc_h, dloc_h, sizeof(Complex_t), &*h.begin(), dloc_h, dh.ptr(), dloc_h,
                        queue);
        magma_setmatrix(dloc_op, dloc_op, sizeof(Complex_t), &*loc.begin(), dloc_op, dloc.ptr(),
                        dloc_op, queue);
        // magma_zprint_gpu(dloc_h, dloc_h, dh.ptr(), dh.LD(), queue);
        // magma_zprint_gpu(dloc_op, dloc_op, dloc.ptr(), dloc.LD(), queue);
#endif

        std::string outDirName(baseDirName);
        {
            std::stringstream buff;
            buff << "/RawData/Sample_No" << repetition;
            outDirName += buff.str();
            outDirName = std::regex_replace(outDirName, std::regex("//"), "/");
            filesystem::create_directories(outDirName);
        }

        // for(size_t n = n_max;n >= n_min; --n) {
        for(size_t n = n_min; n <= n_max; ++n) {
            debug_print("# (rep,n)=(" << repetition << "," << n << ")");
            dim_sub = SectorDimension(Sector[n]);
            ComplexVector_temp.resize(dim_sub);
            energyExpValue.resize(dim_sub);

            debug_print("# Constructing global matrices in the sector.");
            temp_t = getETtime();
            {
                dim_sub = constructGlobal_h(dh_tot, dh, num_h, Sector.at(n), GPUconf);
                constructGlobal_op(dloc_tot, dloc, num_op, Sector.at(n), GPUconf);
#ifdef GPU
                magma_queue_sync(GPUconf.queue());
#endif
            }
            {
#ifdef GPU
                magma_getvector(dim_sub, sizeof(Complex_t), dh_tot.ptr(), dh_tot.LD() + 1,
                                &*ComplexVector_temp.begin(), 1, queue);
#endif
#pragma omp parallel for
                for(size_t j = 0; j < dim_sub; j++) {
#ifdef GPU
                    energyExpValue.at(j) = real(ComplexVector_temp.at(j));
#else
                    energyExpValue.at(j) = real(h_tot.at(j, j));
#endif
                }
                EnergyStddev(energyStddev, dh_tot, Sector.at(n), GPUconf);
            }
            T_pre += getETtime() - temp_t;

            // magma_queue_sync(queue);
            // std::cerr << "(N=" << n << ") isnan_kernel(dh_tot) before" << std::endl;
            // isnan_kernel<<<GPUconf.dimGrid(),GPUconf.dimBlock(),GPUconf.shared(),GPUconf.stream()>>>(dim_sub, dh_tot.ptr(), dh_tot.LD());
            // magma_queue_sync(queue);
            // std::cerr << "(N=" << n << ") isnan_kernel(dloc_tot) before" << std::endl;
            // isnan_kernel<<<GPUconf.dimGrid(),GPUconf.dimBlock(),GPUconf.shared(),GPUconf.stream()>>>(dim_sub, dloc_tot.ptr(), dloc_tot.LD());
            // temp_t = getETtime();
            {
                debug_print("# Calculating eigenstate matrix elements.");
                info = EigenMatrixElements(eigenEnergy, dim_sub, dh_tot, dloc_tot, GPUconf);
                if(info != 0) {
                    ++failed; // diagonalization failed; skip this sector
                    continue;
                }
#ifdef GPU
                magma_getvector(dim_sub, sizeof(Complex_t), dloc_tot.ptr(), dloc_tot.LD() + 1,
                                &*tempVector.begin(), 1, queue);
                for(size_t j = 0; j < dim_sub; ++j) EXPvalue[j] = (double)real(tempVector.at(j));
#else
                for(size_t j = 0; j < dim_sub; ++j) EXPvalue[j] = real(loc_tot.at(j, j));
#endif
                energyRange = eigenEnergy[dim_sub - 1] - eigenEnergy[0];
                gE = eigenEnergy[0];
#pragma omp parallel for
                for(size_t j = 0; j < dim_sub; ++j) {
                    eigenEnergy[j] = (eigenEnergy[j] - gE) / energyRange;
                    energyExpValue[j] = (energyExpValue[j] - gE) / energyRange;
                    energyStddev[j] = energyStddev[j] / energyRange;
                }
            }
            T_diag += getETtime() - temp_t;
            // magma_queue_sync(queue);
            // std::cerr << "(N=" << n << ") isnan_kernel(dh_tot) after" << std::endl;
            // isnan_kernel<<<GPUconf.dimGrid(),GPUconf.dimBlock(),GPUconf.shared(),GPUconf.stream()>>>(dim_sub, dh_tot.ptr(), dh_tot.LD());
            // magma_queue_sync(queue);
            // std::cerr << "(N=" << n << ") isnan_kernel(dloc_tot) after" << std::endl;
            // isnan_kernel<<<GPUconf.dimGrid(),GPUconf.dimBlock(),GPUconf.shared(),GPUconf.stream()>>>(dim_sub, dloc_tot.ptr(), dloc_tot.LD());

            Degeneracy.resize(0);
            Degeneracy.push_back(0);
            for(size_t j = 1; j < dim_sub; ++j) {
                if(eigenEnergy[j] - eigenEnergy[j - 1] >= precision) Degeneracy.push_back(j);
            }
            Degeneracy.push_back(dim_sub);
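            // Degeneracy now holds block boundaries: entry b is the first eigenindex of
            // (quasi-)degenerate block b, terminated by the sentinel dim_sub; this is the
            // layout InfiniteTimeAverage expects.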
            // print(Degeneracy, Degeneracy.size());

            Index.resize(dim_sub);
            std::iota(Index.begin(), Index.end(), 0);
            std::sort(Index.begin(), Index.end(), [&energyExpValue](size_t x, size_t y) {
                return energyExpValue[x] < energyExpValue[y];
            });
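            // Index now lists the Fock-basis states sorted by their energy expectation
            // value; it fixes the set and order of initial states evolved below.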
            // print(Index, Index.size());

#ifdef GPU
            magma_setvector(Degeneracy.size(), sizeof(Integer_t), &*Degeneracy.begin(), 1,
                            dDegeneracy.ptr(), 1, queue);
            magma_setvector(Index.size(), sizeof(Integer_t), &*Index.begin(), 1, dIndex.ptr(), 1,
                            queue);
            magma_setvector(dim_sub, sizeof(double), &*eigenEnergy.begin(), 1, dEigenEnergy.ptr(),
                            1, queue);
            magma_queue_sync(queue);
#endif

            for(size_t p = 0; p < Nstep; ++p) {
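                // Time grid: p = 0 .. Nstep-1 sweeps t = 0 .. Tmax in steps of
                // Tmax/(Nstep-1); column Nstep of Dynamics holds the infinite-time average.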
                time = (Tmax * p) / (double)(Nstep - 1);
#ifdef GPU
                StatePreparation<<<GPUconf.dimGrid(), GPUconf.dimBlock(), GPUconf.shared(),
                                   GPUconf.stream()>>>(time, Index.size(), dIndex.ptr(), dim_sub,
                                                       dState.ptr(), dState.LD(), dh_tot.ptr(),
                                                       dh_tot.LD(), dEigenEnergy.ptr());
                magma_queue_sync(queue);
#else
                StatePreparation(time, Index, dim_sub, State, h_tot, eigenEnergy);
#endif
#ifdef GPU
                matrixProduct_hemm(MagmaLeft, MagmaUpper, dim_sub, Index.size(), dloc_tot, dState,
                                   dComplexMatrix_temp1, queue);
                matrixProduct_gemm(MagmaConjTrans, MagmaNoTrans, Index.size(), Index.size(),
                                   dim_sub, dState, dComplexMatrix_temp1, dComplexMatrix_temp2,
                                   queue);
                magma_copyvector(Index.size(), sizeof(Complex_t), dComplexMatrix_temp2.ptr(),
                                 dComplexMatrix_temp2.LD() + 1,
                                 dDynamics.ptr() + dDynamics.LD() * p, 1, queue);
#else
                for(size_t j = 0; j < Index.size(); j++) {
                    Id = Index.at(j);
                    Dynamics.at(j, p) = ComplexOne<>
                                        * QuantumExpValue_he(dim_sub, State.begin() + State.LD() * j,
                                                             dloc_tot, ComplexVector_temp);
                }
#endif
            }

            // Calculate the infinite-time average.
            {
                debug_print("# Calculating the infinite time average.");
#ifdef GPU
                StatePreparation<<<GPUconf.dimGrid(), GPUconf.dimBlock(), GPUconf.shared(),
                                   GPUconf.stream()>>>(0, Index.size(), dIndex.ptr(), dim_sub,
                                                       dState.ptr(), dState.LD(), dh_tot.ptr(),
                                                       dh_tot.LD(), dEigenEnergy.ptr());
                magma_queue_sync(queue);
#else
                StatePreparation(0, Index, dim_sub, State, h_tot, eigenEnergy);
#endif

#ifdef GPU
                magma_copymatrix(dim_sub, dim_sub, sizeof(Complex_t), dloc_tot.ptr(), dloc_tot.LD(),
                                 dh_tot.ptr(), dh_tot.LD(), queue);
                magma_queue_sync(queue);
#else
    #pragma omp parallel for
                for(Integer_t j = 0; j < dim_sub; ++j)
                    for(Integer_t k = 0; k < dim_sub; ++k) h_tot.at(j, k) = loc_tot.at(j, k);
#endif

#ifdef GPU
                InfiniteTimeAverage<<<GPUconf.dimGrid(), GPUconf.dimBlock(), GPUconf.shared(),
                                      GPUconf.stream()>>>(Degeneracy.size(), dDegeneracy.ptr(),
                                                          dim_sub, dloc_tot.ptr(), dloc_tot.LD());
                magma_queue_sync(queue);
#else
                InfiniteTimeAverage(Degeneracy, dim_sub, loc_tot);
                // loc_tot.print(dim_sub,dim_sub);
#endif

#ifdef GPU
                matrixProduct_hemm(MagmaLeft, MagmaUpper, dim_sub, Index.size(), dloc_tot, dState,
                                   dComplexMatrix_temp1, queue);
                matrixProduct_gemm(MagmaConjTrans, MagmaNoTrans, Index.size(), Index.size(),
                                   dim_sub, dState, dComplexMatrix_temp1, dComplexMatrix_temp2,
                                   queue);
                magma_copyvector(Index.size(), sizeof(Complex_t), dComplexMatrix_temp2.ptr(),
                                 dComplexMatrix_temp2.LD() + 1,
                                 dDynamics.ptr() + dDynamics.LD() * Nstep, 1, queue);
#else
                for(size_t j = 0; j < Index.size(); j++) {
                    Id = Index.at(j);
                    Dynamics.at(j, Nstep) = ComplexOne<>
                                            * QuantumExpValue_he(dim_sub,
                                                                 State.begin() + State.LD() * j,
                                                                 loc_tot, ComplexVector_temp);
                }
#endif
            }
#ifdef GPU
            magma_getmatrix(Index.size(), Nstep + 1, sizeof(Complex_t), dDynamics.ptr(),
                            dDynamics.LD(), &*Dynamics.begin(), Dynamics.LD(), queue);
            magma_queue_sync(queue);
#endif

            temp_t = getETtime();
            // dh_tot holds a copy of the observable at this point (copied above),
            // so this yields the operator's spectral range and minimum.
            { OpRange = SpectralRange(OpMin, dim_sub, dh_tot); }
            T_diag += getETtime() - temp_t;

            temp_t = getETtime();
            {
                debug_print("# Writing results to a file.");
                std::stringstream buff("");
                buff << "/FockStateDynamics" << PRECISION << "_N" << n << ".txt";
                std::string filename(outDirName);
                filename += buff.str();
                OutFs.open(filename);
                checkIsFileOpen(OutFs, filename);
                OutFs << std::right << std::showpos << std::scientific << std::setprecision(6);
                OutFs << "# energyRange= " << energyRange << "\n"
                      << "# gE= " << gE << "\n"
                      << "# OpRange= " << OpRange << "\n"
                      << "# OpMin= " << OpMin << "\n"
                      << "# 1.(State No.) 2.(Normalized energy) 3.(Normalized Energy Stddev) "
                         "4.(Normalized MCAverage) 5.(Normalized Cumulative ExpVal at T=1000) "
                         "6.(at T=5000) 7.(at T=10000) 8.(at T=Infty)"
                      << "\n\n";
                for(size_t j = 0; j < Index.size(); j++) {
                    Id = Index.at(j);
                    shellWidth = dE;
                    MCAverage = MicroCanonicalAverage(NdataInShell, energyExpValue[Id], shellWidth,
                                                      dim_sub, eigenEnergy, EXPvalue);
                    // If the shell is empty (NaN), widen it in steps of the state's own stddev.
                    for(size_t k = 1; isnan(MCAverage); ++k) {
                        shellWidth = k * energyStddev[Id];
                        MCAverage
                            = MicroCanonicalAverage(NdataInShell, energyExpValue[Id], shellWidth,
                                                    dim_sub, eigenEnergy, EXPvalue);
                    }

                    OutFs << Id << " " << energyExpValue[Id] << " " << energyStddev[Id] << " "
                          << (MCAverage - OpMin) / OpRange << " ";
                    size_t p = 0;
                    sum = 0;
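                    // Cumulative time averages over the first 1%, 10%, and 100% of the
                    // time grid, followed by the infinite-time (diagonal-ensemble) value.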
                    for(; p <= (Nstep - 1) / 100; ++p) sum += real(Dynamics.at(j, p));
                    OutFs << (sum / (double)(p + 1) - OpMin) / OpRange << " ";
                    for(; p <= (Nstep - 1) / 10; ++p) sum += real(Dynamics.at(j, p));
                    OutFs << (sum / (double)(p + 1) - OpMin) / OpRange << " ";
                    for(; p <= (Nstep - 1); ++p) sum += real(Dynamics.at(j, p));
                    OutFs << (sum / (double)(p + 1) - OpMin) / OpRange << " ";
                    OutFs << (real(Dynamics.at(j, Nstep)) - OpMin) / OpRange << std::endl;
                }
                OutFs.close();
            }
            T_post += getETtime() - temp_t;
        }
        if(repetition % 10 == 9) {
            t_int = end;
            end = getETtime();
            std::cerr << "(total=" << std::setw(6) << repetition + 1
                      << "): timeINT=" << std::setprecision(6) << std::setw(8) << (end - t_int)
                      << ", timeTOT=" << std::setprecision(6) << std::setw(8) << (end - start)
                      << ", T_construct=" << std::setprecision(6) << std::setw(10) << T_pre << "("
                      << std::setprecision(1) << 100 * T_pre / (end - start) << "%)"
                      << ", T_diag=" << std::setprecision(6) << std::setw(8) << T_diag << "("
                      << std::setprecision(1) << 100 * T_diag / (end - start) << "%)"
                      << ", T_process=" << std::setprecision(6) << std::setw(8) << T_post << "("
                      << std::setprecision(1) << 100 * T_post / (end - start) << "%)" << std::endl;
        }
    }

    Finalize(argc, argv);
#ifdef GPU
    magma_finalize();
#endif
    return 0;
}

◆ StatePreparation() [1/2]

void __global__ StatePreparation ( double  time,
Integer_t  Nstates,
Integer_t *  dIndex,
Integer_t  dim_sub,
Complex_t *  dState,
Integer_t  LDS,
Complex_t *  dh_tot,
Integer_t  LDH,
double *  dEigenEnergy 
)
{
    int const idx = blockIdx.x * blockDim.x + threadIdx.x;
    int const idy = blockIdx.y * blockDim.y + threadIdx.y;
    if(idx >= dim_sub || idy >= Nstates) return;
    Integer_t stateId = dIndex[idy];
    Real_t phase = -dEigenEnergy[idx] * time;
    dState[idx + LDS * idy] = conj(dh_tot[stateId + LDH * idx]) * MAGMA_CEXP(phase);
}

◆ StatePreparation() [2/2]

void StatePreparation ( double  time,
std::vector< Integer_t > &  Index,
Integer_t  dim_sub,
matrix< Complex_t > &  State,
matrix< Complex_t > &  h_tot,
std::vector< double > &  eigenEnergy 
)
{
#pragma omp parallel for
    for(size_t k = 0; k < dim_sub; k++) {
        Complex_t phase = -ComplexI<> * eigenEnergy[k] * time;
        for(size_t j = 0; j < Index.size(); j++) {
            State.at(k, j) = conj(h_tot.at(Index[j], k)) * std::exp(phase);
        }
    }
}
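Both overloads implement the same preparation step: with U the unitary left in h_tot by the diagonalization (U_mk = <m|E_k>), column j of State receives the eigenbasis coefficients of the time-evolved Fock state |Index[j]>. In LaTeX notation (our reading of the code, with the observable already rotated to the eigenbasis by EigenMatrixElements):

\[
  c^{(j)}_k(t) = \overline{U_{\mathrm{Index}[j],\,k}}\; e^{-i E_k t},
  \qquad
  \langle A(t)\rangle_j = \sum_{k,l} \overline{c^{(j)}_k(t)}\, A_{kl}\, c^{(j)}_l(t),
\]

which is exactly the quantity the hemm/gemm pair in main() evaluates for all prepared states at once.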