StatMech
FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > > Class Template Reference

#include <ETHmeasure.hpp>

Collaboration diagram for FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > > (diagram not shown)

Public Member Functions

 FuncETHmeasure ()
 
template<class Derived >
void operator() (Eigen::VectorXd &res, Eigen::MatrixBase< Derived > const &eigVector, SubSpace< TotalSpace, Scalar > const &subSpace, mBodyOperatorSpace< Scalar > const &mBodyOpSpace, MicroCanonicalAverage const &MCaverage)
 
template<typename Matrix_t , class Derived >
void operator() (Eigen::VectorXd &res, ObjectOnGPU< Matrix_t > const &dEigVector, SubSpace< TotalSpace, Scalar > const &subSpace, ManyBodyOperatorSpaceBase< Derived > const &mBodyOpSpace, MicroCanonicalAverage const &MCaverage)
 

Private Types

using TotalSpace = TotalSpace_
 
using Scalar = Scalar_
 
using Real = typename SubSpace< TotalSpace_, Scalar_ >::Real
 
using Vector = Eigen::VectorXd
 

Private Attributes

std::vector< Vector > m_expVal
 
std::vector< Vector > m_mcAverage
 

Member Typedef Documentation

◆ Real

template<class TotalSpace_ , typename Scalar_ >
using FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::Real = typename SubSpace<TotalSpace_, Scalar_>::Real
private

◆ Scalar

template<class TotalSpace_ , typename Scalar_ >
using FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::Scalar = Scalar_
private

◆ TotalSpace

template<class TotalSpace_ , typename Scalar_ >
using FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::TotalSpace = TotalSpace_
private

◆ Vector

template<class TotalSpace_ , typename Scalar_ >
using FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::Vector = Eigen::VectorXd
private

Constructor & Destructor Documentation

◆ FuncETHmeasure()

template<class TotalSpace_ , typename Scalar_ >
FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::FuncETHmeasure ( )
inline
    : m_expVal(omp_get_max_threads()), m_mcAverage(omp_get_max_threads()) {
    debug_constructor_printf(1);
}
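
The constructor allocates one scratch vector per OpenMP thread: both m_expVal and m_mcAverage receive omp_get_max_threads() entries. A minimal construction sketch follows; it only illustrates this sizing, and MyTotalSpace together with std::complex<double> are placeholder choices for TotalSpace_ and Scalar_, not names taken from this reference.

    #include <complex>
    #include <omp.h>
    #include <ETHmeasure.hpp>

    void constructMeasure() {
        // The per-thread buffers are sized from omp_get_max_threads() at
        // construction, so fix the thread count before creating the functor.
        omp_set_num_threads(4);
        FuncETHmeasure< SubSpace<MyTotalSpace, std::complex<double>> > ethMeasure;
        (void)ethMeasure;
    }
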

Member Function Documentation

◆ operator()() [1/2]

template<class TotalSpace_ , typename Scalar_ >
template<class Derived >
void FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::operator() ( Eigen::VectorXd &  res,
Eigen::MatrixBase< Derived > const &  eigVector,
SubSpace< TotalSpace, Scalar > const &  subSpace,
mBodyOperatorSpace< Scalar > const &  mBodyOpSpace,
MicroCanonicalAverage const &  MCaverage 
)
{
    debug_print("FuncETHmeasure " << __func__
                << ": eigVector is NOT on GPU. Using CPU algorithm...");

    mBodyOpSpace.computeTransEqClass();
    std::cout << "FuncETHmeasure():\tm = " << mBodyOpSpace.m()
              << ", \tmBodyOpSpace.dim() = " << mBodyOpSpace.dim()
              << ", \tmBodyOpSpace.transEqDim() = " << mBodyOpSpace.transEqDim()
              << ", \tsubSpace.dim() = " << subSpace.dim() << std::endl;
    res = Eigen::VectorXd::Zero(subSpace.dim());
    if(res.norm() > 1.0e-4) {
        std::cerr << "Error(" << __func__
                  << ") : failed to initialize res: res.norm() = " << res.norm() << " is too large."
                  << std::endl;
        std::exit(EXIT_FAILURE);
    }
    std::for_each(m_expVal.begin(), m_expVal.end(),
                  [&eigVector](auto& x) { x.resize(eigVector.cols()); });

    debug_print(eigVector);
    debug_print(subSpace.basis());
    debug_print(mBodyOpSpace.basisOp(0));

    omp_set_max_active_levels(1);
// #pragma omp parallel for reduction(+ : res)
    for(int opEqClass = 0; opEqClass < mBodyOpSpace.transEqDim(); ++opEqClass) {
        int opNum  = mBodyOpSpace.transEqClassRep(opEqClass);
        int thread = omp_get_thread_num();

        m_expVal[thread]
            = (eigVector.adjoint()
               * (subSpace.basis().adjoint() * mBodyOpSpace.basisOp(opNum) * subSpace.basis())
                     .pruned()
                     .eval()
               * eigVector)
                  .diagonal()
                  .real();
        debug_print("\tBefore MCaverage: opNum=" << opNum << ", thread=" << thread);
        MCaverage(m_mcAverage[thread], m_expVal[thread]);

        res += mBodyOpSpace.transPeriod(opEqClass)
               * (m_expVal[thread] - m_mcAverage[thread]).cwiseAbs2();
    }
    debug_print("FuncETHmeasure " << __func__
                << ": eigVector is NOT on GPU. Using CPU algorithm...");
}

◆ operator()() [2/2]

template<class TotalSpace_ , typename Scalar_ >
template<typename Matrix_t , class Derived >
void FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::operator() ( Eigen::VectorXd &  res,
ObjectOnGPU< Matrix_t > const &  dEigVector,
SubSpace< TotalSpace, Scalar > const &  subSpace,
ManyBodyOperatorSpaceBase< Derived > const &  mBodyOpSpace,
MicroCanonicalAverage const &  MCaverage 
)
{

    // dEigVector should be stored in row-major.

    debug_print("FuncETHmeasure " << __func__
                << ": dEigVector is on GPU. (Algorithm is NOT implemented)");
    int nGPUs;
    cuCHECK(cudaGetDeviceCount(&nGPUs));
    mBodyOpSpace.computeTransEqClass();
    std::cout << "FuncETHmeasure(): nGPUs = " << nGPUs
              << ", \tmBodyOpSpace.dim() = " << mBodyOpSpace.dim()
              << ", \tmBodyOpSpace.transEqDim() = " << mBodyOpSpace.transEqDim()
              << ", \tsubSpace.dim() = " << subSpace.dim() << std::endl;
    res = Eigen::VectorXd::Zero(subSpace.dim());
    if(res.norm() > 1.0e-4) {
        std::cerr << "Error(" << __func__
                  << ") : failed to initialize res: res.norm() = " << res.norm() << " is too large."
                  << std::endl;
        std::exit(EXIT_FAILURE);
    }
    size_t const expValMemSize  = sizeof(Real) * subSpace.dim();
    size_t const eigValMemSize  = sizeof(Real) * subSpace.dim();
    size_t const requiredSmSize = expValMemSize + eigValMemSize;

    // GPU-side preparation
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, 0);

    ObjectOnGPU< Eigen::MatrixX<Real> > dRes(
        Eigen::MatrixX<Real>::Zero(subSpace.dim(), deviceProp.multiProcessorCount).eval());
    ObjectOnGPU< SubSpace<TotalSpace, Scalar> > dSubSpace(subSpace);
    ObjectOnGPU< SparseCompressed<Scalar> >     dAdjointBasis(subSpace.basis().adjoint());
    ObjectOnGPU< Eigen::VectorX<Real> >         dEigVal(MCaverage.eigVal());
    /* (declaration of the dWork device buffer is not captured in this listing) */
    ObjectOnGPU<Derived> dmBodyOpSpace(static_cast<Derived const&>(mBodyOpSpace));
    int* transEqClassRep = nullptr;
    int* transPeriod     = nullptr;
    cuCHECK(cudaMalloc(&transEqClassRep, mBodyOpSpace.transEqDim() * sizeof(int)));
    cuCHECK(cudaMalloc(&transPeriod, mBodyOpSpace.transEqDim() * sizeof(int)));
    cuCHECK(cudaMemcpyAsync(transEqClassRep, mBodyOpSpace.transEqClassRep().data(),
                            mBodyOpSpace.transEqDim() * sizeof(int), cudaMemcpyHostToDevice));
    cuCHECK(cudaMemcpyAsync(transPeriod, mBodyOpSpace.transPeriod().data(),
                            mBodyOpSpace.transEqDim() * sizeof(int), cudaMemcpyHostToDevice));

    void (*m_kernel)(
        Eigen::DenseBase< std::remove_reference_t<decltype(*dRes.ptr())> > const*,
        Eigen::DenseBase< std::remove_reference_t<decltype(*dEigVal.ptr())> > const*, Real,
        Eigen::DenseBase< std::remove_reference_t<decltype(*dEigVector.ptr())> > const*,
        SubSpace<TotalSpace, Scalar> const*, SparseCompressed<Scalar> const*,
        ManyBodyOperatorSpaceBase<Derived> const*, int const, int const*, int const*,
        Eigen::DenseBase< std::remove_reference_t<decltype(*dWork.ptr())> >*)
        = ETHmeasure_kernel;

    // determine the configuration of shared memory
    int shared_memory_size = deviceProp.sharedMemPerMultiprocessor - 1024;
    int nEigVals           = (shared_memory_size - expValMemSize) / eigValMemSize;
    int smSize             = expValMemSize + nEigVals * eigValMemSize;

    cuCHECK(cudaFuncSetAttribute(m_kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, smSize));
    struct cudaFuncAttributes m_attr;
    cuCHECK(cudaFuncGetAttributes(&m_attr, m_kernel));
    shared_memory_size = m_attr.maxDynamicSharedSizeBytes;

    int constexpr warpSize = 32;
    int const nThread = min(round_up(subSpace.dim(), warpSize), m_attr.maxThreadsPerBlock);
    int const nBlock  = static_cast<int>(sqrt(static_cast<double>(mBodyOpSpace.transEqDim()))) + 1;
    nEigVals = 2;

    smSize
        = expValMemSize + max(nEigVals * eigValMemSize, (sizeof(int) + sizeof(Scalar)) * nThread);

    std::cout << "\tnThread = " << nThread << ", nBlock = " << nBlock
              << ", m_attr.maxThreadsPerBlock = " << m_attr.maxThreadsPerBlock
              << ", requiredSmSize = " << requiredSmSize << ", smSize = " << smSize
              << ", shared_memory_size = " << shared_memory_size << ", nEigVals = " << nEigVals
              << ", deviceProp.sharedMemPerMultiprocessor = "
              << deviceProp.sharedMemPerMultiprocessor << std::endl;
    assert(nThread >= 1);
    assert(nBlock >= 1);
    assert(smSize <= shared_memory_size);

    m_kernel<<<dim3(nBlock, nBlock), dim3(nThread, 1), smSize>>>(
        dRes.ptr(), dEigVal.ptr(), static_cast<Real>(MCaverage.shellWidth()), dEigVector.ptr(),
        dSubSpace.ptr(), dAdjointBasis.ptr(), dmBodyOpSpace.ptr(), mBodyOpSpace.transEqDim(),
        transEqClassRep, transPeriod, dWork.ptr());
    cuCHECK(cudaGetLastError());
    cuCHECK(cudaFree(transEqClassRep));
    cuCHECK(cudaFree(transPeriod));

    cuCHECK(cudaDeviceSynchronize());

    res = dRes.get().template cast<double>().rowwise().sum();
}
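
A corresponding call sketch for the GPU overload. It assumes, as the listing above suggests for dSubSpace, dEigVal and dmBodyOpSpace, that ObjectOnGPU<T> can be constructed from the matching host object, that mBodyOperatorSpace<Scalar> derives from ManyBodyOperatorSpaceBase, and that the eigenvector matrix is stored row-major as the comment in the listing requires. MyTotalSpace and the scalar type are again placeholders.

    #include <complex>
    #include <Eigen/Dense>
    #include <ETHmeasure.hpp>

    using Scalar = std::complex<double>;  // assumed Scalar_
    // Row-major storage, as requested by the comment in the listing above.
    using RowMajorMatrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

    Eigen::VectorXd ethDeviationOnGPU(RowMajorMatrix const&                 eigVectors,
                                      SubSpace<MyTotalSpace, Scalar> const& subSpace,
                                      mBodyOperatorSpace<Scalar> const&     opSpace,
                                      MicroCanonicalAverage const&          mcAverage) {
        // Assumed: ObjectOnGPU<T> copies the host object to the device.
        ObjectOnGPU<RowMajorMatrix> dEigVectors(eigVectors);

        FuncETHmeasure< SubSpace<MyTotalSpace, Scalar> > measure;
        Eigen::VectorXd res;
        measure(res, dEigVectors, subSpace, opSpace, mcAverage);  // dispatches to the GPU overload
        return res;
    }
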

Member Data Documentation

◆ m_expVal

template<class TotalSpace_ , typename Scalar_ >
std::vector<Vector> FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::m_expVal
private

◆ m_mcAverage

template<class TotalSpace_ , typename Scalar_ >
std::vector<Vector> FuncETHmeasure< SubSpace< TotalSpace_, Scalar_ > >::m_mcAverage
private

The documentation for this class was generated from the following file:

ETHmeasure.hpp