733 {
734 using Scalar2_t = typename SparseCompressed_::Scalar;
735 debug_print(__PRETTY_FUNCTION__ << hObject.rows() <<
", " << hObject.cols() <<
", "
736 << hObject.nonZeros() << "\n");
737 this->
resize(hObject.rows(), hObject.cols(), hObject.nonZeros());
738 SparseCompressed_setNonZeros_kernel<<<1, 1>>>(this->
m_ptr, hObject.
nonZeros());
739 cuCHECK(cudaPeekAtLastError());
740 cuCHECK(cudaDeviceSynchronize());
741
742 int* dSrcOuterIndex;
743 int* dSrcInnerIndex;
745 cuCHECK(cudaMalloc((
void**)&dSrcOuterIndex,
sizeof(
int) * (hObject.outerSize() + 1)));
746 cuCHECK(cudaMalloc((
void**)&dSrcInnerIndex,
sizeof(
int) * hObject.nonZeros()));
747 cuCHECK(cudaMalloc((
void**)&dSrcValueIndex,
sizeof(
Scalar) * hObject.nonZeros()));
748
749 cuCHECK(cudaMemcpy(dSrcOuterIndex, hObject.outerIndexPtr(),
750 sizeof(int) * (hObject.outerSize() + 1), cudaMemcpyHostToDevice));
751 cuCHECK(cudaMemcpy(dSrcInnerIndex, hObject.innerIndexPtr(),
sizeof(
int) * hObject.nonZeros(),
752 cudaMemcpyHostToDevice));
753 if constexpr(std::is_same_v<Scalar, Scalar2_t>) {
754 cuCHECK(cudaMemcpy(dSrcValueIndex, hObject.valuePtr(),
sizeof(
Scalar) * hObject.nonZeros(),
755 cudaMemcpyHostToDevice));
756 }
757 else {
758 std::vector<Scalar> hSrcValueIndex(hObject.nonZeros());
759 for(int j = 0; j != hSrcValueIndex.size(); ++j) hSrcValueIndex[j] = hObject.valuePtr()[j];
760 cuCHECK(cudaMemcpy(dSrcValueIndex, hSrcValueIndex.data(),
761 sizeof(
Scalar) * hObject.nonZeros(), cudaMemcpyHostToDevice));
762 }
763
765 struct cudaFuncAttributes
attr;
767 int const numElem = (hObject.outerSize() + 1) + hObject.nonZeros();
768 int const nThread = min(numElem,
attr.maxThreadsPerBlock);
770
772 SparseCompressed_set_kernel<<<nBlock, nThread>>>(this->
m_ptr, dSrcOuterIndex, dSrcInnerIndex,
773 dSrcValueIndex);
774 cuCHECK(cudaPeekAtLastError());
775
776 cuCHECK(cudaFree(dSrcOuterIndex));
777 cuCHECK(cudaFree(dSrcInnerIndex));
778 cuCHECK(cudaFree(dSrcValueIndex));
779}
__global__ void SparseCompressed_set_kernel(SparseCompressed< Scalar_t, Options_ > *objPtr, int *srcOuterIndexPtr, int *srcInnerIndexPtr, Scalar_t *srcValuePtr)
Definition MatrixUtils.cuh:577
Definition mytypes.hpp:147
SparseCompressed< Scalar_t, Options_ > Object_t
Definition MatrixUtils.cuh:597
__host__ ObjectOnGPU & resize(int rows, int cols, int reserved=1)
Definition MatrixUtils.cuh:631
struct cudaFuncAttributes attr
Definition getAttributesOfMatrixElementsInSector.cpp:2
Integer_t const nBlock
Definition getAttributesOfMatrixElementsInSector.cpp:5
Integer_t const nThread
Definition getAttributesOfMatrixElementsInSector.cpp:4