votca 2024.2-dev
Loading...
Searching...
No Matches
cudamatrix.h
Go to the documentation of this file.
1/*
2 * Copyright 2009-2020 The VOTCA Development Team
3 * (http://www.votca.org)
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License")
6 *
7 * You may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 */
19
20#ifndef VOTCA_XTP_CUDAMATRIX_H
21#define VOTCA_XTP_CUDAMATRIX_H
22
23// CMake generated file
24#include "votca_xtp_config.h"
25#include <stdexcept>
26#include <string>
27#ifndef USE_CUDA
28#error Cuda not enabled
29#endif
30
31// Standard includes
32#include <cassert>
33#include <memory>
34// Third party includes
35#include <cublas_v2.h>
36#include <curand.h>
37
38// Local VOTCA includes
39#include "eigen.h"
40
41/*
42 * \brief Matrix Representation inside an Nvidia GPU
43 */
44namespace votca {
45namespace xtp {
46
47void checkCuda(cudaError_t result);
48void checkCublas(cublasStatus_t result);
49std::string cudaGetErrorEnum(cublasStatus_t error);
51
52template <class M>
53std::string OutputDimension(const M &mat) {
54 std::string transpose = M::transposed() ? "T" : "";
55
56 return std::string(transpose + "(" + std::to_string(mat.rows()) + "x" +
57 std::to_string(mat.cols()) + ")");
58}
59
60template <class M>
62 public:
63 CudaMatrixBlock(const M &mat, Index rowoffset, Index coloffset, Index rows,
64 Index cols)
65 : mat_(mat), rows_(rows), cols_(cols) {
66
67 assert((rowoffset + rows) <= mat.rows() &&
68 "block has to fit in matrix, rows exceeded");
69 assert((coloffset + cols) <= mat.cols() &&
70 "block has to fit in matrix, cols exceeded");
71 start_ = coloffset * ld() + rowoffset;
72 }
73 Index size() const { return rows() * cols(); }
74 Index rows() const { return rows_; }
75 Index cols() const { return cols_; }
76 Index ld() const { return mat_.ld(); }
77 double *data() const { return mat_.data() + start_; }
78
79 static constexpr bool transposed() { return M::transposed(); }
80
81 private:
82 const M &mat_;
86};
87
88template <class M>
90 public:
91 CudaMatrixTranspose(const M &mat) : mat_(mat) { ; }
92 Index size() const { return mat_.size(); }
93 Index rows() const { return mat_.rows(); }
94 Index cols() const { return mat_.cols(); }
95 Index ld() const { return mat_.ld(); }
96 double *data() const { return mat_.data(); }
97
98 static constexpr bool transposed() { return !M::transposed(); }
99
100 private:
101 const M &mat_;
102};
103
105 public:
106 Index size() const { return ld_ * cols_; };
107 Index rows() const { return ld_; };
108 Index ld() const { return ld_; }
109 Index cols() const { return cols_; };
110 double *data() const { return data_.get(); };
111
113 assert(rows * cols == size() &&
114 "reshape cannot change the size of the matrix only the shape");
115 cols_ = cols;
116 ld_ = rows;
117 }
118
122
124 Index rows, Index cols) const {
125 return CudaMatrixBlock<CudaMatrix>(*this, rowoffset, coloffset, rows, cols);
126 }
127
129 return CudaMatrixBlock<CudaMatrix>(*this, row, 0, 1, cols());
130 }
131
133 return CudaMatrixBlock<CudaMatrix>(*this, 0, col, rows(), 1);
134 }
135
137 return CudaMatrixBlock<CudaMatrix>(*this, rowoffset, 0, rows, cols());
138 }
140 return CudaMatrixBlock<CudaMatrix>(*this, 0, coloffset, rows(), cols);
141 }
142
143 static constexpr bool transposed() { return false; }
144
145 template <class T>
146 void copy_to_gpu(const T &m) {
147 if (m.rows() != ld_ || m.cols() != cols_) {
148 throw std::runtime_error("Shape mismatch of cpu (" +
149 std::to_string(m.rows()) + "x" +
150 std::to_string(m.cols()) + ") and gpu matrix" +
151 OutputDimension(*this));
152 }
153 checkCublas(cublasSetMatrixAsync(
154 int(m.rows()), int(m.cols()), sizeof(double), m.data(),
155 int(m.colStride()), this->data(), int(this->rows()), stream_));
156 }
157
158 template <class T>
159 CudaMatrix(const T &matrix, const cudaStream_t &stream)
160 : ld_{static_cast<Index>(matrix.rows())},
161 cols_{static_cast<Index>(matrix.cols())} {
163 stream_ = stream;
164 copy_to_gpu(matrix);
165 }
166
167 // Allocate memory in the GPU for a matrix
168 CudaMatrix(Index nrows, Index ncols, const cudaStream_t &stream);
169
170 void setZero();
171
172 // Convert A Cudamatrix to an EigenMatrix
173 operator Eigen::MatrixXd() const;
174
175 friend std::ostream &operator<<(std::ostream &out, const CudaMatrix &m);
176
177 private:
178 // Unique pointer with custom delete function
179 using Unique_ptr_to_GPU_data = std::unique_ptr<double, void (*)(double *)>;
180
181 Unique_ptr_to_GPU_data alloc_matrix_in_gpu(size_t size_arr) const;
182
183 void throw_if_not_enough_memory_in_gpu(size_t requested_memory) const;
184
185 size_t size_matrix() const { return this->size() * sizeof(double); }
186
187 // Attributes of the matrix in the device
189 [](double *x) { checkCuda(cudaFree(x)); }};
190 cudaStream_t stream_ = nullptr;
193};
194
195} // namespace xtp
196} // namespace votca
197
198#endif // VOTCA_XTP_CUDAMATRIX_H
double * data() const
Definition cudamatrix.h:77
static constexpr bool transposed()
Definition cudamatrix.h:79
CudaMatrixBlock(const M &mat, Index rowoffset, Index coloffset, Index rows, Index cols)
Definition cudamatrix.h:63
static constexpr bool transposed()
Definition cudamatrix.h:98
CudaMatrixBlock< CudaMatrix > block(Index rowoffset, Index coloffset, Index rows, Index cols) const
Definition cudamatrix.h:123
void reshape(Index rows, Index cols)
Definition cudamatrix.h:112
std::unique_ptr< double, void(*)(double *)> Unique_ptr_to_GPU_data
Definition cudamatrix.h:179
size_t size_matrix() const
Definition cudamatrix.h:185
Unique_ptr_to_GPU_data data_
Definition cudamatrix.h:188
static constexpr bool transposed()
Definition cudamatrix.h:143
friend std::ostream & operator<<(std::ostream &out, const CudaMatrix &m)
CudaMatrixTranspose< CudaMatrix > transpose() const
Definition cudamatrix.h:119
void copy_to_gpu(const T &m)
Definition cudamatrix.h:146
CudaMatrixBlock< CudaMatrix > col(Index col) const
Definition cudamatrix.h:132
void throw_if_not_enough_memory_in_gpu(size_t requested_memory) const
Definition cudamatrix.cc:97
double * data() const
Definition cudamatrix.h:110
CudaMatrixBlock< CudaMatrix > row(Index row) const
Definition cudamatrix.h:128
CudaMatrixBlock< CudaMatrix > middleCols(Index coloffset, Index cols) const
Definition cudamatrix.h:139
CudaMatrixBlock< CudaMatrix > middleRows(Index rowoffset, Index rows) const
Definition cudamatrix.h:136
CudaMatrix(const T &matrix, const cudaStream_t &stream)
Definition cudamatrix.h:159
Unique_ptr_to_GPU_data alloc_matrix_in_gpu(size_t size_arr) const
Definition cudamatrix.cc:87
void checkCublas(cublasStatus_t result)
Definition cudamatrix.cc:32
std::string cudaGetErrorEnum(cublasStatus_t error)
Definition cudamatrix.cc:39
Index count_available_gpus()
Definition cudamatrix.cc:65
std::string OutputDimension(const M &mat)
Definition cudamatrix.h:53
void checkCuda(cudaError_t result)
Definition cudamatrix.cc:25
base class for all analysis tools
Definition basebead.h:33
Eigen::Index Index
Definition types.h:26