Skip to content
GitLab
Explore
Sign in
Hide whitespace changes
Inline
Side-by-side
Some changes are not shown.
For a faster browsing experience, only
2 of 182+
files are shown. Download one of the files below to see all changes.
external/eigen3/Eigen/src/Core/products/GeneralMatrixVector_
MKL
.h
→
external/eigen3/Eigen/src/Core/products/GeneralMatrixVector_
BLAS
.h
View file @
a394b22a
...
...
@@ -25,13 +25,13 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to
Intel(R) MKL
* Content : Eigen bindings to
BLAS F77
* General matrix-vector product functionality based on ?GEMV.
********************************************************************************
*/
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_
MKL
_H
#define EIGEN_GENERAL_MATRIX_VECTOR_
MKL
_H
#ifndef EIGEN_GENERAL_MATRIX_VECTOR_
BLAS
_H
#define EIGEN_GENERAL_MATRIX_VECTOR_
BLAS
_H
namespace
Eigen
{
...
...
@@ -46,47 +46,46 @@ namespace internal {
// gemv specialization
template
<
typename
Index
,
typename
LhsScalar
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
typename
RhsScalar
,
bool
ConjugateRhs
>
struct
general_matrix_vector_product_gemv
:
general_matrix_vector_product
<
Index
,
LhsScalar
,
LhsStorageOrder
,
ConjugateLhs
,
RhsScalar
,
ConjugateRhs
,
BuiltIn
>
{};
template
<
typename
Index
,
typename
LhsScalar
,
int
StorageOrder
,
bool
ConjugateLhs
,
typename
RhsScalar
,
bool
ConjugateRhs
>
struct
general_matrix_vector_product_gemv
;
#define EIGEN_
MKL
_GEMV_SPECIALIZE(Scalar) \
#define EIGEN_
BLAS
_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,
ColMajor,ConjugateLhs,Scalar
,ConjugateRhs,Specialized> { \
struct general_matrix_vector_product<Index,Scalar,
const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>
,ConjugateRhs,Specialized> { \
static void run( \
Index rows, Index cols, \
const
Scalar* lhs, Index lhsStride
, \
const
Scalar* rhs, Index rhsIncr
, \
const
const_blas_data_mapper<Scalar,Index,ColMajor> &lhs
, \
const
const_blas_data_mapper<Scalar,Index,RowMajor> &rhs
, \
Scalar* res, Index resIncr, Scalar alpha) \
{ \
if (ConjugateLhs) { \
general_matrix_vector_product<Index,Scalar,
ColMajor,ConjugateLhs,Scalar
,ConjugateRhs,BuiltIn>::run( \
rows, cols, lhs,
lhsStride, rhs, rhsIncr
, res, resIncr, alpha); \
general_matrix_vector_product<Index,Scalar,
const_blas_data_mapper<Scalar,Index,ColMajor>,ColMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,RowMajor>
,ConjugateRhs,BuiltIn>::run( \
rows, cols, lhs,
rhs
, res, resIncr, alpha); \
} else { \
general_matrix_vector_product_gemv<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
rows, cols, lhs, lhs
S
tride, rhs
, rhsIncr
, res, resIncr, alpha); \
rows, cols, lhs
.data()
, lhs
.s
tride
()
, rhs
.data(), rhs.stride()
, res, resIncr, alpha); \
} \
} \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,
RowMajor,ConjugateLhs,Scalar
,ConjugateRhs,Specialized> { \
struct general_matrix_vector_product<Index,Scalar,
const_blas_data_mapper<Scalar,Index,RowMajor>,RowMajor,ConjugateLhs,Scalar,const_blas_data_mapper<Scalar,Index,ColMajor>
,ConjugateRhs,Specialized> { \
static void run( \
Index rows, Index cols, \
const
Scalar* lhs, Index lhsStride
, \
const
Scalar* rhs, Index rhsIncr
, \
const
const_blas_data_mapper<Scalar,Index,RowMajor> &lhs
, \
const
const_blas_data_mapper<Scalar,Index,ColMajor> &rhs
, \
Scalar* res, Index resIncr, Scalar alpha) \
{ \
general_matrix_vector_product_gemv<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs>::run( \
rows, cols, lhs, lhs
S
tride, rhs
, rhsIncr
, res, resIncr, alpha); \
rows, cols, lhs
.data()
, lhs
.s
tride
()
, rhs
.data(), rhs.stride()
, res, resIncr, alpha); \
} \
}; \
EIGEN_
MKL
_GEMV_SPECIALIZE
(
double
)
EIGEN_
MKL
_GEMV_SPECIALIZE
(
float
)
EIGEN_
MKL
_GEMV_SPECIALIZE
(
dcomplex
)
EIGEN_
MKL
_GEMV_SPECIALIZE
(
scomplex
)
EIGEN_
BLAS
_GEMV_SPECIALIZE
(
double
)
EIGEN_
BLAS
_GEMV_SPECIALIZE
(
float
)
EIGEN_
BLAS
_GEMV_SPECIALIZE
(
dcomplex
)
EIGEN_
BLAS
_GEMV_SPECIALIZE
(
scomplex
)
#define EIGEN_
MKL
_GEMV_SPECIALIZATION(EIGTYPE,
MKL
TYPE,
MKL
PREFIX) \
#define EIGEN_
BLAS
_GEMV_SPECIALIZATION(EIGTYPE,
BLAS
TYPE,
BLAS
PREFIX) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
...
...
@@ -98,16 +97,15 @@ static void run( \
const EIGTYPE* rhs, Index rhsIncr, \
EIGTYPE* res, Index resIncr, EIGTYPE alpha) \
{ \
MKL_INT m=rows, n=cols, lda=lhsStride, incx=rhsIncr, incy=resIncr; \
MKLTYPE alpha_, beta_; \
const EIGTYPE *x_ptr, myone(1); \
BlasIndex m=convert_index<BlasIndex>(rows), n=convert_index<BlasIndex>(cols), \
lda=convert_index<BlasIndex>(lhsStride), incx=convert_index<BlasIndex>(rhsIncr), incy=convert_index<BlasIndex>(resIncr); \
const EIGTYPE beta(1); \
const EIGTYPE *x_ptr; \
char trans=(LhsStorageOrder==ColMajor) ? 'N' : (ConjugateLhs) ? 'C' : 'T'; \
if (LhsStorageOrder==RowMajor) { \
m
=
cols; \
n
=
rows; \
m
= convert_index<BlasIndex>(
cols
)
; \
n
= convert_index<BlasIndex>(
rows
)
; \
}\
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
GEMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const GEMVVector, 0, InnerStride<> > map_x(rhs,cols,1,InnerStride<>(incx)); \
...
...
@@ -115,17 +113,17 @@ static void run( \
x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=rhs; \
MKL
PREFIX##gemv(&trans, &m, &n, &alpha
_
, (const
MKL
TYPE*)lhs, &lda, (const
MKL
TYPE*)x_ptr, &incx, &beta
_
, (
MKL
TYPE*)res, &incy); \
BLAS
PREFIX##gemv
_
(&trans, &m, &n, &
numext::real_ref(
alpha
)
, (const
BLAS
TYPE*)lhs, &lda, (const
BLAS
TYPE*)x_ptr, &incx, &
numext::real_ref(
beta
)
, (
BLAS
TYPE*)res, &incy); \
}\
};
EIGEN_
MKL
_GEMV_SPECIALIZATION
(
double
,
double
,
d
)
EIGEN_
MKL
_GEMV_SPECIALIZATION
(
float
,
float
,
s
)
EIGEN_
MKL
_GEMV_SPECIALIZATION
(
dcomplex
,
MKL_Complex16
,
z
)
EIGEN_
MKL
_GEMV_SPECIALIZATION
(
scomplex
,
MKL_Complex8
,
c
)
EIGEN_
BLAS
_GEMV_SPECIALIZATION
(
double
,
double
,
d
)
EIGEN_
BLAS
_GEMV_SPECIALIZATION
(
float
,
float
,
s
)
EIGEN_
BLAS
_GEMV_SPECIALIZATION
(
dcomplex
,
double
,
z
)
EIGEN_
BLAS
_GEMV_SPECIALIZATION
(
scomplex
,
float
,
c
)
}
// end namespase internal
}
// end namespace Eigen
#endif // EIGEN_GENERAL_MATRIX_VECTOR_
MKL
_H
#endif // EIGEN_GENERAL_MATRIX_VECTOR_
BLAS
_H
external/eigen3/Eigen/src/Core/products/Parallelizer.h
View file @
a394b22a
...
...
@@ -10,7 +10,7 @@
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
namespace
Eigen
{
namespace
Eigen
{
namespace
internal
{
...
...
@@ -49,8 +49,8 @@ inline void initParallel()
{
int
nbt
;
internal
::
manage_multi_threading
(
GetAction
,
&
nbt
);
std
::
ptrdiff_t
l1
,
l2
;
internal
::
manage_caching_sizes
(
GetAction
,
&
l1
,
&
l2
);
std
::
ptrdiff_t
l1
,
l2
,
l3
;
internal
::
manage_caching_sizes
(
GetAction
,
&
l1
,
&
l2
,
&
l3
);
}
/** \returns the max number of threads reserved for Eigen
...
...
@@ -73,17 +73,17 @@ namespace internal {
template
<
typename
Index
>
struct
GemmParallelInfo
{
GemmParallelInfo
()
:
sync
(
-
1
),
users
(
0
),
r
hs_start
(
0
),
r
hs_length
(
0
)
{}
GemmParallelInfo
()
:
sync
(
-
1
),
users
(
0
),
l
hs_start
(
0
),
l
hs_length
(
0
)
{}
int
volatile
sync
;
Index
volatile
sync
;
int
volatile
users
;
Index
r
hs_start
;
Index
r
hs_length
;
Index
l
hs_start
;
Index
l
hs_length
;
};
template
<
bool
Condition
,
typename
Functor
,
typename
Index
>
void
parallelize_gemm
(
const
Functor
&
func
,
Index
rows
,
Index
cols
,
bool
transpose
)
void
parallelize_gemm
(
const
Functor
&
func
,
Index
rows
,
Index
cols
,
Index
depth
,
bool
transpose
)
{
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
...
...
@@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole
// parallelizer mechanism has to be redisigned anyway.
EIGEN_UNUSED_VARIABLE
(
depth
);
EIGEN_UNUSED_VARIABLE
(
transpose
);
func
(
0
,
rows
,
0
,
cols
);
#else
...
...
@@ -102,56 +103,56 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// - we are not already in a parallel code
// - the sizes are large enough
//
1- are we already in a parallel session?
//
FIXME omp_get_num_threads()>1 only works for openmp, w
hat
if
the
user does not use openmp?
if
((
!
Condition
)
||
(
omp_get_num_threads
()
>
1
))
return
func
(
0
,
rows
,
0
,
cols
);
//
compute the maximal number of threads from the size of the product:
//
This first heuristic takes into account t
hat the
product kernel is fully optimized when working with nr columns at once.
Index
size
=
transpose
?
rows
:
cols
;
Index
pb_max_threads
=
std
::
max
<
Index
>
(
1
,
size
/
Functor
::
Traits
::
nr
);
Index
size
=
transpose
?
cols
:
rows
;
// compute the maximal number of threads from the total amount of work:
double
work
=
static_cast
<
double
>
(
rows
)
*
static_cast
<
double
>
(
cols
)
*
static_cast
<
double
>
(
depth
);
double
kMinTaskSize
=
50000
;
// FIXME improve this heuristic.
pb_max_threads
=
std
::
max
<
Index
>
(
1
,
std
::
min
<
Index
>
(
pb_max_threads
,
work
/
kMinTaskSize
));
// 2- compute the maximal number of threads from the size of the product:
// FIXME this has to be fine tuned
Index
max_threads
=
std
::
max
<
Index
>
(
1
,
size
/
32
);
// compute the number of threads we are going to use
Index
threads
=
std
::
min
<
Index
>
(
nbThreads
(),
pb_max_threads
);
//
3 - compute the number of threads we are going to use
Index
threads
=
std
::
min
<
Index
>
(
nbThreads
(),
max_
thread
s
);
if
(
threads
==
1
)
//
if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session,
// then abort multi-
thread
ing
// FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
if
(
(
!
Condition
)
||
(
threads
==
1
)
||
(
omp_get_num_threads
()
>
1
)
)
return
func
(
0
,
rows
,
0
,
cols
);
Eigen
::
initParallel
();
func
.
initParallelSession
();
func
.
initParallelSession
(
threads
);
if
(
transpose
)
std
::
swap
(
rows
,
cols
);
GemmParallelInfo
<
Index
>*
info
=
new
GemmParallelInfo
<
Index
>
[
threads
]
;
ei_declare_aligned_stack_constructed_variable
(
GemmParallelInfo
<
Index
>
,
info
,
threads
,
0
)
;
#pragma omp parallel num_threads(threads)
{
Index
i
=
omp_get_thread_num
();
// Note that the actual number of threads might be lower than the number of request ones.
Index
actual_threads
=
omp_get_num_threads
();
Index
blockCols
=
(
cols
/
actual_threads
)
&
~
Index
(
0x3
);
Index
blockRows
=
(
rows
/
actual_threads
)
&
~
Index
(
0x7
);
Index
blockRows
=
(
rows
/
actual_threads
);
blockRows
=
(
blockRows
/
Functor
::
Traits
::
mr
)
*
Functor
::
Traits
::
mr
;
Index
r0
=
i
*
blockRows
;
Index
actualBlockRows
=
(
i
+
1
==
actual_threads
)
?
rows
-
r0
:
blockRows
;
Index
c0
=
i
*
blockCols
;
Index
actualBlockCols
=
(
i
+
1
==
actual_threads
)
?
cols
-
c0
:
blockCols
;
info
[
i
].
r
hs_start
=
c
0
;
info
[
i
].
r
hs_length
=
actualBlock
Col
s
;
info
[
i
].
l
hs_start
=
r
0
;
info
[
i
].
l
hs_length
=
actualBlock
Row
s
;
if
(
transpose
)
func
(
0
,
cols
,
r0
,
actualBlockRows
,
info
);
else
func
(
r0
,
actualBlockRows
,
0
,
cols
,
info
);
if
(
transpose
)
func
(
c0
,
actualBlockCols
,
0
,
rows
,
info
);
else
func
(
0
,
rows
,
c0
,
actualBlockCols
,
info
);
}
delete
[]
info
;
#endif
}
...
...
Prev
1
…
6
7
8
9
10
Next