Skip to content
GitLab
Explore
Sign in
Hide whitespace changes
Inline
Side-by-side
Some changes are not shown.
For a faster browsing experience, only
2 of 182+
files are shown. Download one of the files below to see all changes.
external/eigen3/Eigen/src/Core/products/Parallelizer.h
View file @
a394b22a
...
@@ -10,7 +10,7 @@
...
@@ -10,7 +10,7 @@
#ifndef EIGEN_PARALLELIZER_H
#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H
namespace
Eigen
{
namespace
Eigen
{
namespace
internal
{
namespace
internal
{
...
@@ -49,8 +49,8 @@ inline void initParallel()
...
@@ -49,8 +49,8 @@ inline void initParallel()
{
{
int
nbt
;
int
nbt
;
internal
::
manage_multi_threading
(
GetAction
,
&
nbt
);
internal
::
manage_multi_threading
(
GetAction
,
&
nbt
);
std
::
ptrdiff_t
l1
,
l2
;
std
::
ptrdiff_t
l1
,
l2
,
l3
;
internal
::
manage_caching_sizes
(
GetAction
,
&
l1
,
&
l2
);
internal
::
manage_caching_sizes
(
GetAction
,
&
l1
,
&
l2
,
&
l3
);
}
}
/** \returns the max number of threads reserved for Eigen
/** \returns the max number of threads reserved for Eigen
...
@@ -73,17 +73,17 @@ namespace internal {
...
@@ -73,17 +73,17 @@ namespace internal {
template
<
typename
Index
>
struct
GemmParallelInfo
template
<
typename
Index
>
struct
GemmParallelInfo
{
{
GemmParallelInfo
()
:
sync
(
-
1
),
users
(
0
),
r
hs_start
(
0
),
r
hs_length
(
0
)
{}
GemmParallelInfo
()
:
sync
(
-
1
),
users
(
0
),
l
hs_start
(
0
),
l
hs_length
(
0
)
{}
int
volatile
sync
;
Index
volatile
sync
;
int
volatile
users
;
int
volatile
users
;
Index
r
hs_start
;
Index
l
hs_start
;
Index
r
hs_length
;
Index
l
hs_length
;
};
};
template
<
bool
Condition
,
typename
Functor
,
typename
Index
>
template
<
bool
Condition
,
typename
Functor
,
typename
Index
>
void
parallelize_gemm
(
const
Functor
&
func
,
Index
rows
,
Index
cols
,
bool
transpose
)
void
parallelize_gemm
(
const
Functor
&
func
,
Index
rows
,
Index
cols
,
Index
depth
,
bool
transpose
)
{
{
// TODO when EIGEN_USE_BLAS is defined,
// TODO when EIGEN_USE_BLAS is defined,
// we should still enable OMP for other scalar types
// we should still enable OMP for other scalar types
...
@@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
...
@@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// the matrix product when multithreading is enabled. This is a temporary
// the matrix product when multithreading is enabled. This is a temporary
// fix to support row-major destination matrices. This whole
// fix to support row-major destination matrices. This whole
// parallelizer mechanism has to be redisigned anyway.
// parallelizer mechanism has to be redisigned anyway.
EIGEN_UNUSED_VARIABLE
(
depth
);
EIGEN_UNUSED_VARIABLE
(
transpose
);
EIGEN_UNUSED_VARIABLE
(
transpose
);
func
(
0
,
rows
,
0
,
cols
);
func
(
0
,
rows
,
0
,
cols
);
#else
#else
...
@@ -102,56 +103,56 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
...
@@ -102,56 +103,56 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
// - we are not already in a parallel code
// - we are not already in a parallel code
// - the sizes are large enough
// - the sizes are large enough
//
1- are we already in a parallel session?
//
compute the maximal number of threads from the size of the product:
//
FIXME omp_get_num_threads()>1 only works for openmp, w
hat
if
the
user does not use openmp?
//
This first heuristic takes into account t
hat the
product kernel is fully optimized when working with nr columns at once.
if
((
!
Condition
)
||
(
omp_get_num_threads
()
>
1
))
Index
size
=
transpose
?
rows
:
cols
;
return
func
(
0
,
rows
,
0
,
cols
);
Index
pb_max_threads
=
std
::
max
<
Index
>
(
1
,
size
/
Functor
::
Traits
::
nr
);
Index
size
=
transpose
?
cols
:
rows
;
// compute the maximal number of threads from the total amount of work:
double
work
=
static_cast
<
double
>
(
rows
)
*
static_cast
<
double
>
(
cols
)
*
static_cast
<
double
>
(
depth
);
double
kMinTaskSize
=
50000
;
// FIXME improve this heuristic.
pb_max_threads
=
std
::
max
<
Index
>
(
1
,
std
::
min
<
Index
>
(
pb_max_threads
,
work
/
kMinTaskSize
));
// 2- compute the maximal number of threads from the size of the product:
// compute the number of threads we are going to use
// FIXME this has to be fine tuned
Index
threads
=
std
::
min
<
Index
>
(
nbThreads
(),
pb_max_threads
);
Index
max_threads
=
std
::
max
<
Index
>
(
1
,
size
/
32
);
//
3 - compute the number of threads we are going to use
//
if multi-threading is explicitely disabled, not useful, or if we already are in a parallel session,
Index
threads
=
std
::
min
<
Index
>
(
nbThreads
(),
max_
thread
s
);
// then abort multi-
thread
ing
// FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
if
(
threads
==
1
)
if
(
(
!
Condition
)
||
(
threads
==
1
)
||
(
omp_get_num_threads
()
>
1
)
)
return
func
(
0
,
rows
,
0
,
cols
);
return
func
(
0
,
rows
,
0
,
cols
);
Eigen
::
initParallel
();
Eigen
::
initParallel
();
func
.
initParallelSession
();
func
.
initParallelSession
(
threads
);
if
(
transpose
)
if
(
transpose
)
std
::
swap
(
rows
,
cols
);
std
::
swap
(
rows
,
cols
);
GemmParallelInfo
<
Index
>*
info
=
new
GemmParallelInfo
<
Index
>
[
threads
]
;
ei_declare_aligned_stack_constructed_variable
(
GemmParallelInfo
<
Index
>
,
info
,
threads
,
0
)
;
#pragma omp parallel num_threads(threads)
#pragma omp parallel num_threads(threads)
{
{
Index
i
=
omp_get_thread_num
();
Index
i
=
omp_get_thread_num
();
// Note that the actual number of threads might be lower than the number of request ones.
// Note that the actual number of threads might be lower than the number of request ones.
Index
actual_threads
=
omp_get_num_threads
();
Index
actual_threads
=
omp_get_num_threads
();
Index
blockCols
=
(
cols
/
actual_threads
)
&
~
Index
(
0x3
);
Index
blockCols
=
(
cols
/
actual_threads
)
&
~
Index
(
0x3
);
Index
blockRows
=
(
rows
/
actual_threads
)
&
~
Index
(
0x7
);
Index
blockRows
=
(
rows
/
actual_threads
);
blockRows
=
(
blockRows
/
Functor
::
Traits
::
mr
)
*
Functor
::
Traits
::
mr
;
Index
r0
=
i
*
blockRows
;
Index
r0
=
i
*
blockRows
;
Index
actualBlockRows
=
(
i
+
1
==
actual_threads
)
?
rows
-
r0
:
blockRows
;
Index
actualBlockRows
=
(
i
+
1
==
actual_threads
)
?
rows
-
r0
:
blockRows
;
Index
c0
=
i
*
blockCols
;
Index
c0
=
i
*
blockCols
;
Index
actualBlockCols
=
(
i
+
1
==
actual_threads
)
?
cols
-
c0
:
blockCols
;
Index
actualBlockCols
=
(
i
+
1
==
actual_threads
)
?
cols
-
c0
:
blockCols
;
info
[
i
].
r
hs_start
=
c
0
;
info
[
i
].
l
hs_start
=
r
0
;
info
[
i
].
r
hs_length
=
actualBlock
Col
s
;
info
[
i
].
l
hs_length
=
actualBlock
Row
s
;
if
(
transpose
)
if
(
transpose
)
func
(
c0
,
actualBlockCols
,
0
,
rows
,
info
);
func
(
0
,
cols
,
r0
,
actualBlockRows
,
info
);
else
func
(
0
,
rows
,
c0
,
actualBlockCols
,
info
);
else
func
(
r0
,
actualBlockRows
,
0
,
cols
,
info
);
}
}
delete
[]
info
;
#endif
#endif
}
}
...
...
external/eigen3/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
View file @
a394b22a
...
@@ -15,7 +15,7 @@ namespace Eigen {
...
@@ -15,7 +15,7 @@ namespace Eigen {
namespace
internal
{
namespace
internal
{
// pack a selfadjoint block diagonal for use with the gebp_kernel
// pack a selfadjoint block diagonal for use with the gebp_kernel
template
<
typename
Scalar
,
typename
Index
,
int
Pack1
,
int
Pack2
,
int
StorageOrder
>
template
<
typename
Scalar
,
typename
Index
,
int
Pack1
,
int
Pack2
_dummy
,
int
StorageOrder
>
struct
symm_pack_lhs
struct
symm_pack_lhs
{
{
template
<
int
BlockRows
>
inline
template
<
int
BlockRows
>
inline
...
@@ -45,25 +45,32 @@ struct symm_pack_lhs
...
@@ -45,25 +45,32 @@ struct symm_pack_lhs
}
}
void
operator
()(
Scalar
*
blockA
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
Index
cols
,
Index
rows
)
void
operator
()(
Scalar
*
blockA
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
Index
cols
,
Index
rows
)
{
{
enum
{
PacketSize
=
packet_traits
<
Scalar
>::
size
};
const_blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
>
lhs
(
_lhs
,
lhsStride
);
const_blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
>
lhs
(
_lhs
,
lhsStride
);
Index
count
=
0
;
Index
count
=
0
;
Index
peeled_mc
=
(
rows
/
Pack1
)
*
Pack1
;
//Index peeled_mc3 = (rows/Pack1)*Pack1;
for
(
Index
i
=
0
;
i
<
peeled_mc
;
i
+=
Pack1
)
{
const
Index
peeled_mc3
=
Pack1
>=
3
*
PacketSize
?
(
rows
/
(
3
*
PacketSize
))
*
(
3
*
PacketSize
)
:
0
;
pack
<
Pack1
>
(
blockA
,
lhs
,
cols
,
i
,
count
);
const
Index
peeled_mc2
=
Pack1
>=
2
*
PacketSize
?
peeled_mc3
+
((
rows
-
peeled_mc3
)
/
(
2
*
PacketSize
))
*
(
2
*
PacketSize
)
:
0
;
}
const
Index
peeled_mc1
=
Pack1
>=
1
*
PacketSize
?
(
rows
/
(
1
*
PacketSize
))
*
(
1
*
PacketSize
)
:
0
;
if
(
rows
-
peeled_mc
>=
Pack2
)
if
(
Pack1
>=
3
*
PacketSize
)
{
for
(
Index
i
=
0
;
i
<
peeled_mc3
;
i
+=
3
*
PacketSize
)
pack
<
Pack2
>
(
blockA
,
lhs
,
cols
,
peeled_mc
,
count
);
pack
<
3
*
PacketSize
>
(
blockA
,
lhs
,
cols
,
i
,
count
);
peeled_mc
+=
Pack2
;
}
if
(
Pack1
>=
2
*
PacketSize
)
for
(
Index
i
=
peeled_mc3
;
i
<
peeled_mc2
;
i
+=
2
*
PacketSize
)
pack
<
2
*
PacketSize
>
(
blockA
,
lhs
,
cols
,
i
,
count
);
if
(
Pack1
>=
1
*
PacketSize
)
for
(
Index
i
=
peeled_mc2
;
i
<
peeled_mc1
;
i
+=
1
*
PacketSize
)
pack
<
1
*
PacketSize
>
(
blockA
,
lhs
,
cols
,
i
,
count
);
// do the same with mr==1
// do the same with mr==1
for
(
Index
i
=
peeled_mc
;
i
<
rows
;
i
++
)
for
(
Index
i
=
peeled_mc
1
;
i
<
rows
;
i
++
)
{
{
for
(
Index
k
=
0
;
k
<
i
;
k
++
)
for
(
Index
k
=
0
;
k
<
i
;
k
++
)
blockA
[
count
++
]
=
lhs
(
i
,
k
);
// normal
blockA
[
count
++
]
=
lhs
(
i
,
k
);
// normal
blockA
[
count
++
]
=
numext
::
real
(
lhs
(
i
,
i
));
// real (diagonal)
blockA
[
count
++
]
=
numext
::
real
(
lhs
(
i
,
i
));
// real (diagonal)
...
@@ -82,7 +89,8 @@ struct symm_pack_rhs
...
@@ -82,7 +89,8 @@ struct symm_pack_rhs
Index
end_k
=
k2
+
rows
;
Index
end_k
=
k2
+
rows
;
Index
count
=
0
;
Index
count
=
0
;
const_blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
>
rhs
(
_rhs
,
rhsStride
);
const_blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
>
rhs
(
_rhs
,
rhsStride
);
Index
packet_cols
=
(
cols
/
nr
)
*
nr
;
Index
packet_cols8
=
nr
>=
8
?
(
cols
/
8
)
*
8
:
0
;
Index
packet_cols4
=
nr
>=
4
?
(
cols
/
4
)
*
4
:
0
;
// first part: normal case
// first part: normal case
for
(
Index
j2
=
0
;
j2
<
k2
;
j2
+=
nr
)
for
(
Index
j2
=
0
;
j2
<
k2
;
j2
+=
nr
)
...
@@ -91,79 +99,151 @@ struct symm_pack_rhs
...
@@ -91,79 +99,151 @@ struct symm_pack_rhs
{
{
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
if
(
nr
=
=
4
)
if
(
nr
>
=
4
)
{
{
blockB
[
count
+
2
]
=
rhs
(
k
,
j2
+
2
);
blockB
[
count
+
2
]
=
rhs
(
k
,
j2
+
2
);
blockB
[
count
+
3
]
=
rhs
(
k
,
j2
+
3
);
blockB
[
count
+
3
]
=
rhs
(
k
,
j2
+
3
);
}
}
if
(
nr
>=
8
)
{
blockB
[
count
+
4
]
=
rhs
(
k
,
j2
+
4
);
blockB
[
count
+
5
]
=
rhs
(
k
,
j2
+
5
);
blockB
[
count
+
6
]
=
rhs
(
k
,
j2
+
6
);
blockB
[
count
+
7
]
=
rhs
(
k
,
j2
+
7
);
}
count
+=
nr
;
count
+=
nr
;
}
}
}
}
// second part: diagonal block
// second part: diagonal block
for
(
Index
j2
=
k2
;
j2
<
(
std
::
min
)(
k2
+
rows
,
packet_cols
);
j2
+=
nr
)
Index
end8
=
nr
>=
8
?
(
std
::
min
)(
k2
+
rows
,
packet_cols8
)
:
k2
;
if
(
nr
>=
8
)
{
{
// again we can split vertically in three different parts (transpose, symmetric, normal)
for
(
Index
j2
=
k2
;
j2
<
end8
;
j2
+=
8
)
// transpose
for
(
Index
k
=
k2
;
k
<
j2
;
k
++
)
{
{
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
// again we can split vertically in three different parts (transpose, symmetric, normal)
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
// transpose
if
(
nr
==
4
)
for
(
Index
k
=
k2
;
k
<
j2
;
k
++
)
{
{
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
blockB
[
count
+
4
]
=
numext
::
conj
(
rhs
(
j2
+
4
,
k
));
blockB
[
count
+
5
]
=
numext
::
conj
(
rhs
(
j2
+
5
,
k
));
blockB
[
count
+
6
]
=
numext
::
conj
(
rhs
(
j2
+
6
,
k
));
blockB
[
count
+
7
]
=
numext
::
conj
(
rhs
(
j2
+
7
,
k
));
count
+=
8
;
}
}
count
+=
nr
;
// symmetric
}
Index
h
=
0
;
// symmetric
for
(
Index
k
=
j2
;
k
<
j2
+
8
;
k
++
)
Index
h
=
0
;
{
for
(
Index
k
=
j2
;
k
<
j2
+
nr
;
k
++
)
// normal
{
for
(
Index
w
=
0
;
w
<
h
;
++
w
)
// normal
blockB
[
count
+
w
]
=
rhs
(
k
,
j2
+
w
);
for
(
Index
w
=
0
;
w
<
h
;
++
w
)
blockB
[
count
+
w
]
=
rhs
(
k
,
j2
+
w
);
blockB
[
count
+
h
]
=
numext
::
real
(
rhs
(
k
,
k
));
blockB
[
count
+
h
]
=
numext
::
real
(
rhs
(
k
,
k
));
// transpose
// transpose
for
(
Index
w
=
h
+
1
;
w
<
nr
;
++
w
)
for
(
Index
w
=
h
+
1
;
w
<
8
;
++
w
)
blockB
[
count
+
w
]
=
numext
::
conj
(
rhs
(
j2
+
w
,
k
));
blockB
[
count
+
w
]
=
numext
::
conj
(
rhs
(
j2
+
w
,
k
));
count
+=
nr
;
count
+=
8
;
++
h
;
++
h
;
}
// normal
for
(
Index
k
=
j2
+
8
;
k
<
end_k
;
k
++
)
{
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
blockB
[
count
+
2
]
=
rhs
(
k
,
j2
+
2
);
blockB
[
count
+
3
]
=
rhs
(
k
,
j2
+
3
);
blockB
[
count
+
4
]
=
rhs
(
k
,
j2
+
4
);
blockB
[
count
+
5
]
=
rhs
(
k
,
j2
+
5
);
blockB
[
count
+
6
]
=
rhs
(
k
,
j2
+
6
);
blockB
[
count
+
7
]
=
rhs
(
k
,
j2
+
7
);
count
+=
8
;
}
}
}
// normal
}
for
(
Index
k
=
j2
+
nr
;
k
<
end_k
;
k
++
)
if
(
nr
>=
4
)
{
for
(
Index
j2
=
end8
;
j2
<
(
std
::
min
)(
k2
+
rows
,
packet_cols4
);
j2
+=
4
)
{
{
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
// again we can split vertically in three different parts (transpose, symmetric, normal)
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
// transpose
if
(
nr
==
4
)
for
(
Index
k
=
k2
;
k
<
j2
;
k
++
)
{
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
count
+=
4
;
}
// symmetric
Index
h
=
0
;
for
(
Index
k
=
j2
;
k
<
j2
+
4
;
k
++
)
{
{
// normal
for
(
Index
w
=
0
;
w
<
h
;
++
w
)
blockB
[
count
+
w
]
=
rhs
(
k
,
j2
+
w
);
blockB
[
count
+
h
]
=
numext
::
real
(
rhs
(
k
,
k
));
// transpose
for
(
Index
w
=
h
+
1
;
w
<
4
;
++
w
)
blockB
[
count
+
w
]
=
numext
::
conj
(
rhs
(
j2
+
w
,
k
));
count
+=
4
;
++
h
;
}
// normal
for
(
Index
k
=
j2
+
4
;
k
<
end_k
;
k
++
)
{
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
blockB
[
count
+
2
]
=
rhs
(
k
,
j2
+
2
);
blockB
[
count
+
2
]
=
rhs
(
k
,
j2
+
2
);
blockB
[
count
+
3
]
=
rhs
(
k
,
j2
+
3
);
blockB
[
count
+
3
]
=
rhs
(
k
,
j2
+
3
);
count
+=
4
;
}
}
count
+=
nr
;
}
}
}
}
// third part: transposed
// third part: transposed
for
(
Index
j2
=
k2
+
rows
;
j2
<
packet_cols
;
j2
+=
nr
)
if
(
nr
>=
8
)
{
{
for
(
Index
k
=
k2
;
k
<
end_k
;
k
++
)
for
(
Index
j2
=
k2
+
rows
;
j2
<
packet_cols8
;
j2
+=
8
)
{
{
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
for
(
Index
k
=
k2
;
k
<
end_k
;
k
++
)
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
if
(
nr
==
4
)
{
{
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
blockB
[
count
+
4
]
=
numext
::
conj
(
rhs
(
j2
+
4
,
k
));
blockB
[
count
+
5
]
=
numext
::
conj
(
rhs
(
j2
+
5
,
k
));
blockB
[
count
+
6
]
=
numext
::
conj
(
rhs
(
j2
+
6
,
k
));
blockB
[
count
+
7
]
=
numext
::
conj
(
rhs
(
j2
+
7
,
k
));
count
+=
8
;
}
}
}
if
(
nr
>=
4
)
{
for
(
Index
j2
=
(
std
::
max
)(
packet_cols8
,
k2
+
rows
);
j2
<
packet_cols4
;
j2
+=
4
)
{
for
(
Index
k
=
k2
;
k
<
end_k
;
k
++
)
{
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
count
+=
4
;
}
}
count
+=
nr
;
}
}
}
}
// copy the remaining columns one at a time (=> the same with nr==1)
// copy the remaining columns one at a time (=> the same with nr==1)
for
(
Index
j2
=
packet_cols
;
j2
<
cols
;
++
j2
)
for
(
Index
j2
=
packet_cols
4
;
j2
<
cols
;
++
j2
)
{
{
// transpose
// transpose
Index
half
=
(
std
::
min
)(
end_k
,
j2
);
Index
half
=
(
std
::
min
)(
end_k
,
j2
);
...
@@ -211,7 +291,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
...
@@ -211,7 +291,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const
Scalar
*
lhs
,
Index
lhsStride
,
const
Scalar
*
lhs
,
Index
lhsStride
,
const
Scalar
*
rhs
,
Index
rhsStride
,
const
Scalar
*
rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
*
res
,
Index
resStride
,
const
Scalar
&
alpha
)
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
{
product_selfadjoint_matrix
<
Scalar
,
Index
,
product_selfadjoint_matrix
<
Scalar
,
Index
,
EIGEN_LOGICAL_XOR
(
RhsSelfAdjoint
,
RhsStorageOrder
==
RowMajor
)
?
ColMajor
:
RowMajor
,
EIGEN_LOGICAL_XOR
(
RhsSelfAdjoint
,
RhsStorageOrder
==
RowMajor
)
?
ColMajor
:
RowMajor
,
...
@@ -219,7 +299,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
...
@@ -219,7 +299,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
EIGEN_LOGICAL_XOR
(
LhsSelfAdjoint
,
LhsStorageOrder
==
RowMajor
)
?
ColMajor
:
RowMajor
,
EIGEN_LOGICAL_XOR
(
LhsSelfAdjoint
,
LhsStorageOrder
==
RowMajor
)
?
ColMajor
:
RowMajor
,
LhsSelfAdjoint
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
LhsSelfAdjoint
,
ConjugateLhs
),
LhsSelfAdjoint
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
LhsSelfAdjoint
,
ConjugateLhs
),
ColMajor
>
ColMajor
>
::
run
(
cols
,
rows
,
rhs
,
rhsStride
,
lhs
,
lhsStride
,
res
,
resStride
,
alpha
);
::
run
(
cols
,
rows
,
rhs
,
rhsStride
,
lhs
,
lhsStride
,
res
,
resStride
,
alpha
,
blocking
);
}
}
};
};
...
@@ -234,7 +314,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
...
@@ -234,7 +314,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
*
res
,
Index
resStride
,
const
Scalar
&
alpha
);
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
);
};
};
template
<
typename
Scalar
,
typename
Index
,
template
<
typename
Scalar
,
typename
Index
,
...
@@ -244,33 +324,35 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
...
@@ -244,33 +324,35 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
Index
rows
,
Index
cols
,
Index
rows
,
Index
cols
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
*
_
res
,
Index
resStride
,
const
Scalar
&
alpha
)
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
{
Index
size
=
rows
;
Index
size
=
rows
;
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
lhs
(
_lhs
,
lhsStride
);
const_blas_data_mapper
<
Scalar
,
Index
,
RhsStorageOrder
>
rhs
(
_rhs
,
rhsStride
);
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
Index
kc
=
size
;
// cache block size along the K direction
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
LhsMapper
;
Index
mc
=
rows
;
// cache block size along the M direction
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
(
LhsStorageOrder
==
RowMajor
)
?
ColMajor
:
RowMajor
>
LhsTransposeMapper
;
Index
nc
=
cols
;
// cache block size along the N direction
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
RhsStorageOrder
>
RhsMapper
;
computeProductBlockingSizes
<
Scalar
,
Scalar
>
(
kc
,
mc
,
nc
);
typedef
blas_data_mapper
<
typename
Traits
::
ResScalar
,
Index
,
ColMajor
>
ResMapper
;
// kc must smaller than mc
LhsMapper
lhs
(
_lhs
,
lhsStride
);
LhsTransposeMapper
lhs_transpose
(
_lhs
,
lhsStride
);
RhsMapper
rhs
(
_rhs
,
rhsStride
);
ResMapper
res
(
_res
,
resStride
);
Index
kc
=
blocking
.
kc
();
// cache block size along the K direction
Index
mc
=
(
std
::
min
)(
rows
,
blocking
.
mc
());
// cache block size along the M direction
// kc must be smaller than mc
kc
=
(
std
::
min
)(
kc
,
mc
);
kc
=
(
std
::
min
)(
kc
,
mc
);
std
::
size_t
sizeA
=
kc
*
mc
;
std
::
size_t
sizeB
=
kc
*
cols
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
sizeA
,
blocking
.
blockA
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockB
,
sizeB
,
blocking
.
blockB
());
std
::
size_t
sizeW
=
kc
*
Traits
::
WorkSpaceFactor
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
ResMapper
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
std
::
size_t
sizeB
=
sizeW
+
kc
*
cols
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
kc
*
mc
,
0
);
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
allocatedBlockB
,
sizeB
,
0
);
Scalar
*
blockB
=
allocatedBlockB
+
sizeW
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
symm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
symm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
RhsMapper
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
==
RowMajor
?
ColMajor
:
RowMajor
,
true
>
pack_lhs_transposed
;
gemm_pack_lhs
<
Scalar
,
Index
,
LhsTransposeMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
==
RowMajor
?
ColMajor
:
RowMajor
,
true
>
pack_lhs_transposed
;
for
(
Index
k2
=
0
;
k2
<
size
;
k2
+=
kc
)
for
(
Index
k2
=
0
;
k2
<
size
;
k2
+=
kc
)
{
{
...
@@ -279,7 +361,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
...
@@ -279,7 +361,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
// we have selected one row panel of rhs and one column panel of lhs
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
// pack rhs's panel into a sequential chunk of memory
// and expand each coeff to a constant packet for further reuse
// and expand each coeff to a constant packet for further reuse
pack_rhs
(
blockB
,
&
rhs
(
k2
,
0
),
rhsStride
,
actual_kc
,
cols
);
pack_rhs
(
blockB
,
rhs
.
getSubMapper
(
k2
,
0
)
,
actual_kc
,
cols
);
// the select lhs's panel has to be split in three different parts:
// the select lhs's panel has to be split in three different parts:
// 1 - the transposed panel above the diagonal block => transposed packed copy
// 1 - the transposed panel above the diagonal block => transposed packed copy
...
@@ -289,9 +371,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
...
@@ -289,9 +371,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
{
{
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
k2
)
-
i2
;
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
k2
)
-
i2
;
// transposed packed copy
// transposed packed copy
pack_lhs_transposed
(
blockA
,
&
lhs
(
k
2
,
i
2
),
lhsStride
,
actual_kc
,
actual_mc
);
pack_lhs_transposed
(
blockA
,
lhs
_transpose
.
getSubMapper
(
i
2
,
k
2
),
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
i2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
gebp_kernel
(
res
.
getSubMapper
(
i2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
}
}
// the block diagonal
// the block diagonal
{
{
...
@@ -299,16 +381,16 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
...
@@ -299,16 +381,16 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
// symmetric packed copy
// symmetric packed copy
pack_lhs
(
blockA
,
&
lhs
(
k2
,
k2
),
lhsStride
,
actual_kc
,
actual_mc
);
pack_lhs
(
blockA
,
&
lhs
(
k2
,
k2
),
lhsStride
,
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
k2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
gebp_kernel
(
res
.
getSubMapper
(
k2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
}
}
for
(
Index
i2
=
k2
+
kc
;
i2
<
size
;
i2
+=
mc
)
for
(
Index
i2
=
k2
+
kc
;
i2
<
size
;
i2
+=
mc
)
{
{
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
size
)
-
i2
;
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
size
)
-
i2
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
,
false
>
()
gemm_pack_lhs
<
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
,
false
>
()
(
blockA
,
&
lhs
(
i2
,
k2
),
lhsStride
,
actual_kc
,
actual_mc
);
(
blockA
,
lhs
.
getSubMapper
(
i2
,
k2
)
,
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
i2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
gebp_kernel
(
res
.
getSubMapper
(
i2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
}
}
}
}
}
}
...
@@ -325,7 +407,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
...
@@ -325,7 +407,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
*
res
,
Index
resStride
,
const
Scalar
&
alpha
);
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
);
};
};
template
<
typename
Scalar
,
typename
Index
,
template
<
typename
Scalar
,
typename
Index
,
...
@@ -335,27 +417,27 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
...
@@ -335,27 +417,27 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
Index
rows
,
Index
cols
,
Index
rows
,
Index
cols
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
*
_
res
,
Index
resStride
,
const
Scalar
&
alpha
)
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
{
Index
size
=
cols
;
Index
size
=
cols
;
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
lhs
(
_lhs
,
lhsStride
);
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
Index
kc
=
size
;
// cache block size along the K direction
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
LhsMapper
;
Index
mc
=
rows
;
// cache block size along the M direction
typedef
blas_data_mapper
<
typename
Traits
::
ResScalar
,
Index
,
ColMajor
>
ResMapper
;
Index
nc
=
cols
;
// cache block size along the N direction
LhsMapper
lhs
(
_lhs
,
lhsStride
);
computeProductBlockingSizes
<
Scalar
,
Scalar
>
(
kc
,
mc
,
nc
);
ResMapper
res
(
_res
,
resStride
);
std
::
size_t
sizeW
=
kc
*
Traits
::
WorkSpaceFactor
;
std
::
size_t
sizeB
=
sizeW
+
kc
*
cols
;
Index
kc
=
blocking
.
kc
();
// cache block size along the K direction
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
kc
*
mc
,
0
);
Index
mc
=
(
std
::
min
)(
rows
,
blocking
.
mc
());
// cache block size along the M direction
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
allocatedBlockB
,
sizeB
,
0
);
std
::
size_t
sizeA
=
kc
*
mc
;
Scalar
*
blockB
=
allocatedBlockB
+
sizeW
;
std
::
size_t
sizeB
=
kc
*
cols
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
sizeA
,
blocking
.
blockA
());
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockB
,
sizeB
,
blocking
.
blockB
());
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
ResMapper
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
symm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
symm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
for
(
Index
k2
=
0
;
k2
<
size
;
k2
+=
kc
)
for
(
Index
k2
=
0
;
k2
<
size
;
k2
+=
kc
)
...
@@ -368,9 +450,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
...
@@ -368,9 +450,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
for
(
Index
i2
=
0
;
i2
<
rows
;
i2
+=
mc
)
for
(
Index
i2
=
0
;
i2
<
rows
;
i2
+=
mc
)
{
{
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
rows
)
-
i2
;
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
rows
)
-
i2
;
pack_lhs
(
blockA
,
&
lhs
(
i2
,
k2
),
lhsStride
,
actual_kc
,
actual_mc
);
pack_lhs
(
blockA
,
lhs
.
getSubMapper
(
i2
,
k2
)
,
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
i2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
gebp_kernel
(
res
.
getSubMapper
(
i2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
}
}
}
}
}
}
...
@@ -382,55 +464,58 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
...
@@ -382,55 +464,58 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
***************************************************************************/
***************************************************************************/
namespace
internal
{
namespace
internal
{
template
<
typename
Lhs
,
int
LhsMode
,
typename
Rhs
,
int
RhsMode
>
template
<
typename
Lhs
,
int
LhsMode
,
typename
Rhs
,
int
RhsMode
>
struct
traits
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
>
struct
selfadjoint_product_impl
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
:
traits
<
ProductBase
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
,
Lhs
,
Rhs
>
>
{};
}
template
<
typename
Lhs
,
int
LhsMode
,
typename
Rhs
,
int
RhsMode
>
struct
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
:
public
ProductBase
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
,
Lhs
,
Rhs
>
{
{
EIGEN_PRODUCT_PUBLIC_INTERFACE
(
SelfadjointProductMatrix
)
typedef
typename
Product
<
Lhs
,
Rhs
>::
Scalar
Scalar
;
SelfadjointProductMatrix
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
)
:
Base
(
lhs
,
rhs
)
{}
typedef
internal
::
blas_traits
<
Lhs
>
LhsBlasTraits
;
typedef
typename
LhsBlasTraits
::
DirectLinearAccessType
ActualLhsType
;
typedef
internal
::
blas_traits
<
Rhs
>
RhsBlasTraits
;
typedef
typename
RhsBlasTraits
::
DirectLinearAccessType
ActualRhsType
;
enum
{
enum
{
LhsIsUpper
=
(
LhsMode
&
(
Upper
|
Lower
))
==
Upper
,
LhsIsUpper
=
(
LhsMode
&
(
Upper
|
Lower
))
==
Upper
,
LhsIsSelfAdjoint
=
(
LhsMode
&
SelfAdjoint
)
==
SelfAdjoint
,
LhsIsSelfAdjoint
=
(
LhsMode
&
SelfAdjoint
)
==
SelfAdjoint
,
RhsIsUpper
=
(
RhsMode
&
(
Upper
|
Lower
))
==
Upper
,
RhsIsUpper
=
(
RhsMode
&
(
Upper
|
Lower
))
==
Upper
,
RhsIsSelfAdjoint
=
(
RhsMode
&
SelfAdjoint
)
==
SelfAdjoint
RhsIsSelfAdjoint
=
(
RhsMode
&
SelfAdjoint
)
==
SelfAdjoint
};
};
template
<
typename
Dest
>
void
scaleAndAddTo
(
Dest
&
dst
,
const
Scalar
&
alpha
)
const
template
<
typename
Dest
>
static
void
run
(
Dest
&
dst
,
const
Lhs
&
a_lhs
,
const
Rhs
&
a_rhs
,
const
Scalar
&
alpha
)
{
{
eigen_assert
(
dst
.
rows
()
==
m
_lhs
.
rows
()
&&
dst
.
cols
()
==
m
_rhs
.
cols
());
eigen_assert
(
dst
.
rows
()
==
a
_lhs
.
rows
()
&&
dst
.
cols
()
==
a
_rhs
.
cols
());
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
lhs
=
LhsBlasTraits
::
extract
(
m
_lhs
);
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
lhs
=
LhsBlasTraits
::
extract
(
a
_lhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
rhs
=
RhsBlasTraits
::
extract
(
m
_rhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
rhs
=
RhsBlasTraits
::
extract
(
a
_rhs
);
Scalar
actualAlpha
=
alpha
*
LhsBlasTraits
::
extractScalarFactor
(
m_lhs
)
Scalar
actualAlpha
=
alpha
*
LhsBlasTraits
::
extractScalarFactor
(
a_lhs
)
*
RhsBlasTraits
::
extractScalarFactor
(
m_rhs
);
*
RhsBlasTraits
::
extractScalarFactor
(
a_rhs
);
typedef
internal
::
gemm_blocking_space
<
(
Dest
::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
Scalar
,
Scalar
,
Lhs
::
MaxRowsAtCompileTime
,
Rhs
::
MaxColsAtCompileTime
,
Lhs
::
MaxColsAtCompileTime
,
1
>
BlockingType
;
BlockingType
blocking
(
lhs
.
rows
(),
rhs
.
cols
(),
lhs
.
cols
(),
1
,
false
);
internal
::
product_selfadjoint_matrix
<
Scalar
,
Index
,
internal
::
product_selfadjoint_matrix
<
Scalar
,
Index
,
EIGEN_LOGICAL_XOR
(
LhsIsUpper
,
EIGEN_LOGICAL_XOR
(
LhsIsUpper
,
internal
::
traits
<
Lhs
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
LhsIsSelfAdjoint
,
internal
::
traits
<
Lhs
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
LhsIsSelfAdjoint
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
LhsIsUpper
,
bool
(
LhsBlasTraits
::
NeedToConjugate
)),
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
LhsIsUpper
,
bool
(
LhsBlasTraits
::
NeedToConjugate
)),
EIGEN_LOGICAL_XOR
(
RhsIsUpper
,
EIGEN_LOGICAL_XOR
(
RhsIsUpper
,
internal
::
traits
<
Rhs
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
RhsIsSelfAdjoint
,
internal
::
traits
<
Rhs
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
RhsIsSelfAdjoint
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
RhsIsUpper
,
bool
(
RhsBlasTraits
::
NeedToConjugate
)),
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
RhsIsUpper
,
bool
(
RhsBlasTraits
::
NeedToConjugate
)),
internal
::
traits
<
Dest
>::
Flags
&
RowMajorBit
?
RowMajor
:
ColMajor
>
internal
::
traits
<
Dest
>::
Flags
&
RowMajorBit
?
RowMajor
:
ColMajor
>
::
run
(
::
run
(
lhs
.
rows
(),
rhs
.
cols
(),
// sizes
lhs
.
rows
(),
rhs
.
cols
(),
// sizes
&
lhs
.
coeffRef
(
0
,
0
),
lhs
.
outerStride
(),
// lhs info
&
lhs
.
coeffRef
(
0
,
0
),
lhs
.
outerStride
(),
// lhs info
&
rhs
.
coeffRef
(
0
,
0
),
rhs
.
outerStride
(),
// rhs info
&
rhs
.
coeffRef
(
0
,
0
),
rhs
.
outerStride
(),
// rhs info
&
dst
.
coeffRef
(
0
,
0
),
dst
.
outerStride
(),
// result info
&
dst
.
coeffRef
(
0
,
0
),
dst
.
outerStride
(),
// result info
actualAlpha
// alpha
actualAlpha
,
blocking
// alpha
);
);
}
}
};
};
}
// end namespace internal
}
// end namespace Eigen
}
// end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
Prev
1
…
6
7
8
9
10
Next