Skip to content
...@@ -14,20 +14,6 @@ ...@@ -14,20 +14,6 @@
namespace Eigen { namespace Eigen {
/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
* \param MatrixType the type of the object of which we are taking the reverse
*
* This class represents an expression of the reverse of a vector.
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/
namespace internal { namespace internal {
template<typename MatrixType, int Direction> template<typename MatrixType, int Direction>
...@@ -37,36 +23,43 @@ struct traits<Reverse<MatrixType, Direction> > ...@@ -37,36 +23,43 @@ struct traits<Reverse<MatrixType, Direction> >
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename traits<MatrixType>::StorageKind StorageKind; typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind; typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename nested<MatrixType>::type MatrixTypeNested; typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
enum { enum {
RowsAtCompileTime = MatrixType::RowsAtCompileTime, RowsAtCompileTime = MatrixType::RowsAtCompileTime,
ColsAtCompileTime = MatrixType::ColsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
// let's enable LinearAccess only with vectorization because of the product overhead
LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
? LinearAccessBit : 0,
Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
}; };
}; };
template<typename PacketScalar, bool ReversePacket> struct reverse_packet_cond template<typename PacketType, bool ReversePacket> struct reverse_packet_cond
{ {
static inline PacketScalar run(const PacketScalar& x) { return preverse(x); } static inline PacketType run(const PacketType& x) { return preverse(x); }
}; };
template<typename PacketScalar> struct reverse_packet_cond<PacketScalar,false> template<typename PacketType> struct reverse_packet_cond<PacketType,false>
{ {
static inline PacketScalar run(const PacketScalar& x) { return x; } static inline PacketType run(const PacketType& x) { return x; }
}; };
} // end namespace internal } // end namespace internal
/** \class Reverse
* \ingroup Core_Module
*
* \brief Expression of the reverse of a vector or matrix
*
* \tparam MatrixType the type of the object of which we are taking the reverse
* \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections
*
* This class represents an expression of the reverse of a vector or matrix.
* It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::reverse(), VectorwiseOp::reverse()
*/
template<typename MatrixType, int Direction> class Reverse template<typename MatrixType, int Direction> class Reverse
: public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type : public internal::dense_xpr_base< Reverse<MatrixType, Direction> >::type
{ {
...@@ -74,12 +67,9 @@ template<typename MatrixType, int Direction> class Reverse ...@@ -74,12 +67,9 @@ template<typename MatrixType, int Direction> class Reverse
typedef typename internal::dense_xpr_base<Reverse>::type Base; typedef typename internal::dense_xpr_base<Reverse>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) EIGEN_DENSE_PUBLIC_INTERFACE(Reverse)
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
using Base::IsRowMajor; using Base::IsRowMajor;
// next line is necessary because otherwise const version of operator()
// is hidden by non-const version defined in this file
using Base::operator();
protected: protected:
enum { enum {
PacketSize = internal::packet_traits<Scalar>::size, PacketSize = internal::packet_traits<Scalar>::size,
...@@ -95,82 +85,19 @@ template<typename MatrixType, int Direction> class Reverse ...@@ -95,82 +85,19 @@ template<typename MatrixType, int Direction> class Reverse
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet; typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
public: public:
inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse)
inline Index rows() const { return m_matrix.rows(); } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); } EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
inline Index innerStride() const EIGEN_DEVICE_FUNC inline Index innerStride() const
{ {
return -m_matrix.innerStride(); return -m_matrix.innerStride();
} }
inline Scalar& operator()(Index row, Index col) EIGEN_DEVICE_FUNC const typename internal::remove_all<typename MatrixType::Nested>::type&
{
eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
return coeffRef(row, col);
}
inline Scalar& coeffRef(Index row, Index col)
{
return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
inline CoeffReturnType coeff(Index row, Index col) const
{
return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row,
ReverseCol ? m_matrix.cols() - col - 1 : col);
}
inline CoeffReturnType coeff(Index index) const
{
return m_matrix.coeff(m_matrix.size() - index - 1);
}
inline Scalar& coeffRef(Index index)
{
return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1);
}
inline Scalar& operator()(Index index)
{
eigen_assert(index >= 0 && index < m_matrix.size());
return coeffRef(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index row, Index col) const
{
return reverse_packet::run(m_matrix.template packet<LoadMode>(
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
ReverseCol ? m_matrix.cols() - col - OffsetCol : col));
}
template<int LoadMode>
inline void writePacket(Index row, Index col, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(
ReverseRow ? m_matrix.rows() - row - OffsetRow : row,
ReverseCol ? m_matrix.cols() - col - OffsetCol : col,
reverse_packet::run(x));
}
template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return internal::preverse(m_matrix.template packet<LoadMode>( m_matrix.size() - index - PacketSize ));
}
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x)
{
m_matrix.const_cast_derived().template writePacket<LoadMode>(m_matrix.size() - index - PacketSize, internal::preverse(x));
}
const typename internal::remove_all<typename MatrixType::Nested>::type&
nestedExpression() const nestedExpression() const
{ {
return m_matrix; return m_matrix;
...@@ -190,33 +117,93 @@ template<typename Derived> ...@@ -190,33 +117,93 @@ template<typename Derived>
inline typename DenseBase<Derived>::ReverseReturnType inline typename DenseBase<Derived>::ReverseReturnType
DenseBase<Derived>::reverse() DenseBase<Derived>::reverse()
{ {
return derived(); return ReverseReturnType(derived());
} }
/** This is the const version of reverse(). */
template<typename Derived> //reverse const overload moved to DenseBase.h due to a CUDA compiler bug
inline const typename DenseBase<Derived>::ConstReverseReturnType
DenseBase<Derived>::reverse() const
{
return derived();
}
/** This is the "in place" version of reverse: it reverses \c *this. /** This is the "in place" version of reverse: it reverses \c *this.
* *
* In most cases it is probably better to simply use the reversed expression * In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed, * of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides * then this "in-place" version is probably the right choice because it provides
* the following additional features: * the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care: * - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode * \code m = m.reverse().eval(); \endcode
* - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap) * - this API enables reverse operations without the need for a temporary
* - it allows future optimizations (cache friendliness, etc.) * - it allows future optimizations (cache friendliness, etc.)
* *
* \sa reverse() */ * \sa VectorwiseOp::reverseInPlace(), reverse() */
template<typename Derived> template<typename Derived>
inline void DenseBase<Derived>::reverseInPlace() inline void DenseBase<Derived>::reverseInPlace()
{ {
derived() = derived().reverse().eval(); if(cols()>rows())
{
Index half = cols()/2;
leftCols(half).swap(rightCols(half).reverse());
if((cols()%2)==1)
{
Index half2 = rows()/2;
col(half).head(half2).swap(col(half).tail(half2).reverse());
}
}
else
{
Index half = rows()/2;
topRows(half).swap(bottomRows(half).reverse());
if((rows()%2)==1)
{
Index half2 = cols()/2;
row(half).head(half2).swap(row(half).tail(half2).reverse());
}
}
}
namespace internal {

/** \internal
  * Helper carrying out the in-place reversal requested through
  * VectorwiseOp::reverseInPlace(). Only the \c Vertical and \c Horizontal
  * specializations below are defined.
  */
template<int Direction>
struct vectorwise_reverse_inplace_impl;

/** \internal
  * Vertical case: the first rows()/2 rows are swapped with the column-wise
  * reversed last rows()/2 rows of \a xpr.
  */
template<>
struct vectorwise_reverse_inplace_impl<Vertical>
{
  template<typename ExpressionType>
  static void run(ExpressionType &xpr)
  {
    // Integer division: each of the two swapped blocks holds rows()/2 rows.
    const Index halfRows = xpr.rows()/2;
    xpr.topRows(halfRows).swap(xpr.bottomRows(halfRows).colwise().reverse());
  }
};

/** \internal
  * Horizontal case: the first cols()/2 columns are swapped with the row-wise
  * reversed last cols()/2 columns of \a xpr.
  */
template<>
struct vectorwise_reverse_inplace_impl<Horizontal>
{
  template<typename ExpressionType>
  static void run(ExpressionType &xpr)
  {
    // Integer division: each of the two swapped blocks holds cols()/2 columns.
    const Index halfCols = xpr.cols()/2;
    xpr.leftCols(halfCols).swap(xpr.rightCols(halfCols).rowwise().reverse());
  }
};

} // end namespace internal
/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this.
*
* In most cases it is probably better to simply use the reversed expression
* of a matrix. However, when reversing the matrix data itself is really needed,
* then this "in-place" version is probably the right choice because it provides
* the following additional benefits:
* - less error prone: doing the same operation with .reverse() requires special care:
* \code m = m.reverse().eval(); \endcode
* - this API enables reverse operations without the need for a temporary
*
* \sa DenseBase::reverseInPlace(), reverse() */
template<typename ExpressionType, int Direction>
void VectorwiseOp<ExpressionType,Direction>::reverseInPlace()
{
internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived());
} }
} // end namespace Eigen } // end namespace Eigen
......
...@@ -43,23 +43,21 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> > ...@@ -43,23 +43,21 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
+ EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
}; };
}; };
} }
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
class Select : internal::no_assignment_operator, class Select : public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type,
public internal::dense_xpr_base< Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >::type internal::no_assignment_operator
{ {
public: public:
typedef typename internal::dense_xpr_base<Select>::type Base; typedef typename internal::dense_xpr_base<Select>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Select) EIGEN_DENSE_PUBLIC_INTERFACE(Select)
inline EIGEN_DEVICE_FUNC
Select(const ConditionMatrixType& a_conditionMatrix, Select(const ConditionMatrixType& a_conditionMatrix,
const ThenMatrixType& a_thenMatrix, const ThenMatrixType& a_thenMatrix,
const ElseMatrixType& a_elseMatrix) const ElseMatrixType& a_elseMatrix)
...@@ -69,9 +67,10 @@ class Select : internal::no_assignment_operator, ...@@ -69,9 +67,10 @@ class Select : internal::no_assignment_operator,
eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols()); eigen_assert(m_condition.cols() == m_then.cols() && m_condition.cols() == m_else.cols());
} }
Index rows() const { return m_condition.rows(); } inline EIGEN_DEVICE_FUNC Index rows() const { return m_condition.rows(); }
Index cols() const { return m_condition.cols(); } inline EIGEN_DEVICE_FUNC Index cols() const { return m_condition.cols(); }
inline EIGEN_DEVICE_FUNC
const Scalar coeff(Index i, Index j) const const Scalar coeff(Index i, Index j) const
{ {
if (m_condition.coeff(i,j)) if (m_condition.coeff(i,j))
...@@ -80,6 +79,7 @@ class Select : internal::no_assignment_operator, ...@@ -80,6 +79,7 @@ class Select : internal::no_assignment_operator,
return m_else.coeff(i,j); return m_else.coeff(i,j);
} }
inline EIGEN_DEVICE_FUNC
const Scalar coeff(Index i) const const Scalar coeff(Index i) const
{ {
if (m_condition.coeff(i)) if (m_condition.coeff(i))
...@@ -88,17 +88,17 @@ class Select : internal::no_assignment_operator, ...@@ -88,17 +88,17 @@ class Select : internal::no_assignment_operator,
return m_else.coeff(i); return m_else.coeff(i);
} }
const ConditionMatrixType& conditionMatrix() const inline EIGEN_DEVICE_FUNC const ConditionMatrixType& conditionMatrix() const
{ {
return m_condition; return m_condition;
} }
const ThenMatrixType& thenMatrix() const inline EIGEN_DEVICE_FUNC const ThenMatrixType& thenMatrix() const
{ {
return m_then; return m_then;
} }
const ElseMatrixType& elseMatrix() const inline EIGEN_DEVICE_FUNC const ElseMatrixType& elseMatrix() const
{ {
return m_else; return m_else;
} }
......
...@@ -32,54 +32,60 @@ namespace internal { ...@@ -32,54 +32,60 @@ namespace internal {
template<typename MatrixType, unsigned int UpLo> template<typename MatrixType, unsigned int UpLo>
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType> struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
{ {
typedef typename nested<MatrixType>::type MatrixTypeNested; typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType; typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject DenseMatrixType; typedef typename MatrixType::PlainObject FullMatrixType;
enum { enum {
Mode = UpLo | SelfAdjoint, Mode = UpLo | SelfAdjoint,
Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits) FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
& (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)), // FIXME these flags should be preserved Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits|FlagsLvalueBit)
CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved
}; };
}; };
} }
template <typename Lhs, int LhsMode, bool LhsIsVector,
typename Rhs, int RhsMode, bool RhsIsVector>
struct SelfadjointProductMatrix;
// FIXME could also be called SelfAdjointWrapper to be consistent with DiagonalWrapper ?? template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
template<typename MatrixType, unsigned int UpLo> class SelfAdjointView : public TriangularBase<SelfAdjointView<_MatrixType, UpLo> >
: public TriangularBase<SelfAdjointView<MatrixType, UpLo> >
{ {
public: public:
typedef _MatrixType MatrixType;
typedef TriangularBase<SelfAdjointView> Base; typedef TriangularBase<SelfAdjointView> Base;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits<SelfAdjointView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned; typedef typename internal::traits<SelfAdjointView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
typedef MatrixTypeNestedCleaned NestedExpression;
/** \brief The type of coefficients in this matrix */ /** \brief The type of coefficients in this matrix */
typedef typename internal::traits<SelfAdjointView>::Scalar Scalar; typedef typename internal::traits<SelfAdjointView>::Scalar Scalar;
typedef typename MatrixType::StorageIndex StorageIndex;
typedef typename MatrixType::Index Index; typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
enum { enum {
Mode = internal::traits<SelfAdjointView>::Mode Mode = internal::traits<SelfAdjointView>::Mode,
Flags = internal::traits<SelfAdjointView>::Flags,
TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0)
}; };
typedef typename MatrixType::PlainObject PlainObject; typedef typename MatrixType::PlainObject PlainObject;
inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) EIGEN_DEVICE_FUNC
explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{} {}
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_matrix.cols(); } inline Index cols() const { return m_matrix.cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return m_matrix.outerStride(); } inline Index outerStride() const { return m_matrix.outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return m_matrix.innerStride(); } inline Index innerStride() const { return m_matrix.innerStride(); }
/** \sa MatrixBase::coeff() /** \sa MatrixBase::coeff()
* \warning the coordinates must fit into the referenced triangular part * \warning the coordinates must fit into the referenced triangular part
*/ */
EIGEN_DEVICE_FUNC
inline Scalar coeff(Index row, Index col) const inline Scalar coeff(Index row, Index col) const
{ {
Base::check_coordinates_internal(row, col); Base::check_coordinates_internal(row, col);
...@@ -89,36 +95,46 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView ...@@ -89,36 +95,46 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
/** \sa MatrixBase::coeffRef() /** \sa MatrixBase::coeffRef()
* \warning the coordinates must fit into the referenced triangular part * \warning the coordinates must fit into the referenced triangular part
*/ */
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col) inline Scalar& coeffRef(Index row, Index col)
{ {
EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
Base::check_coordinates_internal(row, col); Base::check_coordinates_internal(row, col);
return m_matrix.const_cast_derived().coeffRef(row, col); return m_matrix.coeffRef(row, col);
} }
/** \internal */ /** \internal */
EIGEN_DEVICE_FUNC
const MatrixTypeNestedCleaned& _expression() const { return m_matrix; } const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
EIGEN_DEVICE_FUNC
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; } const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); } EIGEN_DEVICE_FUNC
MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }
/** Efficient self-adjoint matrix times vector/matrix product */ /** Efficient self-adjoint matrix times vector/matrix product */
template<typename OtherDerived> template<typename OtherDerived>
SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime> EIGEN_DEVICE_FUNC
const Product<SelfAdjointView,OtherDerived>
operator*(const MatrixBase<OtherDerived>& rhs) const operator*(const MatrixBase<OtherDerived>& rhs) const
{ {
return SelfadjointProductMatrix return Product<SelfAdjointView,OtherDerived>(*this, rhs.derived());
<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
(m_matrix, rhs.derived());
} }
/** Efficient vector/matrix times self-adjoint matrix product */ /** Efficient vector/matrix times self-adjoint matrix product */
template<typename OtherDerived> friend template<typename OtherDerived> friend
SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false> EIGEN_DEVICE_FUNC
const Product<OtherDerived,SelfAdjointView>
operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs) operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
{ {
return SelfadjointProductMatrix return Product<OtherDerived,SelfAdjointView>(lhs.derived(),rhs);
<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false> }
(lhs.derived(),rhs.m_matrix);
friend EIGEN_DEVICE_FUNC
const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>
operator*(const Scalar& s, const SelfAdjointView& mat)
{
return (s*mat.nestedExpression()).template selfadjointView<UpLo>();
} }
/** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this: /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this:
...@@ -132,6 +148,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView ...@@ -132,6 +148,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar) * \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
*/ */
template<typename DerivedU, typename DerivedV> template<typename DerivedU, typename DerivedV>
EIGEN_DEVICE_FUNC
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1)); SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
/** Perform a symmetric rank K update of the selfadjoint matrix \c *this: /** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
...@@ -145,8 +162,74 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView ...@@ -145,8 +162,74 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar) * \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
*/ */
template<typename DerivedU> template<typename DerivedU>
EIGEN_DEVICE_FUNC
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1)); SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
/** \returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part
*
* The parameter \a TriMode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
* \c #Lower, \c #StrictlyLower, \c #UnitLower.
*
* If \c TriMode references the same triangular part as \c *this, then this method simply returns a \c TriangularView of the nested expression;
* otherwise, the adjoint of the nested expression is taken first, thus returning a \c TriangularView<MatrixType::AdjointReturnType> object.
*
* \sa MatrixBase::triangularView(), class TriangularView
*/
template<unsigned int TriMode>
EIGEN_DEVICE_FUNC
typename internal::conditional<(TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)),
                               TriangularView<MatrixType,TriMode>,
                               TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type
triangularView() const
{
  // Non-zero when the requested triangle is the one this view already references.
  enum { SamePart = (TriMode&(Upper|Lower))==(UpLo&(Upper|Lower)) };

  // Same part: every step is a plain reference to the nested matrix.
  // Opposite part: the nested matrix is wrapped as a transpose, then as an adjoint.
  typedef typename internal::conditional<bool(SamePart),
                                         MatrixType&,
                                         typename MatrixType::ConstTransposeReturnType>::type TransposeStep;
  typedef typename internal::conditional<bool(SamePart),
                                         MatrixType&,
                                         typename MatrixType::AdjointReturnType>::type AdjointStep;
  typedef typename internal::conditional<bool(SamePart),
                                         TriangularView<MatrixType,TriMode>,
                                         TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type ResultType;

  TransposeStep transposeStep(m_matrix);
  AdjointStep adjointStep(transposeStep);
  return ResultType(adjointStep);
}
/** Return type of conjugate(): a self-adjoint view, with the same \c Mode, over the conjugate expression type. */
typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
/** \returns a self-adjoint view wrapping the conjugate of the nested expression.
  *
  * \sa MatrixBase::conjugate() const */
EIGEN_DEVICE_FUNC
inline const ConjugateReturnType conjugate() const
{ return ConjugateReturnType(m_matrix.conjugate()); }
/** Return type of adjoint(): a self-adjoint view over the adjoint expression type.
  * It uses \c TransposeMode, i.e. \c Upper and \c Lower are exchanged with respect to \c Mode. */
typedef SelfAdjointView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
/** \returns a self-adjoint view wrapping the adjoint of the nested expression.
  *
  * \sa MatrixBase::adjoint() const */
EIGEN_DEVICE_FUNC
inline const AdjointReturnType adjoint() const
{ return AdjointReturnType(m_matrix.adjoint()); }
/** Return type of the non-const transpose(): a self-adjoint view over the transpose expression type.
  * It uses \c TransposeMode, i.e. \c Upper and \c Lower are exchanged with respect to \c Mode. */
typedef SelfAdjointView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
/** \returns a self-adjoint view wrapping the transpose of the nested expression.
  *
  * A static assertion requires \c MatrixType to be an lvalue.
  *
  * \sa MatrixBase::transpose() */
EIGEN_DEVICE_FUNC
inline TransposeReturnType transpose()
{
  EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
  typedef typename MatrixType::TransposeReturnType NestedTranspose;
  // The transpose expression is built as a named object before being handed to the view.
  NestedTranspose nestedTranspose(m_matrix);
  return TransposeReturnType(nestedTranspose);
}
/** Return type of the const transpose(): a self-adjoint view over the const transpose expression type.
  * It uses \c TransposeMode, i.e. \c Upper and \c Lower are exchanged with respect to \c Mode. */
typedef SelfAdjointView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
/** \returns a read-only self-adjoint view wrapping the transpose of the nested expression.
  *
  * \sa MatrixBase::transpose() const */
EIGEN_DEVICE_FUNC
inline const ConstTransposeReturnType transpose() const
{
return ConstTransposeReturnType(m_matrix.transpose());
}
/** \returns a const expression of the main diagonal of the matrix \c *this
*
* This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator.
*
* \sa MatrixBase::diagonal(), class Diagonal */
EIGEN_DEVICE_FUNC
typename MatrixType::ConstDiagonalReturnType diagonal() const
{
  // The diagonal expression is constructed straight from the nested matrix.
  typedef typename MatrixType::ConstDiagonalReturnType DiagonalType;
  return DiagonalType(m_matrix);
}
/////////// Cholesky module /////////// /////////// Cholesky module ///////////
const LLT<PlainObject, UpLo> llt() const; const LLT<PlainObject, UpLo> llt() const;
...@@ -159,31 +242,10 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView ...@@ -159,31 +242,10 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
/** Return type of eigenvalues() */ /** Return type of eigenvalues() */
typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType; typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
EIGEN_DEVICE_FUNC
EigenvaluesReturnType eigenvalues() const; EigenvaluesReturnType eigenvalues() const;
EIGEN_DEVICE_FUNC
RealScalar operatorNorm() const; RealScalar operatorNorm() const;
#ifdef EIGEN2_SUPPORT
template<typename OtherDerived>
SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
{
enum {
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
};
m_matrix.const_cast_derived().template triangularView<UpLo>() = other;
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.adjoint();
return *this;
}
template<typename OtherMatrixType, unsigned int OtherMode>
SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
{
enum {
OtherPart = UpLo == Upper ? StrictlyLower : StrictlyUpper
};
m_matrix.const_cast_derived().template triangularView<UpLo>() = other.toDenseMatrix();
m_matrix.const_cast_derived().template triangularView<OtherPart>() = other.toDenseMatrix().adjoint();
return *this;
}
#endif
protected: protected:
MatrixTypeNested m_matrix; MatrixTypeNested m_matrix;
...@@ -201,90 +263,54 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView ...@@ -201,90 +263,54 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
namespace internal { namespace internal {
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite> // TODO currently a selfadjoint expression has the form SelfAdjointView<.,.>
struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount, ClearOpposite> // in the future selfadjoint-ness should be defined by the expression traits
{ // such that Transpose<SelfAdjointView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
enum { template<typename MatrixType, unsigned int Mode>
col = (UnrollCount-1) / Derived1::RowsAtCompileTime, struct evaluator_traits<SelfAdjointView<MatrixType,Mode> >
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
if(row == col)
dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
else if(row < col)
dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
{
static inline void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, int UnrollCount, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount, ClearOpposite>
{
enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
if(row == col)
dst.coeffRef(row, col) = numext::real(src.coeff(row, col));
else if(row > col)
dst.coeffRef(col, row) = numext::conj(dst.coeffRef(row, col) = src.coeff(row, col));
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
{ {
static inline void run(Derived1 &, const Derived2 &) {} typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef SelfAdjointShape Shape;
}; };
template<typename Derived1, typename Derived2, bool ClearOpposite> template<int UpLo, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite> class triangular_dense_assignment_kernel<UpLo,SelfAdjoint,SetOpposite,DstEvaluatorTypeT,SrcEvaluatorTypeT,Functor,Version>
: public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
{ {
typedef typename Derived1::Index Index; protected:
static inline void run(Derived1 &dst, const Derived2 &src) typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
typedef typename Base::DstXprType DstXprType;
typedef typename Base::SrcXprType SrcXprType;
using Base::m_dst;
using Base::m_src;
using Base::m_functor;
public:
typedef typename Base::DstEvaluatorType DstEvaluatorType;
typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
typedef typename Base::Scalar Scalar;
typedef typename Base::AssignmentTraits AssignmentTraits;
EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: Base(dst, src, func, dstExpr)
{}
EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
{ {
for(Index j = 0; j < dst.cols(); ++j) eigen_internal_assert(row!=col);
{ Scalar tmp = m_src.coeff(row,col);
for(Index i = 0; i < j; ++i) m_functor.assignCoeff(m_dst.coeffRef(row,col), tmp);
{ m_functor.assignCoeff(m_dst.coeffRef(col,row), numext::conj(tmp));
dst.copyCoeff(i, j, src);
dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
}
dst.copyCoeff(j, j, src);
}
} }
};
EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
{
static inline void run(Derived1 &dst, const Derived2 &src)
{ {
typedef typename Derived1::Index Index; Base::assignCoeff(id,id);
for(Index i = 0; i < dst.rows(); ++i)
{
for(Index j = 0; j < i; ++j)
{
dst.copyCoeff(i, j, src);
dst.coeffRef(j,i) = numext::conj(dst.coeff(i,j));
}
dst.copyCoeff(i, i, src);
}
} }
EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index)
{ eigen_internal_assert(false && "should never be called"); }
}; };
} // end namespace internal } // end namespace internal
...@@ -293,20 +319,30 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dyn ...@@ -293,20 +319,30 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dyn
* Implementation of MatrixBase methods * Implementation of MatrixBase methods
***************************************************************************/ ***************************************************************************/
/** This is the const version of MatrixBase::selfadjointView() */
template<typename Derived> template<typename Derived>
template<unsigned int UpLo> template<unsigned int UpLo>
typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView() const MatrixBase<Derived>::selfadjointView() const
{ {
return derived(); return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived());
} }
/** \returns an expression of a symmetric/self-adjoint view extracted from the upper or lower triangular part of the current matrix
*
* The parameter \a UpLo can be either \c #Upper or \c #Lower
*
* Example: \include MatrixBase_selfadjointView.cpp
* Output: \verbinclude MatrixBase_selfadjointView.out
*
* \sa class SelfAdjointView
*/
template<typename Derived> template<typename Derived>
template<unsigned int UpLo> template<unsigned int UpLo>
typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type
MatrixBase<Derived>::selfadjointView() MatrixBase<Derived>::selfadjointView()
{ {
return derived(); return typename SelfAdjointViewReturnType<UpLo>::Type(derived());
} }
} // end namespace Eigen } // end namespace Eigen
......
...@@ -12,177 +12,37 @@ ...@@ -12,177 +12,37 @@
namespace Eigen { namespace Eigen {
/** \class SelfCwiseBinaryOp // TODO generalize the scalar type of 'other'
* \ingroup Core_Module
*
* \internal
*
* \brief Internal helper class for optimizing operators like +=, -=
*
* This is a pseudo expression class re-implementing the copyCoeff/copyPacket
* method to directly performs a +=/-= operations in an optimal way. In particular,
* this allows to make sure that the input/output data are loaded only once using
* aligned packet loads.
*
* \sa class SwapWrapper for a similar trick.
*/
namespace internal { template<typename Derived>
template<typename BinaryOp, typename Lhs, typename Rhs> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
struct traits<SelfCwiseBinaryOp<BinaryOp,Lhs,Rhs> >
: traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >
{ {
enum { typedef typename Derived::PlainObject PlainObject;
// Note that it is still a good idea to preserve the DirectAccessBit internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
// so that assign can correctly align the data. return derived();
Flags = traits<CwiseBinaryOp<BinaryOp,Lhs,Rhs> >::Flags | (Lhs::Flags&DirectAccessBit) | (Lhs::Flags&LvalueBit),
OuterStrideAtCompileTime = Lhs::OuterStrideAtCompileTime,
InnerStrideAtCompileTime = Lhs::InnerStrideAtCompileTime
};
};
} }
template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp template<typename Derived>
: public internal::dense_xpr_base< SelfCwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{ {
public: typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
typedef typename internal::dense_xpr_base<SelfCwiseBinaryOp>::type Base; return derived();
EIGEN_DENSE_PUBLIC_INTERFACE(SelfCwiseBinaryOp) }
typedef typename internal::packet_traits<Scalar>::type Packet;
inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }
inline Index outerStride() const { return m_matrix.outerStride(); }
inline Index innerStride() const { return m_matrix.innerStride(); }
inline const Scalar* data() const { return m_matrix.data(); }
// note that this function is needed by assign to correctly align loads/stores
// TODO make Assign use .data()
inline Scalar& coeffRef(Index row, Index col)
{
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
return m_matrix.const_cast_derived().coeffRef(row, col);
}
inline const Scalar& coeffRef(Index row, Index col) const
{
return m_matrix.coeffRef(row, col);
}
// note that this function is needed by assign to correctly align loads/stores
// TODO make Assign use .data()
inline Scalar& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(Lhs)
return m_matrix.const_cast_derived().coeffRef(index);
}
inline const Scalar& coeffRef(Index index) const
{
return m_matrix.const_cast_derived().coeffRef(index);
}
template<typename OtherDerived>
void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
Scalar& tmp = m_matrix.coeffRef(row,col);
tmp = m_functor(tmp, _other.coeff(row,col));
}
template<typename OtherDerived>
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
eigen_internal_assert(index >= 0 && index < m_matrix.size());
Scalar& tmp = m_matrix.coeffRef(index);
tmp = m_functor(tmp, _other.coeff(index));
}
template<typename OtherDerived, int StoreMode, int LoadMode>
void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
eigen_internal_assert(row >= 0 && row < rows()
&& col >= 0 && col < cols());
m_matrix.template writePacket<StoreMode>(row, col,
m_functor.packetOp(m_matrix.template packet<StoreMode>(row, col),_other.template packet<LoadMode>(row, col)) );
}
template<typename OtherDerived, int StoreMode, int LoadMode>
void copyPacket(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
eigen_internal_assert(index >= 0 && index < m_matrix.size());
m_matrix.template writePacket<StoreMode>(index,
m_functor.packetOp(m_matrix.template packet<StoreMode>(index),_other.template packet<LoadMode>(index)) );
}
// reimplement lazyAssign to handle complex *= real
// see CwiseBinaryOp ctor for details
template<typename RhsDerived>
EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
{
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp,typename Lhs::Scalar,typename RhsDerived::Scalar);
#ifdef EIGEN_DEBUG_ASSIGN
internal::assign_traits<SelfCwiseBinaryOp, RhsDerived>::debug();
#endif
eigen_assert(rows() == rhs.rows() && cols() == rhs.cols());
internal::assign_impl<SelfCwiseBinaryOp, RhsDerived>::run(*this,rhs.derived());
#ifndef EIGEN_NO_DEBUG
this->checkTransposeAliasing(rhs.derived());
#endif
return *this;
}
// overloaded to honor evaluation of special matrices
// maybe another solution would be to not use SelfCwiseBinaryOp
// at first...
SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
{
typename internal::nested<Rhs>::type rhs(_rhs);
return Base::operator=(rhs);
}
Lhs& expression() const
{
return m_matrix;
}
const BinaryOp& functor() const
{
return m_functor;
}
protected:
Lhs& m_matrix;
const BinaryOp& m_functor;
private:
SelfCwiseBinaryOp& operator=(const SelfCwiseBinaryOp&);
};
template<typename Derived> template<typename Derived>
inline Derived& DenseBase<Derived>::operator*=(const Scalar& other) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject; typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
tmp = PlainObject::Constant(rows(),cols(),other);
return derived(); return derived();
} }
template<typename Derived> template<typename Derived>
inline Derived& DenseBase<Derived>::operator/=(const Scalar& other) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{ {
typedef typename Derived::PlainObject PlainObject; typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, Derived, typename PlainObject::ConstantReturnType> tmp(derived()); internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
tmp = PlainObject::Constant(rows(),cols(), other);
return derived(); return derived();
} }
......
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_SOLVE_H
#define EIGEN_SOLVE_H
namespace Eigen {
template<typename Decomposition, typename RhsType, typename StorageKind> class SolveImpl;
/** \class Solve
* \ingroup Core_Module
*
* \brief Pseudo expression representing a solving operation
*
* \tparam Decomposition the type of the matrix or decomposition object
* \tparam RhsType the type of the right-hand side
*
* This class represents an expression of A.solve(B)
* and most of the time this is the only way it is used.
*
*/
namespace internal {
// The solve_traits class determines the evaluation type according to the storage kind (Dense vs Sparse)
template<typename Decomposition, typename RhsType,typename StorageKind> struct solve_traits;
// Dense case: PlainObject is the type produced by make_proper_matrix_type from
// RhsType::Scalar, the compile-time column counts of the decomposition and of the
// right-hand side, the options of RhsType::PlainObject, and the corresponding
// maximum column counts.
// NOTE(review): the argument order of make_proper_matrix_type is presumably
// (Scalar, Rows, Cols, Options, MaxRows, MaxCols) -- confirm against its definition.
template<typename Decomposition, typename RhsType>
struct solve_traits<Decomposition,RhsType,Dense>
{
typedef typename make_proper_matrix_type<typename RhsType::Scalar,
Decomposition::ColsAtCompileTime,
RhsType::ColsAtCompileTime,
RhsType::PlainObject::Options,
Decomposition::MaxColsAtCompileTime,
RhsType::MaxColsAtCompileTime>::type PlainObject;
};
// Traits of the Solve expression. They derive from the traits of the PlainObject that
// solve_traits selects for the storage kind of RhsType; only the members below are
// redefined.
template<typename Decomposition, typename RhsType>
struct traits<Solve<Decomposition, RhsType> >
: traits<typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject>
{
// Plain type selected by solve_traits for this decomposition / right-hand side pair.
typedef typename solve_traits<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>::PlainObject PlainObject;
// Index type obtained by promoting the storage index types of both operands.
typedef typename promote_index_type<typename Decomposition::StorageIndex, typename RhsType::StorageIndex>::type StorageIndex;
typedef traits<PlainObject> BaseTraits;
enum {
// Of the PlainObject flags, only RowMajorBit is kept.
Flags = BaseTraits::Flags & RowMajorBit,
// The coefficient read cost is set to HugeCost.
CoeffReadCost = HugeCost
};
};
}
/** Pairs a decomposition with a right-hand side.
  *
  * The object holds references to both operands (nothing is copied) and reports the
  * dimensions of the solution: as many rows as the decomposition has columns, and as
  * many columns as the right-hand side.
  */
template<typename Decomposition, typename RhsType>
class Solve : public SolveImpl<Decomposition,RhsType,typename internal::traits<RhsType>::StorageKind>
{
  public:
    typedef typename internal::traits<Solve>::PlainObject PlainObject;
    typedef typename internal::traits<Solve>::StorageIndex StorageIndex;

    /** Binds \a dec and \a rhs by reference; both must outlive this expression. */
    Solve(const Decomposition &dec, const RhsType &rhs)
      : m_dec(dec),
        m_rhs(rhs)
    {}

    /** \returns the decomposition this expression refers to. */
    EIGEN_DEVICE_FUNC const Decomposition& dec() const
    {
      return m_dec;
    }

    /** \returns the right-hand side this expression refers to. */
    EIGEN_DEVICE_FUNC const RhsType& rhs() const
    {
      return m_rhs;
    }

    /** \returns the number of rows of the solution, i.e. the column count of the decomposition. */
    EIGEN_DEVICE_FUNC Index rows() const
    {
      return m_dec.cols();
    }

    /** \returns the number of columns of the solution, i.e. the column count of the right-hand side. */
    EIGEN_DEVICE_FUNC Index cols() const
    {
      return m_rhs.cols();
    }

  protected:
    const Decomposition &m_dec;
    const RhsType &m_rhs;
};
// Specialization of the Solve expression for dense results.
// The implementation base is MatrixBase<Solve<Decomposition,RhsType> >.
template<typename Decomposition, typename RhsType>
class SolveImpl<Decomposition,RhsType,Dense>
: public MatrixBase<Solve<Decomposition,RhsType> >
{
typedef Solve<Decomposition,RhsType> Derived;
public:
typedef MatrixBase<Solve<Decomposition,RhsType> > Base;
EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
private:
// Both coefficient accessors are declared private and are not defined in this class.
// NOTE(review): presumably this rejects coefficient-wise access to a solution that
// has not been evaluated yet -- confirm.
Scalar coeff(Index row, Index col) const;
Scalar coeff(Index i) const;
};
// Generic API dispatcher: for a storage kind without a dedicated SolveImpl
// specialization, the base class is the type that internal::generic_xpr_base yields
// for (Solve<Decomposition,RhsType>, MatrixXpr, StorageKind).
template<typename Decomposition, typename RhsType, typename StorageKind>
class SolveImpl : public internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type
{
public:
typedef typename internal::generic_xpr_base<Solve<Decomposition,RhsType>, MatrixXpr, StorageKind>::type Base;
};
namespace internal {
// Evaluator of Solve -> eval into a temporary
//
// The evaluator owns a PlainObject (m_result) holding the solution and derives from
// the evaluator of that plain type.
template<typename Decomposition, typename RhsType>
struct evaluator<Solve<Decomposition,RhsType> >
: public evaluator<typename Solve<Decomposition,RhsType>::PlainObject>
{
typedef Solve<Decomposition,RhsType> SolveType;
typedef typename SolveType::PlainObject PlainObject;
typedef evaluator<PlainObject> Base;
// Flags of the plain-object evaluator, with EvalBeforeNestingBit added.
enum { Flags = Base::Flags | EvalBeforeNestingBit };
// Sizes the temporary from the dimensions of the solve expression, then fills it.
EIGEN_DEVICE_FUNC explicit evaluator(const SolveType& solve)
: m_result(solve.rows(), solve.cols())
{
// Base-class subobjects are initialized before data members, so the Base part is
// re-constructed in place here, once m_result exists.
::new (static_cast<Base*>(this)) Base(m_result);
// The decomposition writes the solution into the temporary.
solve.dec()._solve_impl(solve.rhs(), m_result);
}
protected:
// Temporary holding the evaluated solution.
PlainObject m_result;
};
// Assignment of a plain solve: "dst = dec.solve(rhs)".
// The destination is resized to the shape of the solve expression when it differs,
// then the decomposition writes the solution straight into it.
// NOTE: the specialization is restricted to Dense2Dense to avoid an ambiguous
// specialization error; a Sparse2Sparse specialization must exist somewhere.
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
{
  typedef Solve<DecType,RhsType> SrcXprType;

  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
  {
    const Index wantedRows = src.rows();
    const Index wantedCols = src.cols();

    const bool shapeMatches = (dst.rows()==wantedRows) && (dst.cols()==wantedCols);
    if(!shapeMatches)
      dst.resize(wantedRows, wantedCols);

    src.dec()._solve_impl(src.rhs(), dst);
  }
};
// Assignment of a transposed solve: "dst = dec.transpose().solve(rhs)".
// The Transpose wrapper is peeled off and the underlying decomposition is asked for
// its transposed solve with the template flag set to false.
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense>
{
  typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;

  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
  {
    const Index wantedRows = src.rows();
    const Index wantedCols = src.cols();

    const bool shapeMatches = (dst.rows()==wantedRows) && (dst.cols()==wantedCols);
    if(!shapeMatches)
      dst.resize(wantedRows, wantedCols);

    src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
  }
};
// Assignment of an adjoint solve: "dst = dec.adjoint().solve(rhs)".
// Both wrappers (the conjugate CwiseUnaryOp and the Transpose inside it) are peeled
// off and the underlying decomposition is asked for its transposed solve with the
// template flag set to true.
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>,
                  internal::assign_op<Scalar,Scalar>, Dense2Dense>
{
  typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;

  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
  {
    const Index wantedRows = src.rows();
    const Index wantedCols = src.cols();

    const bool shapeMatches = (dst.rows()==wantedRows) && (dst.cols()==wantedCols);
    if(!shapeMatches)
      dst.resize(wantedRows, wantedCols);

    src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
  }
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SOLVE_H
...@@ -68,7 +68,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1> ...@@ -68,7 +68,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,1>
if(!useRhsDirectly) if(!useRhsDirectly)
MappedRhs(actualRhs,rhs.size()) = rhs; MappedRhs(actualRhs,rhs.size()) = rhs;
triangular_solve_vector<LhsScalar, RhsScalar, typename Lhs::Index, Side, Mode, LhsProductTraits::NeedToConjugate, triangular_solve_vector<LhsScalar, RhsScalar, Index, Side, Mode, LhsProductTraits::NeedToConjugate,
(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor> (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor>
::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs); ::run(actualLhs.cols(), actualLhs.data(), actualLhs.outerStride(), actualRhs);
...@@ -82,7 +82,6 @@ template<typename Lhs, typename Rhs, int Side, int Mode> ...@@ -82,7 +82,6 @@ template<typename Lhs, typename Rhs, int Side, int Mode>
struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic> struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
{ {
typedef typename Rhs::Scalar Scalar; typedef typename Rhs::Scalar Scalar;
typedef typename Rhs::Index Index;
typedef blas_traits<Lhs> LhsProductTraits; typedef blas_traits<Lhs> LhsProductTraits;
typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType; typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType;
...@@ -96,7 +95,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic> ...@@ -96,7 +95,7 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, typedef internal::gemm_blocking_space<(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType; Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxRowsAtCompileTime,4> BlockingType;
BlockingType blocking(rhs.rows(), rhs.cols(), size); BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor, triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor> (Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
...@@ -108,32 +107,32 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic> ...@@ -108,32 +107,32 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
* meta-unrolling implementation * meta-unrolling implementation
***************************************************************************/ ***************************************************************************/
template<typename Lhs, typename Rhs, int Mode, int Index, int Size, template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size,
bool Stop = Index==Size> bool Stop = LoopIndex==Size>
struct triangular_solver_unroller; struct triangular_solver_unroller;
template<typename Lhs, typename Rhs, int Mode, int Index, int Size> template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,false> { struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,false> {
enum { enum {
IsLower = ((Mode&Lower)==Lower), IsLower = ((Mode&Lower)==Lower),
RowIndex = IsLower ? Index : Size - Index - 1, DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1,
S = IsLower ? 0 : RowIndex+1 StartIndex = IsLower ? 0 : DiagIndex+1
}; };
static void run(const Lhs& lhs, Rhs& rhs) static void run(const Lhs& lhs, Rhs& rhs)
{ {
if (Index>0) if (LoopIndex>0)
rhs.coeffRef(RowIndex) -= lhs.row(RowIndex).template segment<Index>(S).transpose() rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex).template segment<LoopIndex>(StartIndex).transpose()
.cwiseProduct(rhs.template segment<Index>(S)).sum(); .cwiseProduct(rhs.template segment<LoopIndex>(StartIndex)).sum();
if(!(Mode & UnitDiag)) if(!(Mode & UnitDiag))
rhs.coeffRef(RowIndex) /= lhs.coeff(RowIndex,RowIndex); rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex,DiagIndex);
triangular_solver_unroller<Lhs,Rhs,Mode,Index+1,Size>::run(lhs,rhs); triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex+1,Size>::run(lhs,rhs);
} }
}; };
template<typename Lhs, typename Rhs, int Mode, int Index, int Size> template<typename Lhs, typename Rhs, int Mode, int LoopIndex, int Size>
struct triangular_solver_unroller<Lhs,Rhs,Mode,Index,Size,true> { struct triangular_solver_unroller<Lhs,Rhs,Mode,LoopIndex,Size,true> {
static void run(const Lhs&, Rhs&) {} static void run(const Lhs&, Rhs&) {}
}; };
...@@ -162,61 +161,35 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> { ...@@ -162,61 +161,35 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> {
* TriangularView methods * TriangularView methods
***************************************************************************/ ***************************************************************************/
/** "in-place" version of TriangularView::solve() where the result is written in \a other #ifndef EIGEN_PARSED_BY_DOXYGEN
*
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
* This function will const_cast it, so constness isn't honored here.
*
* See TriangularView:solve() for the details.
*/
template<typename MatrixType, unsigned int Mode> template<typename MatrixType, unsigned int Mode>
template<int Side, typename OtherDerived> template<int Side, typename OtherDerived>
void TriangularView<MatrixType,Mode>::solveInPlace(const MatrixBase<OtherDerived>& _other) const void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const
{ {
OtherDerived& other = _other.const_cast_derived(); OtherDerived& other = _other.const_cast_derived();
eigen_assert( cols() == rows() && ((Side==OnTheLeft && cols() == other.rows()) || (Side==OnTheRight && cols() == other.cols())) ); eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) );
eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower))); eigen_assert((!(Mode & ZeroDiag)) && bool(Mode & (Upper|Lower)));
enum { copy = internal::traits<OtherDerived>::Flags & RowMajorBit && OtherDerived::IsVectorAtCompileTime }; enum { copy = (internal::traits<OtherDerived>::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && OtherDerived::SizeAtCompileTime!=1};
typedef typename internal::conditional<copy, typedef typename internal::conditional<copy,
typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy; typename internal::plain_matrix_type_column_major<OtherDerived>::type, OtherDerived&>::type OtherCopy;
OtherCopy otherCopy(other); OtherCopy otherCopy(other);
internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type, internal::triangular_solver_selector<MatrixType, typename internal::remove_reference<OtherCopy>::type,
Side, Mode>::run(nestedExpression(), otherCopy); Side, Mode>::run(derived().nestedExpression(), otherCopy);
if (copy) if (copy)
other = otherCopy; other = otherCopy;
} }
/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
*
* This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
* \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
* \a Side==OnTheRight.
*
* The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
* diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
* is an upper (resp. lower) triangular matrix.
*
* Example: \include MatrixBase_marked.cpp
* Output: \verbinclude MatrixBase_marked.out
*
* This function returns an expression of the inverse-multiply and can works in-place if it is assigned
* to the same matrix or vector \a other.
*
* For users coming from BLAS, this function (and more specifically solveInPlace()) offer
* all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
*
* \sa TriangularView::solveInPlace()
*/
template<typename Derived, unsigned int Mode> template<typename Derived, unsigned int Mode>
template<int Side, typename Other> template<int Side, typename Other>
const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other> const internal::triangular_solve_retval<Side,TriangularView<Derived,Mode>,Other>
TriangularView<Derived,Mode>::solve(const MatrixBase<Other>& other) const TriangularViewImpl<Derived,Mode,Dense>::solve(const MatrixBase<Other>& other) const
{ {
return internal::triangular_solve_retval<Side,TriangularView,Other>(*this, other.derived()); return internal::triangular_solve_retval<Side,TriangularViewType,Other>(derived(), other.derived());
} }
#endif
namespace internal { namespace internal {
...@@ -232,7 +205,6 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv ...@@ -232,7 +205,6 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
{ {
typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned; typedef typename remove_all<typename Rhs::Nested>::type RhsNestedCleaned;
typedef ReturnByValue<triangular_solve_retval> Base; typedef ReturnByValue<triangular_solve_retval> Base;
typedef typename Base::Index Index;
triangular_solve_retval(const TriangularType& tri, const Rhs& rhs) triangular_solve_retval(const TriangularType& tri, const Rhs& rhs)
: m_triangularMatrix(tri), m_rhs(rhs) : m_triangularMatrix(tri), m_rhs(rhs)
...@@ -243,7 +215,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv ...@@ -243,7 +215,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
template<typename Dest> inline void evalTo(Dest& dst) const template<typename Dest> inline void evalTo(Dest& dst) const
{ {
if(!(is_same<RhsNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_rhs))) if(!is_same_dense(dst,m_rhs))
dst = m_rhs; dst = m_rhs;
m_triangularMatrix.template solveInPlace<Side>(dst); m_triangularMatrix.template solveInPlace<Side>(dst);
} }
......
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_SOLVERBASE_H
#define EIGEN_SOLVERBASE_H
namespace Eigen {
namespace internal {
} // end namespace internal
/** \class SolverBase
* \brief A base class for matrix decomposition and solvers
*
* \tparam Derived the actual type of the decomposition/solver.
*
* Any matrix decomposition inheriting this base class provides the following API:
*
* \code
* MatrixType A, b, x;
* DecompositionType dec(A);
* x = dec.solve(b); // solve A * x = b
* x = dec.transpose().solve(b); // solve A^T * x = b
* x = dec.adjoint().solve(b); // solve A' * x = b
* \endcode
*
* \warning Currently, any other usage of transpose() and adjoint() is not supported and will produce compilation errors.
*
* \sa class PartialPivLU, class FullPivLU
*/
template<typename Derived>
class SolverBase : public EigenBase<Derived>
{
  public:

    typedef EigenBase<Derived> Base;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef Scalar CoeffReturnType;

    /** Compile-time dimensions, all read from internal::traits<Derived>.
      * IsVectorAtCompileTime is set when either maximum dimension equals 1.
      */
    enum {
      RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
      ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
      SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
                                                          internal::traits<Derived>::ColsAtCompileTime>::ret),
      MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
      MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
      MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
                                                             internal::traits<Derived>::MaxColsAtCompileTime>::ret),
      IsVectorAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime == 1
                           || internal::traits<Derived>::MaxColsAtCompileTime == 1
    };

    /** Default constructor */
    SolverBase()
    {}

    ~SolverBase()
    {}

    using Base::derived;

    /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A.
      *
      * The row count of \a b must match the row count of the decomposition; this is
      * checked with an assertion.
      */
    template<typename Rhs>
    inline const Solve<Derived, Rhs>
    solve(const MatrixBase<Rhs>& b) const
    {
      const Derived& self = derived();
      eigen_assert(self.rows()==b.rows() && "solve(): invalid number of rows of the right hand side matrix b");
      return Solve<Derived, Rhs>(self, b.derived());
    }

    /** \internal the return type of transpose() */
    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;

    /** \returns an expression of the transpose of the factored matrix.
      *
      * Typically used to solve the transposed problem A^T x = b:
      * \code x = dec.transpose().solve(b); \endcode
      *
      * \sa adjoint(), solve()
      */
    inline ConstTransposeReturnType transpose() const
    {
      return ConstTransposeReturnType(derived());
    }

    /** \internal the return type of adjoint() */
    typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
                        CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, ConstTransposeReturnType>,
                        ConstTransposeReturnType
                     >::type AdjointReturnType;

    /** \returns an expression of the adjoint of the factored matrix.
      *
      * Typically used to solve the adjoint problem A' x = b:
      * \code x = dec.adjoint().solve(b); \endcode
      *
      * For real scalar types, this function is equivalent to transpose().
      *
      * \sa transpose(), solve()
      */
    inline AdjointReturnType adjoint() const
    {
      return AdjointReturnType(derived().transpose());
    }

  protected:
};
namespace internal {
// For matrix expressions (MatrixXpr) whose storage kind is SolverStorage, the generic
// expression base class is SolverBase<Derived>.
template<typename Derived>
struct generic_xpr_base<Derived, MatrixXpr, SolverStorage>
{
typedef SolverBase<Derived> type;
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SOLVERBASE_H
...@@ -17,10 +17,9 @@ namespace internal { ...@@ -17,10 +17,9 @@ namespace internal {
template<typename ExpressionType, typename Scalar> template<typename ExpressionType, typename Scalar>
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale) inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{ {
using std::max;
Scalar maxCoeff = bl.cwiseAbs().maxCoeff(); Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
if (maxCoeff>scale) if(maxCoeff>scale)
{ {
ssq = ssq * numext::abs2(scale/maxCoeff); ssq = ssq * numext::abs2(scale/maxCoeff);
Scalar tmp = Scalar(1)/maxCoeff; Scalar tmp = Scalar(1)/maxCoeff;
...@@ -29,12 +28,21 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc ...@@ -29,12 +28,21 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
invScale = NumTraits<Scalar>::highest(); invScale = NumTraits<Scalar>::highest();
scale = Scalar(1)/invScale; scale = Scalar(1)/invScale;
} }
else if(maxCoeff>NumTraits<Scalar>::highest()) // we got a INF
{
invScale = Scalar(1);
scale = maxCoeff;
}
else else
{ {
scale = maxCoeff; scale = maxCoeff;
invScale = tmp; invScale = tmp;
} }
} }
else if(maxCoeff!=maxCoeff) // we got a NaN
{
scale = maxCoeff;
}
// TODO if the maxCoeff is much much smaller than the current scale, // TODO if the maxCoeff is much much smaller than the current scale,
// then we can neglect this sub vector // then we can neglect this sub vector
...@@ -47,15 +55,12 @@ inline typename NumTraits<typename traits<Derived>::Scalar>::Real ...@@ -47,15 +55,12 @@ inline typename NumTraits<typename traits<Derived>::Scalar>::Real
blueNorm_impl(const EigenBase<Derived>& _vec) blueNorm_impl(const EigenBase<Derived>& _vec)
{ {
typedef typename Derived::RealScalar RealScalar; typedef typename Derived::RealScalar RealScalar;
typedef typename Derived::Index Index;
using std::pow; using std::pow;
using std::min;
using std::max;
using std::sqrt; using std::sqrt;
using std::abs; using std::abs;
const Derived& vec(_vec.derived()); const Derived& vec(_vec.derived());
static bool initialized = false; static bool initialized = false;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr; static RealScalar b1, b2, s1m, s2m, rbig, relerr;
if(!initialized) if(!initialized)
{ {
int ibeta, it, iemin, iemax, iexp; int ibeta, it, iemin, iemax, iexp;
...@@ -84,7 +89,6 @@ blueNorm_impl(const EigenBase<Derived>& _vec) ...@@ -84,7 +89,6 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
iexp = - ((iemax+it)/2); iexp = - ((iemax+it)/2);
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(pow(double(ibeta), 1-it)); eps = RealScalar(pow(double(ibeta), 1-it));
relerr = sqrt(eps); // tolerance for neglecting asml relerr = sqrt(eps); // tolerance for neglecting asml
initialized = true; initialized = true;
...@@ -101,13 +105,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec) ...@@ -101,13 +105,13 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
else if(ax < b1) asml += numext::abs2(ax*s1m); else if(ax < b1) asml += numext::abs2(ax*s1m);
else amed += numext::abs2(ax); else amed += numext::abs2(ax);
} }
if(amed!=amed)
return amed; // we got a NaN
if(abig > RealScalar(0)) if(abig > RealScalar(0))
{ {
abig = sqrt(abig); abig = sqrt(abig);
if(abig > overfl) if(abig > rbig) // overflow, or *this contains INF values
{ return abig; // return INF
return rbig;
}
if(amed > RealScalar(0)) if(amed > RealScalar(0))
{ {
abig = abig/s2m; abig = abig/s2m;
...@@ -128,8 +132,8 @@ blueNorm_impl(const EigenBase<Derived>& _vec) ...@@ -128,8 +132,8 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
} }
else else
return sqrt(amed); return sqrt(amed);
asml = (min)(abig, amed); asml = numext::mini(abig, amed);
abig = (max)(abig, amed); abig = numext::maxi(abig, amed);
if(asml <= abig*relerr) if(asml <= abig*relerr)
return abig; return abig;
else else
...@@ -152,21 +156,35 @@ template<typename Derived> ...@@ -152,21 +156,35 @@ template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
MatrixBase<Derived>::stableNorm() const MatrixBase<Derived>::stableNorm() const
{ {
using std::min;
using std::sqrt; using std::sqrt;
using std::abs;
const Index blockSize = 4096; const Index blockSize = 4096;
RealScalar scale(0); RealScalar scale(0);
RealScalar invScale(1); RealScalar invScale(1);
RealScalar ssq(0); // sum of square RealScalar ssq(0); // sum of square
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
DerivedCopy copy(derived());
enum { enum {
Alignment = (int(Flags)&DirectAccessBit) || (int(Flags)&AlignedBit) ? 1 : 0 CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
|| (int(internal::evaluator<DerivedCopyClean>::Alignment)>0) // FIXME Alignment)>0 might not be enough
) && (blockSize*sizeof(Scalar)*2<EIGEN_STACK_ALLOCATION_LIMIT)
&& (EIGEN_MAX_STATIC_ALIGN_BYTES>0) // if we cannot allocate on the stack, then let's not bother about this optimization
}; };
typedef typename internal::conditional<CanAlign, Ref<const Matrix<Scalar,Dynamic,1,0,blockSize,1>, internal::evaluator<DerivedCopyClean>::Alignment>,
typename DerivedCopyClean::ConstSegmentReturnType>::type SegmentWrapper;
Index n = size(); Index n = size();
Index bi = internal::first_aligned(derived());
if(n==1)
return abs(this->coeff(0));
Index bi = internal::first_default_aligned(copy);
if (bi>0) if (bi>0)
internal::stable_norm_kernel(this->head(bi), ssq, scale, invScale); internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
for (; bi<n; bi+=blockSize) for (; bi<n; bi+=blockSize)
internal::stable_norm_kernel(this->segment(bi,(min)(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale); internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi,numext::mini(blockSize, n - bi))), ssq, scale, invScale);
return scale * sqrt(ssq); return scale * sqrt(ssq);
} }
......
...@@ -31,8 +31,8 @@ namespace Eigen { ...@@ -31,8 +31,8 @@ namespace Eigen {
* arguments to the constructor. * arguments to the constructor.
* *
* Indeed, this class takes two template parameters: * Indeed, this class takes two template parameters:
* \param _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime. * \tparam _OuterStrideAtCompileTime the outer stride, or Dynamic if you want to specify it at runtime.
* \param _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime. * \tparam _InnerStrideAtCompileTime the inner stride, or Dynamic if you want to specify it at runtime.
* *
* Here is an example: * Here is an example:
* \include Map_general_stride.cpp * \include Map_general_stride.cpp
...@@ -44,13 +44,14 @@ template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime> ...@@ -44,13 +44,14 @@ template<int _OuterStrideAtCompileTime, int _InnerStrideAtCompileTime>
class Stride class Stride
{ {
public: public:
typedef DenseIndex Index; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
enum { enum {
InnerStrideAtCompileTime = _InnerStrideAtCompileTime, InnerStrideAtCompileTime = _InnerStrideAtCompileTime,
OuterStrideAtCompileTime = _OuterStrideAtCompileTime OuterStrideAtCompileTime = _OuterStrideAtCompileTime
}; };
/** Default constructor, for use when strides are fixed at compile time */ /** Default constructor, for use when strides are fixed at compile time */
EIGEN_DEVICE_FUNC
Stride() Stride()
: m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime) : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
{ {
...@@ -58,6 +59,7 @@ class Stride ...@@ -58,6 +59,7 @@ class Stride
} }
/** Constructor allowing to pass the strides at runtime */ /** Constructor allowing to pass the strides at runtime */
EIGEN_DEVICE_FUNC
Stride(Index outerStride, Index innerStride) Stride(Index outerStride, Index innerStride)
: m_outer(outerStride), m_inner(innerStride) : m_outer(outerStride), m_inner(innerStride)
{ {
...@@ -65,13 +67,16 @@ class Stride ...@@ -65,13 +67,16 @@ class Stride
} }
/** Copy constructor */ /** Copy constructor */
EIGEN_DEVICE_FUNC
Stride(const Stride& other) Stride(const Stride& other)
: m_outer(other.outer()), m_inner(other.inner()) : m_outer(other.outer()), m_inner(other.inner())
{} {}
/** \returns the outer stride */ /** \returns the outer stride */
EIGEN_DEVICE_FUNC
inline Index outer() const { return m_outer.value(); } inline Index outer() const { return m_outer.value(); }
/** \returns the inner stride */ /** \returns the inner stride */
EIGEN_DEVICE_FUNC
inline Index inner() const { return m_inner.value(); } inline Index inner() const { return m_inner.value(); }
protected: protected:
...@@ -81,26 +86,24 @@ class Stride ...@@ -81,26 +86,24 @@ class Stride
/** \brief Convenience specialization of Stride to specify only an inner stride /** \brief Convenience specialization of Stride to specify only an inner stride
* See class Map for some examples */ * See class Map for some examples */
template<int Value = Dynamic> template<int Value>
class InnerStride : public Stride<0, Value> class InnerStride : public Stride<0, Value>
{ {
typedef Stride<0, Value> Base; typedef Stride<0, Value> Base;
public: public:
typedef DenseIndex Index; EIGEN_DEVICE_FUNC InnerStride() : Base() {}
InnerStride() : Base() {} EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} // FIXME making this explicit could break valid code
InnerStride(Index v) : Base(0, v) {}
}; };
/** \brief Convenience specialization of Stride to specify only an outer stride /** \brief Convenience specialization of Stride to specify only an outer stride
* See class Map for some examples */ * See class Map for some examples */
template<int Value = Dynamic> template<int Value>
class OuterStride : public Stride<Value, 0> class OuterStride : public Stride<Value, 0>
{ {
typedef Stride<Value, 0> Base; typedef Stride<Value, 0> Base;
public: public:
typedef DenseIndex Index; EIGEN_DEVICE_FUNC OuterStride() : Base() {}
OuterStride() : Base() {} EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {} // FIXME making this explicit could break valid code
OuterStride(Index v) : Base(v,0) {}
}; };
} // end namespace Eigen } // end namespace Eigen
......
...@@ -12,115 +12,56 @@ ...@@ -12,115 +12,56 @@
namespace Eigen { namespace Eigen {
/** \class SwapWrapper
* \ingroup Core_Module
*
* \internal
*
* \brief Internal helper class for swapping two expressions
*/
namespace internal { namespace internal {
template<typename ExpressionType>
struct traits<SwapWrapper<ExpressionType> > : traits<ExpressionType> {};
}
template<typename ExpressionType> class SwapWrapper // Overload default assignPacket behavior for swapping them
: public internal::dense_xpr_base<SwapWrapper<ExpressionType> >::type template<typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT>
class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, Specialized>
: public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn>
{ {
public: protected:
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, swap_assign_op<typename DstEvaluatorTypeT::Scalar>, BuiltIn> Base;
typedef typename internal::dense_xpr_base<SwapWrapper>::type Base; using Base::m_dst;
EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper) using Base::m_src;
typedef typename internal::packet_traits<Scalar>::type Packet; using Base::m_functor;
inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {} public:
typedef typename Base::Scalar Scalar;
inline Index rows() const { return m_expression.rows(); } typedef typename Base::DstXprType DstXprType;
inline Index cols() const { return m_expression.cols(); } typedef swap_assign_op<Scalar> Functor;
inline Index outerStride() const { return m_expression.outerStride(); }
inline Index innerStride() const { return m_expression.innerStride(); } EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, const Functor &func, DstXprType& dstExpr)
: Base(dst, src, func, dstExpr)
typedef typename internal::conditional< {}
internal::is_lvalue<ExpressionType>::value,
Scalar, template<int StoreMode, int LoadMode, typename PacketType>
const Scalar void assignPacket(Index row, Index col)
>::type ScalarWithConstIfNotLvalue; {
PacketType tmp = m_src.template packet<LoadMode,PacketType>(row,col);
inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(row,col, m_dst.template packet<StoreMode,PacketType>(row,col));
inline const Scalar* data() const { return m_expression.data(); } m_dst.template writePacket<StoreMode>(row,col,tmp);
}
inline Scalar& coeffRef(Index rowId, Index colId)
{ template<int StoreMode, int LoadMode, typename PacketType>
return m_expression.const_cast_derived().coeffRef(rowId, colId); void assignPacket(Index index)
} {
PacketType tmp = m_src.template packet<LoadMode,PacketType>(index);
inline Scalar& coeffRef(Index index) const_cast<SrcEvaluatorTypeT&>(m_src).template writePacket<LoadMode>(index, m_dst.template packet<StoreMode,PacketType>(index));
{ m_dst.template writePacket<StoreMode>(index,tmp);
return m_expression.const_cast_derived().coeffRef(index); }
}
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael)
inline Scalar& coeffRef(Index rowId, Index colId) const template<int StoreMode, int LoadMode, typename PacketType>
{ void assignPacketByOuterInner(Index outer, Index inner)
return m_expression.coeffRef(rowId, colId); {
} Index row = Base::rowIndexByOuterInner(outer, inner);
Index col = Base::colIndexByOuterInner(outer, inner);
inline Scalar& coeffRef(Index index) const assignPacket<StoreMode,LoadMode,PacketType>(row, col);
{ }
return m_expression.coeffRef(index);
}
template<typename OtherDerived>
void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
eigen_internal_assert(rowId >= 0 && rowId < rows()
&& colId >= 0 && colId < cols());
Scalar tmp = m_expression.coeff(rowId, colId);
m_expression.coeffRef(rowId, colId) = _other.coeff(rowId, colId);
_other.coeffRef(rowId, colId) = tmp;
}
template<typename OtherDerived>
void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
eigen_internal_assert(index >= 0 && index < m_expression.size());
Scalar tmp = m_expression.coeff(index);
m_expression.coeffRef(index) = _other.coeff(index);
_other.coeffRef(index) = tmp;
}
template<typename OtherDerived, int StoreMode, int LoadMode>
void copyPacket(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
eigen_internal_assert(rowId >= 0 && rowId < rows()
&& colId >= 0 && colId < cols());
Packet tmp = m_expression.template packet<StoreMode>(rowId, colId);
m_expression.template writePacket<StoreMode>(rowId, colId,
_other.template packet<LoadMode>(rowId, colId)
);
_other.template writePacket<LoadMode>(rowId, colId, tmp);
}
template<typename OtherDerived, int StoreMode, int LoadMode>
void copyPacket(Index index, const DenseBase<OtherDerived>& other)
{
OtherDerived& _other = other.const_cast_derived();
eigen_internal_assert(index >= 0 && index < m_expression.size());
Packet tmp = m_expression.template packet<StoreMode>(index);
m_expression.template writePacket<StoreMode>(index,
_other.template packet<LoadMode>(index)
);
_other.template writePacket<LoadMode>(index, tmp);
}
ExpressionType& expression() const { return m_expression; }
protected:
ExpressionType& m_expression;
}; };
} // namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_SWAP_H #endif // EIGEN_SWAP_H
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2009-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
...@@ -13,39 +13,21 @@ ...@@ -13,39 +13,21 @@
namespace Eigen { namespace Eigen {
/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
* \param MatrixType the type of the object of which we are taking the transpose
*
* This class represents an expression of the transpose of a matrix.
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/
namespace internal { namespace internal {
template<typename MatrixType> template<typename MatrixType>
struct traits<Transpose<MatrixType> > : traits<MatrixType> struct traits<Transpose<MatrixType> > : public traits<MatrixType>
{ {
typedef typename MatrixType::Scalar Scalar; typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain; typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedPlain;
typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind;
enum { enum {
RowsAtCompileTime = MatrixType::ColsAtCompileTime, RowsAtCompileTime = MatrixType::ColsAtCompileTime,
ColsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::RowsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0, FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit), Flags0 = traits<MatrixTypeNestedPlain>::Flags & ~(LvalueBit | NestByRefBit),
Flags1 = Flags0 | FlagsLvalueBit, Flags1 = Flags0 | FlagsLvalueBit,
Flags = Flags1 ^ RowMajorBit, Flags = Flags1 ^ RowMajorBit,
CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret, InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
}; };
...@@ -54,31 +36,55 @@ struct traits<Transpose<MatrixType> > : traits<MatrixType> ...@@ -54,31 +36,55 @@ struct traits<Transpose<MatrixType> > : traits<MatrixType>
template<typename MatrixType, typename StorageKind> class TransposeImpl; template<typename MatrixType, typename StorageKind> class TransposeImpl;
/** \class Transpose
* \ingroup Core_Module
*
* \brief Expression of the transpose of a matrix
*
* \tparam MatrixType the type of the object of which we are taking the transpose
*
* This class represents an expression of the transpose of a matrix.
* It is the return type of MatrixBase::transpose() and MatrixBase::adjoint()
* and most of the time this is the only way it is used.
*
* \sa MatrixBase::transpose(), MatrixBase::adjoint()
*/
template<typename MatrixType> class Transpose template<typename MatrixType> class Transpose
: public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind> : public TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>
{ {
public: public:
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base; typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose) EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {} EIGEN_DEVICE_FUNC
explicit inline Transpose(MatrixType& matrix) : m_matrix(matrix) {}
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
inline Index rows() const { return m_matrix.cols(); } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
inline Index cols() const { return m_matrix.rows(); } EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
/** \returns the nested expression */ /** \returns the nested expression */
const typename internal::remove_all<typename MatrixType::Nested>::type& EIGEN_DEVICE_FUNC
const typename internal::remove_all<MatrixTypeNested>::type&
nestedExpression() const { return m_matrix; } nestedExpression() const { return m_matrix; }
/** \returns the nested expression */ /** \returns the nested expression */
typename internal::remove_all<typename MatrixType::Nested>::type& EIGEN_DEVICE_FUNC
nestedExpression() { return m_matrix.const_cast_derived(); } typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix; }
/** \internal */
void resize(Index nrows, Index ncols) {
m_matrix.resize(ncols,nrows);
}
protected: protected:
typename MatrixType::Nested m_matrix; typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
}; };
namespace internal { namespace internal {
...@@ -97,17 +103,27 @@ struct TransposeImpl_base<MatrixType, false> ...@@ -97,17 +103,27 @@ struct TransposeImpl_base<MatrixType, false>
} // end namespace internal } // end namespace internal
// Generic API dispatcher
template<typename XprType, typename StorageKind>
class TransposeImpl
: public internal::generic_xpr_base<Transpose<XprType> >::type
{
public:
typedef typename internal::generic_xpr_base<Transpose<XprType> >::type Base;
};
template<typename MatrixType> class TransposeImpl<MatrixType,Dense> template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
: public internal::TransposeImpl_base<MatrixType>::type : public internal::TransposeImpl_base<MatrixType>::type
{ {
public: public:
typedef typename internal::TransposeImpl_base<MatrixType>::type Base; typedef typename internal::TransposeImpl_base<MatrixType>::type Base;
using Base::coeffRef;
EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>) EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
inline Index innerStride() const { return derived().nestedExpression().innerStride(); } EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
inline Index outerStride() const { return derived().nestedExpression().outerStride(); } EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
typedef typename internal::conditional< typedef typename internal::conditional<
internal::is_lvalue<MatrixType>::value, internal::is_lvalue<MatrixType>::value,
...@@ -115,64 +131,21 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> ...@@ -115,64 +131,21 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
const Scalar const Scalar
>::type ScalarWithConstIfNotLvalue; >::type ScalarWithConstIfNotLvalue;
inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); } EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
inline const Scalar* data() const { return derived().nestedExpression().data(); } EIGEN_DEVICE_FUNC inline const Scalar* data() const { return derived().nestedExpression().data(); }
inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId);
}
inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return derived().nestedExpression().const_cast_derived().coeffRef(index);
}
// FIXME: shall we keep the const version of coeffRef?
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index rowId, Index colId) const inline const Scalar& coeffRef(Index rowId, Index colId) const
{ {
return derived().nestedExpression().coeffRef(colId, rowId); return derived().nestedExpression().coeffRef(colId, rowId);
} }
EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index index) const inline const Scalar& coeffRef(Index index) const
{ {
return derived().nestedExpression().coeffRef(index); return derived().nestedExpression().coeffRef(index);
} }
inline CoeffReturnType coeff(Index rowId, Index colId) const
{
return derived().nestedExpression().coeff(colId, rowId);
}
inline CoeffReturnType coeff(Index index) const
{
return derived().nestedExpression().coeff(index);
}
template<int LoadMode>
inline const PacketScalar packet(Index rowId, Index colId) const
{
return derived().nestedExpression().template packet<LoadMode>(colId, rowId);
}
template<int LoadMode>
inline void writePacket(Index rowId, Index colId, const PacketScalar& x)
{
derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(colId, rowId, x);
}
template<int LoadMode>
inline const PacketScalar packet(Index index) const
{
return derived().nestedExpression().template packet<LoadMode>(index);
}
template<int LoadMode>
inline void writePacket(Index index, const PacketScalar& x)
{
derived().nestedExpression().const_cast_derived().template writePacket<LoadMode>(index, x);
}
}; };
/** \returns an expression of the transpose of *this. /** \returns an expression of the transpose of *this.
...@@ -198,7 +171,7 @@ template<typename Derived> ...@@ -198,7 +171,7 @@ template<typename Derived>
inline Transpose<Derived> inline Transpose<Derived>
DenseBase<Derived>::transpose() DenseBase<Derived>::transpose()
{ {
return derived(); return TransposeReturnType(derived());
} }
/** This is the const version of transpose(). /** This is the const version of transpose().
...@@ -236,8 +209,7 @@ template<typename Derived> ...@@ -236,8 +209,7 @@ template<typename Derived>
inline const typename MatrixBase<Derived>::AdjointReturnType inline const typename MatrixBase<Derived>::AdjointReturnType
MatrixBase<Derived>::adjoint() const MatrixBase<Derived>::adjoint() const
{ {
return this->transpose(); // in the complex case, the .conjugate() is be implicit here return AdjointReturnType(this->transpose());
// due to implicit conversion to return type
} }
/*************************************************************************** /***************************************************************************
...@@ -247,18 +219,38 @@ MatrixBase<Derived>::adjoint() const ...@@ -247,18 +219,38 @@ MatrixBase<Derived>::adjoint() const
namespace internal { namespace internal {
template<typename MatrixType, template<typename MatrixType,
bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic> bool IsSquare = (MatrixType::RowsAtCompileTime == MatrixType::ColsAtCompileTime) && MatrixType::RowsAtCompileTime!=Dynamic,
bool MatchPacketSize =
(int(MatrixType::RowsAtCompileTime) == int(internal::packet_traits<typename MatrixType::Scalar>::size))
&& (internal::evaluator<MatrixType>::Flags&PacketAccessBit) >
struct inplace_transpose_selector; struct inplace_transpose_selector;
template<typename MatrixType> template<typename MatrixType>
struct inplace_transpose_selector<MatrixType,true> { // square matrix struct inplace_transpose_selector<MatrixType,true,false> { // square matrix
static void run(MatrixType& m) { static void run(MatrixType& m) {
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
} }
}; };
// TODO: vectorized path is currently limited to LargestPacketSize x LargestPacketSize cases only.
template<typename MatrixType> template<typename MatrixType>
struct inplace_transpose_selector<MatrixType,false> { // non square matrix struct inplace_transpose_selector<MatrixType,true,true> { // PacketSize x PacketSize
static void run(MatrixType& m) {
typedef typename MatrixType::Scalar Scalar;
typedef typename internal::packet_traits<typename MatrixType::Scalar>::type Packet;
const Index PacketSize = internal::packet_traits<Scalar>::size;
const Index Alignment = internal::evaluator<MatrixType>::Alignment;
PacketBlock<Packet> A;
for (Index i=0; i<PacketSize; ++i)
A.packet[i] = m.template packetByOuterInner<Alignment>(i,0);
internal::ptranspose(A);
for (Index i=0; i<PacketSize; ++i)
m.template writePacket<Alignment>(m.rowIndexByOuterInner(i,0), m.colIndexByOuterInner(i,0), A.packet[i]);
}
};
template<typename MatrixType,bool MatchPacketSize>
struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non square matrix
static void run(MatrixType& m) { static void run(MatrixType& m) {
if (m.rows()==m.cols()) if (m.rows()==m.cols())
m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose()); m.matrix().template triangularView<StrictlyUpper>().swap(m.matrix().transpose());
...@@ -331,14 +323,6 @@ inline void MatrixBase<Derived>::adjointInPlace() ...@@ -331,14 +323,6 @@ inline void MatrixBase<Derived>::adjointInPlace()
namespace internal { namespace internal {
template<typename BinOp,typename NestedXpr,typename Rhs>
struct blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> >
: blas_traits<NestedXpr>
{
typedef SelfCwiseBinaryOp<BinOp,NestedXpr,Rhs> XprType;
static inline const XprType extract(const XprType& x) { return x; }
};
template<bool DestIsTransposed, typename OtherDerived> template<bool DestIsTransposed, typename OtherDerived>
struct check_transpose_aliasing_compile_time_selector struct check_transpose_aliasing_compile_time_selector
{ {
...@@ -404,15 +388,15 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false> ...@@ -404,15 +388,15 @@ struct checkTransposeAliasing_impl<Derived, OtherDerived, false>
} }
}; };
} // end namespace internal template<typename Dst, typename Src>
void check_for_aliasing(const Dst &dst, const Src &src)
template<typename Derived>
template<typename OtherDerived>
void DenseBase<Derived>::checkTransposeAliasing(const OtherDerived& other) const
{ {
internal::checkTransposeAliasing_impl<Derived, OtherDerived>::run(derived(), other); internal::checkTransposeAliasing_impl<Dst, Src>::run(dst, src);
} }
#endif
} // end namespace internal
#endif // EIGEN_NO_DEBUG
} // end namespace Eigen } // end namespace Eigen
......
...@@ -12,39 +12,6 @@ ...@@ -12,39 +12,6 @@
namespace Eigen { namespace Eigen {
/** \class Transpositions
* \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*
* \param SizeAtCompileTime the number of transpositions, or Dynamic
* \param MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
*
* This class represents a permutation transformation as a sequence of \em n transpositions
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
* the rows \c i and \c indices[i] of the matrix \c M.
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
*
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
*
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
* \code
* Transpositions tr;
* MatrixXf mat;
* mat = tr * mat;
* \endcode
* In this example, we detect that the matrix appears on both side, and so the transpositions
* are applied in-place without any temporary or extra copy.
*
* \sa class PermutationMatrix
*/
namespace internal {
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed=false> struct transposition_matrix_product_retval;
}
template<typename Derived> template<typename Derived>
class TranspositionsBase class TranspositionsBase
{ {
...@@ -53,7 +20,8 @@ class TranspositionsBase ...@@ -53,7 +20,8 @@ class TranspositionsBase
public: public:
typedef typename Traits::IndicesType IndicesType; typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index; typedef typename IndicesType::Scalar StorageIndex;
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
Derived& derived() { return *static_cast<Derived*>(this); } Derived& derived() { return *static_cast<Derived*>(this); }
const Derived& derived() const { return *static_cast<const Derived*>(this); } const Derived& derived() const { return *static_cast<const Derived*>(this); }
...@@ -65,7 +33,7 @@ class TranspositionsBase ...@@ -65,7 +33,7 @@ class TranspositionsBase
indices() = other.indices(); indices() = other.indices();
return derived(); return derived();
} }
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
/** This is a special case of the templated operator=. Its purpose is to /** This is a special case of the templated operator=. Its purpose is to
* prevent a default operator= from hiding the templated operator=. * prevent a default operator= from hiding the templated operator=.
...@@ -78,20 +46,24 @@ class TranspositionsBase ...@@ -78,20 +46,24 @@ class TranspositionsBase
#endif #endif
/** \returns the number of transpositions */ /** \returns the number of transpositions */
inline Index size() const { return indices().size(); } Index size() const { return indices().size(); }
/** \returns the number of rows of the equivalent permutation matrix */
Index rows() const { return indices().size(); }
/** \returns the number of columns of the equivalent permutation matrix */
Index cols() const { return indices().size(); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline const Index& coeff(Index i) const { return indices().coeff(i); } inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline Index& coeffRef(Index i) { return indices().coeffRef(i); } inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline const Index& operator()(Index i) const { return indices()(i); } inline const StorageIndex& operator()(Index i) const { return indices()(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline Index& operator()(Index i) { return indices()(i); } inline StorageIndex& operator()(Index i) { return indices()(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline const Index& operator[](Index i) const { return indices()(i); } inline const StorageIndex& operator[](Index i) const { return indices()(i); }
/** Direct access to the underlying index vector */ /** Direct access to the underlying index vector */
inline Index& operator[](Index i) { return indices()(i); } inline StorageIndex& operator[](Index i) { return indices()(i); }
/** const version of indices(). */ /** const version of indices(). */
const IndicesType& indices() const { return derived().indices(); } const IndicesType& indices() const { return derived().indices(); }
...@@ -99,7 +71,7 @@ class TranspositionsBase ...@@ -99,7 +71,7 @@ class TranspositionsBase
IndicesType& indices() { return derived().indices(); } IndicesType& indices() { return derived().indices(); }
/** Resizes to given size. */ /** Resizes to given size. */
inline void resize(int newSize) inline void resize(Index newSize)
{ {
indices().resize(newSize); indices().resize(newSize);
} }
...@@ -107,7 +79,7 @@ class TranspositionsBase ...@@ -107,7 +79,7 @@ class TranspositionsBase
/** Sets \c *this to represent an identity transformation */ /** Sets \c *this to represent an identity transformation */
void setIdentity() void setIdentity()
{ {
for(int i = 0; i < indices().size(); ++i) for(StorageIndex i = 0; i < indices().size(); ++i)
coeffRef(i) = i; coeffRef(i) = i;
} }
...@@ -144,23 +116,53 @@ class TranspositionsBase ...@@ -144,23 +116,53 @@ class TranspositionsBase
}; };
namespace internal { namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType> template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> > struct traits<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{ {
typedef IndexType Index; typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType;
typedef Matrix<Index, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; typedef TranspositionsStorage StorageKind;
}; };
} }
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType> /** \class Transpositions
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType> > * \ingroup Core_Module
*
* \brief Represents a sequence of transpositions (row/column interchange)
*
* \tparam SizeAtCompileTime the number of transpositions, or Dynamic
* \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it.
*
* This class represents a permutation transformation as a sequence of \em n transpositions
* \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices.
* Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges
* the rows \c i and \c indices[i] of the matrix \c M.
* A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange.
*
* Compared to the class PermutationMatrix, such a sequence of transpositions is what is
* computed during a decomposition with pivoting, and it is faster when applying the permutation in-place.
*
* To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example:
* \code
* Transpositions tr;
* MatrixXf mat;
* mat = tr * mat;
* \endcode
* In this example, we detect that the matrix appears on both sides of the assignment, and so the transpositions
* are applied in-place without any temporary or extra copy.
*
* \sa class PermutationMatrix
*/
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex>
class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{ {
typedef internal::traits<Transpositions> Traits; typedef internal::traits<Transpositions> Traits;
public: public:
typedef TranspositionsBase<Transpositions> Base; typedef TranspositionsBase<Transpositions> Base;
typedef typename Traits::IndicesType IndicesType; typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index; typedef typename IndicesType::Scalar StorageIndex;
inline Transpositions() {} inline Transpositions() {}
...@@ -177,7 +179,7 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim ...@@ -177,7 +179,7 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
/** Generic constructor from expression of the transposition indices. */ /** Generic constructor from expression of the transposition indices. */
template<typename Other> template<typename Other>
explicit inline Transpositions(const MatrixBase<Other>& a_indices) : m_indices(a_indices) explicit inline Transpositions(const MatrixBase<Other>& indices) : m_indices(indices)
{} {}
/** Copies the \a other transpositions into \c *this */ /** Copies the \a other transpositions into \c *this */
...@@ -215,30 +217,32 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim ...@@ -215,30 +217,32 @@ class Transpositions : public TranspositionsBase<Transpositions<SizeAtCompileTim
namespace internal { namespace internal {
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int _PacketAccess> template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int _PacketAccess>
struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,_PacketAccess> > struct traits<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,_PacketAccess> >
: traits<PermutationMatrix<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex> >
{ {
typedef IndexType Index; typedef Map<const Matrix<_StorageIndex,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType;
typedef Map<const Matrix<Index,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1>, _PacketAccess> IndicesType; typedef _StorageIndex StorageIndex;
typedef TranspositionsStorage StorageKind;
}; };
} }
template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename IndexType, int PacketAccess> template<int SizeAtCompileTime, int MaxSizeAtCompileTime, typename _StorageIndex, int PacketAccess>
class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess>
: public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,PacketAccess> > : public TranspositionsBase<Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,_StorageIndex>,PacketAccess> >
{ {
typedef internal::traits<Map> Traits; typedef internal::traits<Map> Traits;
public: public:
typedef TranspositionsBase<Map> Base; typedef TranspositionsBase<Map> Base;
typedef typename Traits::IndicesType IndicesType; typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index; typedef typename IndicesType::Scalar StorageIndex;
inline Map(const Index* indicesPtr) explicit inline Map(const StorageIndex* indicesPtr)
: m_indices(indicesPtr) : m_indices(indicesPtr)
{} {}
inline Map(const Index* indicesPtr, Index size) inline Map(const StorageIndex* indicesPtr, Index size)
: m_indices(indicesPtr,size) : m_indices(indicesPtr,size)
{} {}
...@@ -274,9 +278,9 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,Packe ...@@ -274,9 +278,9 @@ class Map<Transpositions<SizeAtCompileTime,MaxSizeAtCompileTime,IndexType>,Packe
namespace internal { namespace internal {
template<typename _IndicesType> template<typename _IndicesType>
struct traits<TranspositionsWrapper<_IndicesType> > struct traits<TranspositionsWrapper<_IndicesType> >
: traits<PermutationWrapper<_IndicesType> >
{ {
typedef typename _IndicesType::Scalar Index; typedef TranspositionsStorage StorageKind;
typedef _IndicesType IndicesType;
}; };
} }
...@@ -289,10 +293,10 @@ class TranspositionsWrapper ...@@ -289,10 +293,10 @@ class TranspositionsWrapper
typedef TranspositionsBase<TranspositionsWrapper> Base; typedef TranspositionsBase<TranspositionsWrapper> Base;
typedef typename Traits::IndicesType IndicesType; typedef typename Traits::IndicesType IndicesType;
typedef typename IndicesType::Scalar Index; typedef typename IndicesType::Scalar StorageIndex;
inline TranspositionsWrapper(IndicesType& a_indices) explicit inline TranspositionsWrapper(IndicesType& indices)
: m_indices(a_indices) : m_indices(indices)
{} {}
/** Copies the \a other transpositions into \c *this */ /** Copies the \a other transpositions into \c *this */
...@@ -321,83 +325,46 @@ class TranspositionsWrapper ...@@ -321,83 +325,46 @@ class TranspositionsWrapper
protected: protected:
const typename IndicesType::Nested m_indices; typename IndicesType::Nested m_indices;
}; };
/** \returns the \a matrix with the \a transpositions applied to the columns. /** \returns the \a matrix with the \a transpositions applied to the columns.
*/ */
template<typename Derived, typename TranspositionsDerived> template<typename MatrixDerived, typename TranspositionsDerived>
inline const internal::transposition_matrix_product_retval<TranspositionsDerived, Derived, OnTheRight> EIGEN_DEVICE_FUNC
operator*(const MatrixBase<Derived>& matrix, const Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
const TranspositionsBase<TranspositionsDerived> &transpositions) operator*(const MatrixBase<MatrixDerived> &matrix,
const TranspositionsBase<TranspositionsDerived>& transpositions)
{ {
return internal::transposition_matrix_product_retval return Product<MatrixDerived, TranspositionsDerived, AliasFreeProduct>
<TranspositionsDerived, Derived, OnTheRight> (matrix.derived(), transpositions.derived());
(transpositions.derived(), matrix.derived());
} }
/** \returns the \a matrix with the \a transpositions applied to the rows. /** \returns the \a matrix with the \a transpositions applied to the rows.
*/ */
template<typename Derived, typename TranspositionDerived> template<typename TranspositionsDerived, typename MatrixDerived>
inline const internal::transposition_matrix_product_retval EIGEN_DEVICE_FUNC
<TranspositionDerived, Derived, OnTheLeft> const Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
operator*(const TranspositionsBase<TranspositionDerived> &transpositions, operator*(const TranspositionsBase<TranspositionsDerived> &transpositions,
const MatrixBase<Derived>& matrix) const MatrixBase<MatrixDerived>& matrix)
{ {
return internal::transposition_matrix_product_retval return Product<TranspositionsDerived, MatrixDerived, AliasFreeProduct>
<TranspositionDerived, Derived, OnTheLeft> (transpositions.derived(), matrix.derived());
(transpositions.derived(), matrix.derived());
} }
namespace internal { // Template partial specialization for transposed/inverse transpositions
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
struct traits<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
{
typedef typename MatrixType::PlainObject ReturnType;
};
template<typename TranspositionType, typename MatrixType, int Side, bool Transposed>
struct transposition_matrix_product_retval
: public ReturnByValue<transposition_matrix_product_retval<TranspositionType, MatrixType, Side, Transposed> >
{
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename TranspositionType::Index Index;
transposition_matrix_product_retval(const TranspositionType& tr, const MatrixType& matrix)
: m_transpositions(tr), m_matrix(matrix)
{}
inline int rows() const { return m_matrix.rows(); } namespace internal {
inline int cols() const { return m_matrix.cols(); }
template<typename Dest> inline void evalTo(Dest& dst) const
{
const int size = m_transpositions.size();
Index j = 0;
if(!(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix)))
dst = m_matrix;
for(int k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
if((j=m_transpositions.coeff(k))!=k)
{
if(Side==OnTheLeft)
dst.row(k).swap(dst.row(j));
else if(Side==OnTheRight)
dst.col(k).swap(dst.col(j));
}
}
protected: template<typename Derived>
const TranspositionType& m_transpositions; struct traits<Transpose<TranspositionsBase<Derived> > >
typename MatrixType::Nested m_matrix; : traits<Derived>
}; {};
} // end namespace internal } // end namespace internal
/* Template partial specialization for transposed/inverse transpositions */
template<typename TranspositionsDerived> template<typename TranspositionsDerived>
class Transpose<TranspositionsBase<TranspositionsDerived> > class Transpose<TranspositionsBase<TranspositionsDerived> >
{ {
...@@ -405,27 +372,31 @@ class Transpose<TranspositionsBase<TranspositionsDerived> > ...@@ -405,27 +372,31 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
typedef typename TranspositionType::IndicesType IndicesType; typedef typename TranspositionType::IndicesType IndicesType;
public: public:
Transpose(const TranspositionType& t) : m_transpositions(t) {} explicit Transpose(const TranspositionType& t) : m_transpositions(t) {}
inline int size() const { return m_transpositions.size(); } Index size() const { return m_transpositions.size(); }
Index rows() const { return m_transpositions.size(); }
Index cols() const { return m_transpositions.size(); }
/** \returns the \a matrix with the inverse transpositions applied to the columns. /** \returns the \a matrix with the inverse transpositions applied to the columns.
*/ */
template<typename Derived> friend template<typename OtherDerived> friend
inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true> const Product<OtherDerived, Transpose, AliasFreeProduct>
operator*(const MatrixBase<Derived>& matrix, const Transpose& trt) operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
{ {
return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheRight, true>(trt.m_transpositions, matrix.derived()); return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
} }
/** \returns the \a matrix with the inverse transpositions applied to the rows. /** \returns the \a matrix with the inverse transpositions applied to the rows.
*/ */
template<typename Derived> template<typename OtherDerived>
inline const internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true> const Product<Transpose, OtherDerived, AliasFreeProduct>
operator*(const MatrixBase<Derived>& matrix) const operator*(const MatrixBase<OtherDerived>& matrix) const
{ {
return internal::transposition_matrix_product_retval<TranspositionType, Derived, OnTheLeft, true>(m_transpositions, matrix.derived()); return Product<Transpose, OtherDerived, AliasFreeProduct>(*this, matrix.derived());
} }
const TranspositionType& nestedExpression() const { return m_transpositions; }
protected: protected:
const TranspositionType& m_transpositions; const TranspositionType& m_transpositions;
......
...@@ -19,9 +19,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv ...@@ -19,9 +19,7 @@ template<int Side, typename TriangularType, typename Rhs> struct triangular_solv
} }
/** \internal /** \class TriangularBase
*
* \class TriangularBase
* \ingroup Core_Module * \ingroup Core_Module
* *
* \brief Base class for triangular part in a matrix * \brief Base class for triangular part in a matrix
...@@ -32,41 +30,69 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> ...@@ -32,41 +30,69 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
enum { enum {
Mode = internal::traits<Derived>::Mode, Mode = internal::traits<Derived>::Mode,
CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime, RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime, ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime, MaxRowsAtCompileTime = internal::traits<Derived>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime MaxColsAtCompileTime = internal::traits<Derived>::MaxColsAtCompileTime,
SizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::RowsAtCompileTime,
internal::traits<Derived>::ColsAtCompileTime>::ret),
/**< This is equal to the number of coefficients, i.e. the number of
* rows times the number of columns, or to \a Dynamic if this is not
* known at compile-time. \sa RowsAtCompileTime, ColsAtCompileTime */
MaxSizeAtCompileTime = (internal::size_at_compile_time<internal::traits<Derived>::MaxRowsAtCompileTime,
internal::traits<Derived>::MaxColsAtCompileTime>::ret)
}; };
typedef typename internal::traits<Derived>::Scalar Scalar; typedef typename internal::traits<Derived>::Scalar Scalar;
typedef typename internal::traits<Derived>::StorageKind StorageKind; typedef typename internal::traits<Derived>::StorageKind StorageKind;
typedef typename internal::traits<Derived>::Index Index; typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType; typedef typename internal::traits<Derived>::FullMatrixType DenseMatrixType;
typedef DenseMatrixType DenseType; typedef DenseMatrixType DenseType;
typedef Derived const& Nested;
EIGEN_DEVICE_FUNC
inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); } inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
EIGEN_DEVICE_FUNC
inline Index rows() const { return derived().rows(); } inline Index rows() const { return derived().rows(); }
EIGEN_DEVICE_FUNC
inline Index cols() const { return derived().cols(); } inline Index cols() const { return derived().cols(); }
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return derived().outerStride(); } inline Index outerStride() const { return derived().outerStride(); }
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return derived().innerStride(); } inline Index innerStride() const { return derived().innerStride(); }
// dummy resize function
void resize(Index rows, Index cols)
{
EIGEN_UNUSED_VARIABLE(rows);
EIGEN_UNUSED_VARIABLE(cols);
eigen_assert(rows==this->rows() && cols==this->cols());
}
EIGEN_DEVICE_FUNC
inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); } inline Scalar coeff(Index row, Index col) const { return derived().coeff(row,col); }
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); } inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }
/** \see MatrixBase::copyCoeff(row,col) /** \see MatrixBase::copyCoeff(row,col)
*/ */
template<typename Other> template<typename Other>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other) EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other)
{ {
derived().coeffRef(row, col) = other.coeff(row, col); derived().coeffRef(row, col) = other.coeff(row, col);
} }
EIGEN_DEVICE_FUNC
inline Scalar operator()(Index row, Index col) const inline Scalar operator()(Index row, Index col) const
{ {
check_coordinates(row, col); check_coordinates(row, col);
return coeff(row,col); return coeff(row,col);
} }
EIGEN_DEVICE_FUNC
inline Scalar& operator()(Index row, Index col) inline Scalar& operator()(Index row, Index col)
{ {
check_coordinates(row, col); check_coordinates(row, col);
...@@ -74,15 +100,20 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> ...@@ -74,15 +100,20 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
} }
#ifndef EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_DEVICE_FUNC
inline const Derived& derived() const { return *static_cast<const Derived*>(this); } inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
EIGEN_DEVICE_FUNC
inline Derived& derived() { return *static_cast<Derived*>(this); } inline Derived& derived() { return *static_cast<Derived*>(this); }
#endif // not EIGEN_PARSED_BY_DOXYGEN #endif // not EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived> template<typename DenseDerived>
EIGEN_DEVICE_FUNC
void evalTo(MatrixBase<DenseDerived> &other) const; void evalTo(MatrixBase<DenseDerived> &other) const;
template<typename DenseDerived> template<typename DenseDerived>
EIGEN_DEVICE_FUNC
void evalToLazy(MatrixBase<DenseDerived> &other) const; void evalToLazy(MatrixBase<DenseDerived> &other) const;
EIGEN_DEVICE_FUNC
DenseMatrixType toDenseMatrix() const DenseMatrixType toDenseMatrix() const
{ {
DenseMatrixType res(rows(), cols()); DenseMatrixType res(rows(), cols());
...@@ -119,17 +150,17 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived> ...@@ -119,17 +150,17 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
/** \class TriangularView /** \class TriangularView
* \ingroup Core_Module * \ingroup Core_Module
* *
* \brief Base class for triangular part in a matrix * \brief Expression of a triangular part in a matrix
* *
* \param MatrixType the type of the object in which we are taking the triangular part * \param MatrixType the type of the object in which we are taking the triangular part
* \param Mode the kind of triangular matrix expression to construct. Can be #Upper, * \param Mode the kind of triangular matrix expression to construct. Can be #Upper,
* #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower. * #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower.
* This is in fact a bit field; it must have either #Upper or #Lower, * This is in fact a bit field; it must have either #Upper or #Lower,
* and additionnaly it may have #UnitDiag or #ZeroDiag or neither. * and additionally it may have #UnitDiag or #ZeroDiag or neither.
* *
* This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular
* matrices one should speak of "trapezoid" parts. This class is the return type * matrices one should speak of "trapezoid" parts. This class is the return type
* of MatrixBase::triangularView() and most of the time this is the only way it is used. * of MatrixBase::triangularView() and SparseMatrixBase::triangularView(), and most of the time this is the only way it is used.
* *
* \sa MatrixBase::triangularView() * \sa MatrixBase::triangularView()
*/ */
...@@ -137,499 +168,405 @@ namespace internal { ...@@ -137,499 +168,405 @@ namespace internal {
template<typename MatrixType, unsigned int _Mode> template<typename MatrixType, unsigned int _Mode>
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType> struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
{ {
typedef typename nested<MatrixType>::type MatrixTypeNested; typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef; typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef typename MatrixType::PlainObject FullMatrixType;
typedef MatrixType ExpressionType; typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject DenseMatrixType;
enum { enum {
Mode = _Mode, Mode = _Mode,
Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) | Mode, FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
CoeffReadCost = MatrixTypeNestedCleaned::CoeffReadCost Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)))
}; };
}; };
} }
template<int Mode, bool LhsIsTriangular, template<typename _MatrixType, unsigned int _Mode, typename StorageKind> class TriangularViewImpl;
typename Lhs, bool LhsIsVector,
typename Rhs, bool RhsIsVector>
struct TriangularProduct;
template<typename _MatrixType, unsigned int _Mode> class TriangularView template<typename _MatrixType, unsigned int _Mode> class TriangularView
: public TriangularBase<TriangularView<_MatrixType, _Mode> > : public TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind >
{ {
public: public:
typedef TriangularBase<TriangularView> Base; typedef TriangularViewImpl<_MatrixType, _Mode, typename internal::traits<_MatrixType>::StorageKind > Base;
typedef typename internal::traits<TriangularView>::Scalar Scalar; typedef typename internal::traits<TriangularView>::Scalar Scalar;
typedef _MatrixType MatrixType; typedef _MatrixType MatrixType;
typedef typename internal::traits<TriangularView>::DenseMatrixType DenseMatrixType;
typedef DenseMatrixType PlainObject;
protected: protected:
typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested; typedef typename internal::traits<TriangularView>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef; typedef typename internal::traits<TriangularView>::MatrixTypeNestedNonRef MatrixTypeNestedNonRef;
typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned MatrixTypeNestedCleaned;
typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType; typedef typename internal::remove_all<typename MatrixType::ConjugateReturnType>::type MatrixConjugateReturnType;
public: public:
using Base::evalToLazy;
typedef typename internal::traits<TriangularView>::StorageKind StorageKind; typedef typename internal::traits<TriangularView>::StorageKind StorageKind;
typedef typename internal::traits<TriangularView>::Index Index; typedef typename internal::traits<TriangularView>::MatrixTypeNestedCleaned NestedExpression;
enum { enum {
Mode = _Mode, Mode = _Mode,
Flags = internal::traits<TriangularView>::Flags,
TransposeMode = (Mode & Upper ? Lower : 0) TransposeMode = (Mode & Upper ? Lower : 0)
| (Mode & Lower ? Upper : 0) | (Mode & Lower ? Upper : 0)
| (Mode & (UnitDiag)) | (Mode & (UnitDiag))
| (Mode & (ZeroDiag)) | (Mode & (ZeroDiag)),
IsVectorAtCompileTime = false
}; };
inline TriangularView(const MatrixType& matrix) : m_matrix(matrix) EIGEN_DEVICE_FUNC
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
{} {}
using Base::operator=;
TriangularView& operator=(const TriangularView &other)
{ return Base::operator=(other); }
/** \copydoc EigenBase::rows() */
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }
/** \copydoc EigenBase::cols() */
EIGEN_DEVICE_FUNC
inline Index cols() const { return m_matrix.cols(); } inline Index cols() const { return m_matrix.cols(); }
inline Index outerStride() const { return m_matrix.outerStride(); }
inline Index innerStride() const { return m_matrix.innerStride(); } /** \returns a const reference to the nested expression */
EIGEN_DEVICE_FUNC
const NestedExpression& nestedExpression() const { return m_matrix; }
/** \returns a reference to the nested expression */
EIGEN_DEVICE_FUNC
NestedExpression& nestedExpression() { return m_matrix; }
typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
/** \sa MatrixBase::conjugate() const */
EIGEN_DEVICE_FUNC
inline const ConjugateReturnType conjugate() const
{ return ConjugateReturnType(m_matrix.conjugate()); }
typedef TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> AdjointReturnType;
/** \sa MatrixBase::adjoint() const */
EIGEN_DEVICE_FUNC
inline const AdjointReturnType adjoint() const
{ return AdjointReturnType(m_matrix.adjoint()); }
typedef TriangularView<typename MatrixType::TransposeReturnType,TransposeMode> TransposeReturnType;
/** \sa MatrixBase::transpose() */
EIGEN_DEVICE_FUNC
inline TransposeReturnType transpose()
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
typename MatrixType::TransposeReturnType tmp(m_matrix);
return TransposeReturnType(tmp);
}
typedef TriangularView<const typename MatrixType::ConstTransposeReturnType,TransposeMode> ConstTransposeReturnType;
/** \sa MatrixBase::transpose() const */
EIGEN_DEVICE_FUNC
inline const ConstTransposeReturnType transpose() const
{
return ConstTransposeReturnType(m_matrix.transpose());
}
template<typename Other>
EIGEN_DEVICE_FUNC
inline const Solve<TriangularView, Other>
solve(const MatrixBase<Other>& other) const
{ return Solve<TriangularView, Other>(*this, other.derived()); }
// workaround MSVC ICE
#if EIGEN_COMP_MSVC
template<int Side, typename Other>
EIGEN_DEVICE_FUNC
inline const internal::triangular_solve_retval<Side,TriangularView, Other>
solve(const MatrixBase<Other>& other) const
{ return Base::template solve<Side>(other); }
#else
using Base::solve;
#endif
/** \returns a selfadjoint view of the referenced triangular part which must be either \c #Upper or \c #Lower.
*
* This is a shortcut for \code this->nestedExpression().selfadjointView<(*this)::Mode>() \endcode
* \sa MatrixBase::selfadjointView() */
EIGEN_DEVICE_FUNC
SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
{
EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
}
/** This is the const version of selfadjointView() */
EIGEN_DEVICE_FUNC
const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
{
EIGEN_STATIC_ASSERT((Mode&(UnitDiag|ZeroDiag))==0,PROGRAMMING_ERROR);
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
}
/** \returns the determinant of the triangular matrix
* \sa MatrixBase::determinant() */
EIGEN_DEVICE_FUNC
Scalar determinant() const
{
if (Mode & UnitDiag)
return 1;
else if (Mode & ZeroDiag)
return 0;
else
return m_matrix.diagonal().prod();
}
protected:
MatrixTypeNested m_matrix;
};
/** \ingroup Core_Module
*
* \brief Base class for a triangular part in a \b dense matrix
*
* This class is an abstract base class of class TriangularView, and objects of type TriangularViewImpl cannot be instantiated.
* It extends class TriangularView with additional methods which are available for dense expressions only.
*
* \sa class TriangularView, MatrixBase::triangularView()
*/
template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_MatrixType,_Mode,Dense>
: public TriangularBase<TriangularView<_MatrixType, _Mode> >
{
public:
typedef TriangularView<_MatrixType, _Mode> TriangularViewType;
typedef TriangularBase<TriangularViewType> Base;
typedef typename internal::traits<TriangularViewType>::Scalar Scalar;
typedef _MatrixType MatrixType;
typedef typename MatrixType::PlainObject DenseMatrixType;
typedef DenseMatrixType PlainObject;
public:
using Base::evalToLazy;
using Base::derived;
typedef typename internal::traits<TriangularViewType>::StorageKind StorageKind;
enum {
Mode = _Mode,
Flags = internal::traits<TriangularViewType>::Flags
};
/** \returns the outer-stride of the underlying dense matrix
* \sa DenseCoeffsBase::outerStride() */
EIGEN_DEVICE_FUNC
inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
/** \returns the inner-stride of the underlying dense matrix
* \sa DenseCoeffsBase::innerStride() */
EIGEN_DEVICE_FUNC
inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
/** \sa MatrixBase::operator+=() */ /** \sa MatrixBase::operator+=() */
template<typename Other> TriangularView& operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); } template<typename Other>
EIGEN_DEVICE_FUNC
TriangularViewType& operator+=(const DenseBase<Other>& other) {
internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename Other::Scalar>());
return derived();
}
/** \sa MatrixBase::operator-=() */ /** \sa MatrixBase::operator-=() */
template<typename Other> TriangularView& operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); } template<typename Other>
EIGEN_DEVICE_FUNC
TriangularViewType& operator-=(const DenseBase<Other>& other) {
internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename Other::Scalar>());
return derived();
}
/** \sa MatrixBase::operator*=() */ /** \sa MatrixBase::operator*=() */
TriangularView& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; } EIGEN_DEVICE_FUNC
/** \sa MatrixBase::operator/=() */ TriangularViewType& operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() * other; }
TriangularView& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; } /** \sa DenseBase::operator/=() */
EIGEN_DEVICE_FUNC
TriangularViewType& operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = derived().nestedExpression() / other; }
/** \sa MatrixBase::fill() */ /** \sa MatrixBase::fill() */
EIGEN_DEVICE_FUNC
void fill(const Scalar& value) { setConstant(value); } void fill(const Scalar& value) { setConstant(value); }
/** \sa MatrixBase::setConstant() */ /** \sa MatrixBase::setConstant() */
TriangularView& setConstant(const Scalar& value) EIGEN_DEVICE_FUNC
{ return *this = MatrixType::Constant(rows(), cols(), value); } TriangularViewType& setConstant(const Scalar& value)
{ return *this = MatrixType::Constant(derived().rows(), derived().cols(), value); }
/** \sa MatrixBase::setZero() */ /** \sa MatrixBase::setZero() */
TriangularView& setZero() { return setConstant(Scalar(0)); } EIGEN_DEVICE_FUNC
TriangularViewType& setZero() { return setConstant(Scalar(0)); }
/** \sa MatrixBase::setOnes() */ /** \sa MatrixBase::setOnes() */
TriangularView& setOnes() { return setConstant(Scalar(1)); } EIGEN_DEVICE_FUNC
TriangularViewType& setOnes() { return setConstant(Scalar(1)); }
/** \sa MatrixBase::coeff() /** \sa MatrixBase::coeff()
* \warning the coordinates must fit into the referenced triangular part * \warning the coordinates must fit into the referenced triangular part
*/ */
EIGEN_DEVICE_FUNC
inline Scalar coeff(Index row, Index col) const inline Scalar coeff(Index row, Index col) const
{ {
Base::check_coordinates_internal(row, col); Base::check_coordinates_internal(row, col);
return m_matrix.coeff(row, col); return derived().nestedExpression().coeff(row, col);
} }
/** \sa MatrixBase::coeffRef() /** \sa MatrixBase::coeffRef()
* \warning the coordinates must fit into the referenced triangular part * \warning the coordinates must fit into the referenced triangular part
*/ */
EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index col) inline Scalar& coeffRef(Index row, Index col)
{ {
EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
Base::check_coordinates_internal(row, col); Base::check_coordinates_internal(row, col);
return m_matrix.const_cast_derived().coeffRef(row, col); return derived().nestedExpression().coeffRef(row, col);
} }
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
/** Assigns a triangular matrix to a triangular part of a dense matrix */ /** Assigns a triangular matrix to a triangular part of a dense matrix */
template<typename OtherDerived> template<typename OtherDerived>
TriangularView& operator=(const TriangularBase<OtherDerived>& other); EIGEN_DEVICE_FUNC
TriangularViewType& operator=(const TriangularBase<OtherDerived>& other);
/** Shortcut for \code *this = other.triangularView<(*this)::Mode>() \endcode */
template<typename OtherDerived> template<typename OtherDerived>
TriangularView& operator=(const MatrixBase<OtherDerived>& other); EIGEN_DEVICE_FUNC
TriangularViewType& operator=(const MatrixBase<OtherDerived>& other);
TriangularView& operator=(const TriangularView& other) #ifndef EIGEN_PARSED_BY_DOXYGEN
{ return *this = other.nestedExpression(); } EIGEN_DEVICE_FUNC
TriangularViewType& operator=(const TriangularViewImpl& other)
{ return *this = other.derived().nestedExpression(); }
/** \deprecated */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void lazyAssign(const TriangularBase<OtherDerived>& other); void lazyAssign(const TriangularBase<OtherDerived>& other);
/** \deprecated */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void lazyAssign(const MatrixBase<OtherDerived>& other); void lazyAssign(const MatrixBase<OtherDerived>& other);
#endif
/** \sa MatrixBase::conjugate() */
inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::conjugate() const */
inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
{ return m_matrix.conjugate(); }
/** \sa MatrixBase::adjoint() const */
inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
{ return m_matrix.adjoint(); }
/** \sa MatrixBase::transpose() */
inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
{
EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().transpose();
}
/** \sa MatrixBase::transpose() const */
inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
{
return m_matrix.transpose();
}
/** Efficient triangular matrix times vector/matrix product */ /** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived> template<typename OtherDerived>
TriangularProduct<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1> EIGEN_DEVICE_FUNC
const Product<TriangularViewType,OtherDerived>
operator*(const MatrixBase<OtherDerived>& rhs) const operator*(const MatrixBase<OtherDerived>& rhs) const
{ {
return TriangularProduct return Product<TriangularViewType,OtherDerived>(derived(), rhs.derived());
<Mode, true, MatrixType, false, OtherDerived, OtherDerived::ColsAtCompileTime==1>
(m_matrix, rhs.derived());
} }
/** Efficient vector/matrix times triangular matrix product */ /** Efficient vector/matrix times triangular matrix product */
template<typename OtherDerived> friend template<typename OtherDerived> friend
TriangularProduct<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false> EIGEN_DEVICE_FUNC
operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs) const Product<OtherDerived,TriangularViewType>
{ operator*(const MatrixBase<OtherDerived>& lhs, const TriangularViewImpl& rhs)
return TriangularProduct {
<Mode, false, OtherDerived, OtherDerived::RowsAtCompileTime==1, MatrixType, false> return Product<OtherDerived,TriangularViewType>(lhs.derived(),rhs.derived());
(lhs.derived(),rhs.m_matrix); }
}
/** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
#ifdef EIGEN2_SUPPORT *
template<typename OtherDerived> * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if
struct eigen2_product_return_type * \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if
{ * \a Side==OnTheRight.
typedef typename TriangularView<MatrixType,Mode>::DenseMatrixType DenseMatrixType; *
typedef typename OtherDerived::PlainObject::DenseType OtherPlainObject; * Note that the template parameter \c Side can be ommitted, in which case \c Side==OnTheLeft
typedef typename ProductReturnType<DenseMatrixType, OtherPlainObject>::Type ProdRetType; *
typedef typename ProdRetType::PlainObject type; * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the
}; * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this
template<typename OtherDerived> * is an upper (resp. lower) triangular matrix.
const typename eigen2_product_return_type<OtherDerived>::type *
operator*(const EigenBase<OtherDerived>& rhs) const * Example: \include Triangular_solve.cpp
{ * Output: \verbinclude Triangular_solve.out
typename OtherDerived::PlainObject::DenseType rhsPlainObject; *
rhs.evalTo(rhsPlainObject); * This function returns an expression of the inverse-multiply and can works in-place if it is assigned
return this->toDenseMatrix() * rhsPlainObject; * to the same matrix or vector \a other.
} *
template<typename OtherMatrixType> * For users coming from BLAS, this function (and more specifically solveInPlace()) offer
bool isApprox(const TriangularView<OtherMatrixType, Mode>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
{ *
return this->toDenseMatrix().isApprox(other.toDenseMatrix(), precision); * \sa TriangularView::solveInPlace()
} */
template<typename OtherDerived>
bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
{
return this->toDenseMatrix().isApprox(other, precision);
}
#endif // EIGEN2_SUPPORT
template<int Side, typename Other> template<int Side, typename Other>
inline const internal::triangular_solve_retval<Side,TriangularView, Other> EIGEN_DEVICE_FUNC
inline const internal::triangular_solve_retval<Side,TriangularViewType, Other>
solve(const MatrixBase<Other>& other) const; solve(const MatrixBase<Other>& other) const;
/** "in-place" version of TriangularView::solve() where the result is written in \a other
*
* \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here.
* This function will const_cast it, so constness isn't honored here.
*
* Note that the template parameter \c Side can be omitted, in which case \c Side==OnTheLeft
*
* See TriangularView::solve() for the details.
*/
template<int Side, typename OtherDerived> template<int Side, typename OtherDerived>
EIGEN_DEVICE_FUNC
void solveInPlace(const MatrixBase<OtherDerived>& other) const; void solveInPlace(const MatrixBase<OtherDerived>& other) const;
template<typename Other>
inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other>
solve(const MatrixBase<Other>& other) const
{ return solve<OnTheLeft>(other); }
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void solveInPlace(const MatrixBase<OtherDerived>& other) const void solveInPlace(const MatrixBase<OtherDerived>& other) const
{ return solveInPlace<OnTheLeft>(other); } { return solveInPlace<OnTheLeft>(other); }
const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const /** Swaps the coefficients of the common triangular parts of two matrices */
{
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
}
SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
{
EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
}
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
#ifdef EIGEN_PARSED_BY_DOXYGEN
void swap(TriangularBase<OtherDerived> &other)
#else
void swap(TriangularBase<OtherDerived> const & other) void swap(TriangularBase<OtherDerived> const & other)
#endif
{ {
TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived()); EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
} }
/** \deprecated
* Shortcut for \code (*this).swap(other.triangularView<(*this)::Mode>()) \endcode */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
void swap(MatrixBase<OtherDerived> const & other) void swap(MatrixBase<OtherDerived> const & other)
{ {
SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix)); EIGEN_STATIC_ASSERT_LVALUE(OtherDerived);
TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived()); call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
} }
Scalar determinant() const template<typename RhsType, typename DstType>
{ EIGEN_DEVICE_FUNC
if (Mode & UnitDiag) EIGEN_STRONG_INLINE void _solve_impl(const RhsType &rhs, DstType &dst) const {
return 1; if(!internal::is_same_dense(dst,rhs))
else if (Mode & ZeroDiag) dst = rhs;
return 0; this->solveInPlace(dst);
else
return m_matrix.diagonal().prod();
}
// TODO simplify the following:
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
setZero();
return assignProduct(other.derived(),1);
}
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
return assignProduct(other.derived(),1);
}
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
{
return assignProduct(other.derived(),-1);
}
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
{
setZero();
return assignProduct(other.derived(),other.alpha());
}
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
{
return assignProduct(other.derived(),other.alpha());
}
template<typename ProductDerived>
EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
{
return assignProduct(other.derived(),-other.alpha());
}
protected:
template<typename ProductDerived, typename Lhs, typename Rhs>
EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
template<int Mode, bool LhsIsTriangular,
typename Lhs, bool LhsIsVector,
typename Rhs, bool RhsIsVector>
EIGEN_STRONG_INLINE TriangularView& assignProduct(const TriangularProduct<Mode, LhsIsTriangular, Lhs, LhsIsVector, Rhs, RhsIsVector>& prod, const Scalar& alpha)
{
lazyAssign(alpha*prod.eval());
return *this;
} }
MatrixTypeNested m_matrix; template<typename ProductType>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, bool beta);
}; };
/*************************************************************************** /***************************************************************************
* Implementation of triangular evaluation/assignment * Implementation of triangular evaluation/assignment
***************************************************************************/ ***************************************************************************/
namespace internal { #ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived1, typename Derived2, unsigned int Mode, int UnrollCount, bool ClearOpposite>
struct triangular_assignment_selector
{
enum {
col = (UnrollCount-1) / Derived1::RowsAtCompileTime,
row = (UnrollCount-1) % Derived1::RowsAtCompileTime
};
typedef typename Derived1::Scalar Scalar;
static inline void run(Derived1 &dst, const Derived2 &src)
{
triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
eigen_assert( Mode == Upper || Mode == Lower
|| Mode == StrictlyUpper || Mode == StrictlyLower
|| Mode == UnitUpper || Mode == UnitLower);
if((Mode == Upper && row <= col)
|| (Mode == Lower && row >= col)
|| (Mode == StrictlyUpper && row < col)
|| (Mode == StrictlyLower && row > col)
|| (Mode == UnitUpper && row < col)
|| (Mode == UnitLower && row > col))
dst.copyCoeff(row, col, src);
else if(ClearOpposite)
{
if (Mode&UnitDiag && row==col)
dst.coeffRef(row, col) = Scalar(1);
else
dst.coeffRef(row, col) = Scalar(0);
}
}
};
// prevent buggy user code from causing an infinite recursion
template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
{
static inline void run(Derived1 &, const Derived2 &) {}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
typedef typename Derived1::Scalar Scalar;
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = (std::min)(j, dst.rows()-1);
for(Index i = 0; i <= maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
for(Index i = maxi+1; i < dst.rows(); ++i)
dst.coeffRef(i, j) = Scalar(0);
}
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
for(Index i = j; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
Index maxi = (std::min)(j, dst.rows());
if (ClearOpposite)
for(Index i = 0; i < maxi; ++i)
dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
}
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
typedef typename Derived1::Scalar Scalar;
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = (std::min)(j, dst.rows());
for(Index i = 0; i < maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
for(Index i = maxi; i < dst.rows(); ++i)
dst.coeffRef(i, j) = Scalar(0);
}
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
for(Index i = j+1; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
Index maxi = (std::min)(j, dst.rows()-1);
if (ClearOpposite)
for(Index i = 0; i <= maxi; ++i)
dst.coeffRef(i, j) = static_cast<typename Derived1::Scalar>(0);
}
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = (std::min)(j, dst.rows());
for(Index i = 0; i < maxi; ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
{
for(Index i = maxi+1; i < dst.rows(); ++i)
dst.coeffRef(i, j) = 0;
}
}
dst.diagonal().setOnes();
}
};
template<typename Derived1, typename Derived2, bool ClearOpposite>
struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
{
typedef typename Derived1::Index Index;
static inline void run(Derived1 &dst, const Derived2 &src)
{
for(Index j = 0; j < dst.cols(); ++j)
{
Index maxi = (std::min)(j, dst.rows());
for(Index i = maxi+1; i < dst.rows(); ++i)
dst.copyCoeff(i, j, src);
if (ClearOpposite)
{
for(Index i = 0; i < maxi; ++i)
dst.coeffRef(i, j) = 0;
}
}
dst.diagonal().setOnes();
}
};
} // end namespace internal
// FIXME should we keep that possibility // FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode> template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived> template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>& inline TriangularView<MatrixType, Mode>&
TriangularView<MatrixType, Mode>::operator=(const MatrixBase<OtherDerived>& other) TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
{ {
if(OtherDerived::Flags & EvalBeforeAssigningBit) internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
{ return derived();
typename internal::plain_matrix_type<OtherDerived>::type other_evaluated(other.rows(), other.cols());
other_evaluated.template triangularView<Mode>().lazyAssign(other.derived());
lazyAssign(other_evaluated);
}
else
lazyAssign(other.derived());
return *this;
} }
// FIXME should we keep that possibility // FIXME should we keep that possibility
template<typename MatrixType, unsigned int Mode> template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived> template<typename OtherDerived>
void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>& other) void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other)
{ {
enum { internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>());
unroll = MatrixType::SizeAtCompileTime != Dynamic
&& internal::traits<OtherDerived>::CoeffReadCost != Dynamic
&& MatrixType::SizeAtCompileTime*internal::traits<OtherDerived>::CoeffReadCost/2 <= EIGEN_UNROLLING_LIMIT
};
eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
internal::triangular_assignment_selector
<MatrixType, OtherDerived, int(Mode),
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
false // do not change the opposite triangular part
>::run(m_matrix.const_cast_derived(), other.derived());
} }
...@@ -637,38 +574,21 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived> ...@@ -637,38 +574,21 @@ void TriangularView<MatrixType, Mode>::lazyAssign(const MatrixBase<OtherDerived>
template<typename MatrixType, unsigned int Mode> template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived> template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>& inline TriangularView<MatrixType, Mode>&
TriangularView<MatrixType, Mode>::operator=(const TriangularBase<OtherDerived>& other) TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other)
{ {
eigen_assert(Mode == int(OtherDerived::Mode)); eigen_assert(Mode == int(OtherDerived::Mode));
if(internal::traits<OtherDerived>::Flags & EvalBeforeAssigningBit) internal::call_assignment(derived(), other.derived());
{ return derived();
typename OtherDerived::DenseMatrixType other_evaluated(other.rows(), other.cols());
other_evaluated.template triangularView<Mode>().lazyAssign(other.derived().nestedExpression());
lazyAssign(other_evaluated);
}
else
lazyAssign(other.derived().nestedExpression());
return *this;
} }
template<typename MatrixType, unsigned int Mode> template<typename MatrixType, unsigned int Mode>
template<typename OtherDerived> template<typename OtherDerived>
void TriangularView<MatrixType, Mode>::lazyAssign(const TriangularBase<OtherDerived>& other) void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other)
{ {
enum { eigen_assert(Mode == int(OtherDerived::Mode));
unroll = MatrixType::SizeAtCompileTime != Dynamic internal::call_assignment_no_alias(derived(), other.derived());
&& internal::traits<OtherDerived>::CoeffReadCost != Dynamic
&& MatrixType::SizeAtCompileTime * internal::traits<OtherDerived>::CoeffReadCost / 2
<= EIGEN_UNROLLING_LIMIT
};
eigen_assert(m_matrix.rows() == other.rows() && m_matrix.cols() == other.cols());
internal::triangular_assignment_selector
<MatrixType, OtherDerived, int(Mode),
unroll ? int(MatrixType::SizeAtCompileTime) : Dynamic,
false // preserve the opposite triangular part
>::run(m_matrix.const_cast_derived(), other.derived().nestedExpression());
} }
#endif
/*************************************************************************** /***************************************************************************
* Implementation of TriangularBase methods * Implementation of TriangularBase methods
...@@ -680,35 +600,7 @@ template<typename Derived> ...@@ -680,35 +600,7 @@ template<typename Derived>
template<typename DenseDerived> template<typename DenseDerived>
void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
{ {
if(internal::traits<Derived>::Flags & EvalBeforeAssigningBit) evalToLazy(other.derived());
{
typename internal::plain_matrix_type<Derived>::type other_evaluated(rows(), cols());
evalToLazy(other_evaluated);
other.derived().swap(other_evaluated);
}
else
evalToLazy(other.derived());
}
/** Assigns a triangular or selfadjoint matrix to a dense matrix.
* If the matrix is triangular, the opposite part is set to zero. */
template<typename Derived>
template<typename DenseDerived>
void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
{
enum {
unroll = DenseDerived::SizeAtCompileTime != Dynamic
&& internal::traits<Derived>::CoeffReadCost != Dynamic
&& DenseDerived::SizeAtCompileTime * internal::traits<Derived>::CoeffReadCost / 2
<= EIGEN_UNROLLING_LIMIT
};
other.derived().resize(this->rows(), this->cols());
internal::triangular_assignment_selector
<DenseDerived, typename internal::traits<Derived>::MatrixTypeNestedCleaned, Derived::Mode,
unroll ? int(DenseDerived::SizeAtCompileTime) : Dynamic,
true // clear the opposite triangular part
>::run(other.derived(), derived().nestedExpression());
} }
/*************************************************************************** /***************************************************************************
...@@ -719,49 +611,14 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const ...@@ -719,49 +611,14 @@ void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
* Implementation of MatrixBase methods * Implementation of MatrixBase methods
***************************************************************************/ ***************************************************************************/
#ifdef EIGEN2_SUPPORT
// implementation of part<>(), including the SelfAdjoint case.
namespace internal {
template<typename MatrixType, unsigned int Mode>
struct eigen2_part_return_type
{
typedef TriangularView<MatrixType, Mode> type;
};
template<typename MatrixType>
struct eigen2_part_return_type<MatrixType, SelfAdjoint>
{
typedef SelfAdjointView<MatrixType, Upper> type;
};
}
/** \deprecated use MatrixBase::triangularView() */
template<typename Derived>
template<unsigned int Mode>
const typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part() const
{
return derived();
}
/** \deprecated use MatrixBase::triangularView() */
template<typename Derived>
template<unsigned int Mode>
typename internal::eigen2_part_return_type<Derived, Mode>::type MatrixBase<Derived>::part()
{
return derived();
}
#endif
/** /**
* \returns an expression of a triangular view extracted from the current matrix * \returns an expression of a triangular view extracted from the current matrix
* *
* The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper, * The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper,
* \c #Lower, \c #StrictlyLower, \c #UnitLower. * \c #Lower, \c #StrictlyLower, \c #UnitLower.
* *
* Example: \include MatrixBase_extract.cpp * Example: \include MatrixBase_triangularView.cpp
* Output: \verbinclude MatrixBase_extract.out * Output: \verbinclude MatrixBase_triangularView.out
* *
* \sa class TriangularView * \sa class TriangularView
*/ */
...@@ -770,7 +627,7 @@ template<unsigned int Mode> ...@@ -770,7 +627,7 @@ template<unsigned int Mode>
typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView() MatrixBase<Derived>::triangularView()
{ {
return derived(); return typename TriangularViewReturnType<Mode>::Type(derived());
} }
/** This is the const version of MatrixBase::triangularView() */ /** This is the const version of MatrixBase::triangularView() */
...@@ -779,7 +636,7 @@ template<unsigned int Mode> ...@@ -779,7 +636,7 @@ template<unsigned int Mode>
typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type
MatrixBase<Derived>::triangularView() const MatrixBase<Derived>::triangularView() const
{ {
return derived(); return typename ConstTriangularViewReturnType<Mode>::Type(derived());
} }
/** \returns true if *this is approximately equal to an upper triangular matrix, /** \returns true if *this is approximately equal to an upper triangular matrix,
...@@ -790,21 +647,20 @@ MatrixBase<Derived>::triangularView() const ...@@ -790,21 +647,20 @@ MatrixBase<Derived>::triangularView() const
template<typename Derived> template<typename Derived>
bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
{ {
using std::abs;
RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1); RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
{ {
Index maxi = (std::min)(j, rows()-1); Index maxi = numext::mini(j, rows()-1);
for(Index i = 0; i <= maxi; ++i) for(Index i = 0; i <= maxi; ++i)
{ {
RealScalar absValue = abs(coeff(i,j)); RealScalar absValue = numext::abs(coeff(i,j));
if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue; if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
} }
} }
RealScalar threshold = maxAbsOnUpperPart * prec; RealScalar threshold = maxAbsOnUpperPart * prec;
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
for(Index i = j+1; i < rows(); ++i) for(Index i = j+1; i < rows(); ++i)
if(abs(coeff(i, j)) > threshold) return false; if(numext::abs(coeff(i, j)) > threshold) return false;
return true; return true;
} }
...@@ -816,24 +672,312 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const ...@@ -816,24 +672,312 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
template<typename Derived> template<typename Derived>
bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
{ {
using std::abs;
RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1); RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
for(Index j = 0; j < cols(); ++j) for(Index j = 0; j < cols(); ++j)
for(Index i = j; i < rows(); ++i) for(Index i = j; i < rows(); ++i)
{ {
RealScalar absValue = abs(coeff(i,j)); RealScalar absValue = numext::abs(coeff(i,j));
if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue; if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
} }
RealScalar threshold = maxAbsOnLowerPart * prec; RealScalar threshold = maxAbsOnLowerPart * prec;
for(Index j = 1; j < cols(); ++j) for(Index j = 1; j < cols(); ++j)
{ {
Index maxi = (std::min)(j, rows()-1); Index maxi = numext::mini(j, rows()-1);
for(Index i = 0; i < maxi; ++i) for(Index i = 0; i < maxi; ++i)
if(abs(coeff(i, j)) > threshold) return false; if(numext::abs(coeff(i, j)) > threshold) return false;
} }
return true; return true;
} }
/***************************************************************************
****************************************************************************
* Evaluators and Assignment of triangular expressions
***************************************************************************
***************************************************************************/
namespace internal {
// TODO currently a triangular expression has the form TriangularView<.,.>
// in the future triangular-ness should be defined by the expression traits
// such that Transpose<TriangularView<.,.> > is valid. (currently TriangularBase::transpose() is overloaded to make it work)
//
// Evaluator traits of a TriangularView expression:
//  - Kind  is deduced from the storage kind of the nested MatrixType;
//  - Shape is the evaluator shape of the nested MatrixType glued with TriangularShape.
template<typename MatrixType, unsigned int Mode>
struct evaluator_traits<TriangularView<MatrixType,Mode> >
{
typedef typename storage_kind_to_evaluator_kind<typename MatrixType::StorageKind>::Kind Kind;
typedef typename glue_shapes<typename evaluator_traits<MatrixType>::Shape, TriangularShape>::type Shape;
};
// Index-based evaluator of a TriangularView.
// It adds nothing of its own: it inherits from the evaluator of the nested
// expression type (stripped by remove_all) and is constructed from
// xpr.nestedExpression().
template<typename MatrixType, unsigned int Mode>
struct unary_evaluator<TriangularView<MatrixType,Mode>, IndexBased>
: evaluator<typename internal::remove_all<MatrixType>::type>
{
typedef TriangularView<MatrixType,Mode> XprType;
typedef evaluator<typename internal::remove_all<MatrixType>::type> Base;
// Builds the base evaluator directly on the expression wrapped by the view.
unary_evaluator(const XprType &xpr) : Base(xpr.nestedExpression()) {}
};
// Additional assignment kinds.
// These empty tag types are the `Kind` selected by the AssignmentKind
// specializations for the (TriangularShape, DenseShape) shape combinations and
// pick the matching Assignment<...> specialization.
struct Triangular2Triangular {};
struct Triangular2Dense {};
struct Dense2Triangular {};

// Forward declaration of the assignment loop; the last parameter is named
// SetOpposite to match the primary definition and its specializations.
template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite> struct triangular_assignment_loop;
/** \internal Specialization of the dense assignment kernel for triangular matrices.
* The main difference is that the triangular, diagonal, and opposite parts are processed through three different functions.
* \tparam UpLo must be either Lower or Upper
* \tparam Mode must be either 0, UnitDiag, ZeroDiag, or SelfAdjoint
* \tparam SetOpposite when non-zero, assignOppositeCoeff() writes Scalar(0) and
*         assignDiagonalCoeff() writes the fixed diagonal value for UnitDiag / ZeroDiag;
*         when zero, both of those operations leave the destination untouched.
*/
template<int UpLo, int Mode, int SetOpposite, typename DstEvaluatorTypeT, typename SrcEvaluatorTypeT, typename Functor, int Version = Specialized>
class triangular_dense_assignment_kernel : public generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version>
{
protected:
typedef generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor, Version> Base;
typedef typename Base::DstXprType DstXprType;
typedef typename Base::SrcXprType SrcXprType;
// Members of the generic kernel that are accessed directly below.
using Base::m_dst;
using Base::m_src;
using Base::m_functor;
public:
typedef typename Base::DstEvaluatorType DstEvaluatorType;
typedef typename Base::SrcEvaluatorType SrcEvaluatorType;
typedef typename Base::Scalar Scalar;
typedef typename Base::AssignmentTraits AssignmentTraits;
// Forwards all arguments to the generic dense assignment kernel.
EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr)
: Base(dst, src, func, dstExpr)
{}
#ifdef EIGEN_INTERNAL_DEBUGGING
// Debug variant: same as Base::assignCoeff but asserts that the coefficient
// is off-diagonal (diagonal entries must go through assignDiagonalCoeff).
EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col)
{
eigen_internal_assert(row!=col);
Base::assignCoeff(row,col);
}
#else
using Base::assignCoeff;
#endif
// Handles the diagonal coefficient (id,id):
//  - UnitDiag + SetOpposite: assigns Scalar(1) through the functor;
//  - ZeroDiag + SetOpposite: assigns Scalar(0) through the functor;
//  - Mode==0: regular assignment from the source;
//  - any other combination: the destination coefficient is not written.
EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id)
{
if(Mode==UnitDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(1));
else if(Mode==ZeroDiag && SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(id,id), Scalar(0));
else if(Mode==0) Base::assignCoeff(id,id);
}
// Handles an off-diagonal coefficient of the opposite triangular part:
// assigns Scalar(0) through the functor when SetOpposite is set, otherwise does nothing.
EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col)
{
eigen_internal_assert(row!=col);
if(SetOpposite)
m_functor.assignCoeff(m_dst.coeffRef(row,col), Scalar(0));
}
};
/** \internal
  * Assigns \a src to \a dst through \a func, restricted to the triangular part selected by \a Mode.
  *
  * \a dst is resized to the dimensions of \a src when they differ. The traversal is fully
  * unrolled when the destination has a fixed size, the source coefficient cost is below
  * HugeCost, and the estimated cost fits within EIGEN_UNROLLING_LIMIT; otherwise the
  * run-time (Dynamic) loop is used.
  *
  * \tparam Mode        triangular mode flags, split into the UpLo part and the diagonal/self-adjoint part for the kernel
  * \tparam SetOpposite forwarded to the kernel and to the assignment loop
  */
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType, typename Functor>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func)
{
  typedef evaluator<SrcXprType> SrcEvaluatorType;
  typedef evaluator<DstXprType> DstEvaluatorType;
  typedef triangular_dense_assignment_kernel< Mode&(Lower|Upper),Mode&(UnitDiag|ZeroDiag|SelfAdjoint),SetOpposite,
                                              DstEvaluatorType,SrcEvaluatorType,Functor> Kernel;

  enum {
    FullyUnroll = DstXprType::SizeAtCompileTime != Dynamic
               && SrcEvaluatorType::CoeffReadCost < HugeCost
               && DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
  };

  // The source evaluator is created first, before the destination is possibly resized.
  SrcEvaluatorType srcEvaluator(src);

  const Index wantedRows = src.rows();
  const Index wantedCols = src.cols();
  const bool sizesAgree = (dst.rows()==wantedRows) && (dst.cols()==wantedCols);
  if(!sizesAgree)
    dst.resize(wantedRows, wantedCols);

  // The destination evaluator is created only once dst has its final dimensions.
  DstEvaluatorType dstEvaluator(dst);
  Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived());

  triangular_assignment_loop<Kernel, Mode, FullyUnroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
}
/** \internal
  * Convenience overload: performs the triangular assignment of \a src into \a dst
  * using a plain internal::assign_op on the two scalar types as functor.
  */
template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src)
{
  typedef internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> PlainAssignOp;
  call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, PlainAssignOp());
}
// Maps each shape pair involving TriangularShape to the tag type that selects
// the corresponding Assignment<..., Kind> specialization.
// NOTE(review): the first shape appears to be the destination's and the second
// the source's (the Triangular2Dense handler reads SrcXprType::Mode) - confirm
// against the primary AssignmentKind template.
template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };
template<> struct AssignmentKind<DenseShape,TriangularShape> { typedef Triangular2Dense Kind; };
template<> struct AssignmentKind<TriangularShape,DenseShape> { typedef Dense2Triangular Kind; };
// Triangular <- Triangular.
// Both sides must carry the same Mode (checked by eigen_assert); only the part
// selected by the destination's Mode is assigned and the opposite part is left alone.
template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Triangular>
{
  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
    enum { DstMode = DstXprType::Mode, SrcMode = SrcXprType::Mode };
    eigen_assert(int(DstMode) == int(SrcMode));

    call_triangular_assignment_loop<DstXprType::Mode, false>(dst, src, func);
  }
};
// Dense <- Triangular.
// The traversal follows the source's Mode; SetOpposite is enabled unless the
// source Mode has the SelfAdjoint bit set.
template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Triangular2Dense>
{
  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
    const bool setOpposite = (SrcXprType::Mode&SelfAdjoint)==0;
    call_triangular_assignment_loop<SrcXprType::Mode, setOpposite>(dst, src, func);
  }
};
// Triangular <- Dense.
// Only the part selected by the destination's Mode is assigned; the opposite
// part of the destination is never touched (SetOpposite is false).
template< typename DstXprType, typename SrcXprType, typename Functor>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Triangular>
{
  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
    const bool setOpposite = false;
    call_triangular_assignment_loop<DstXprType::Mode, setOpposite>(dst, src, func);
  }
};
/** \internal
  * Compile-time unrolled triangular assignment.
  *
  * Each instantiation first recurses on UnrollCount-1 and then handles the single
  * coefficient whose linear index is UnrollCount-1, decomposed into (row,col) using
  * the destination's RowsAtCompileTime.
  */
template<typename Kernel, unsigned int Mode, int UnrollCount, bool SetOpposite>
struct triangular_assignment_loop
{
  // FIXME: this is not very clean, perhaps this information should be provided by the kernel?
  typedef typename Kernel::DstEvaluatorType DstEvaluatorType;
  typedef typename DstEvaluatorType::XprType DstXprType;
  typedef typename Kernel::Scalar Scalar;

  enum {
    col = (UnrollCount-1) / DstXprType::RowsAtCompileTime,
    row = (UnrollCount-1) % DstXprType::RowsAtCompileTime
  };

  EIGEN_DEVICE_FUNC
  static inline void run(Kernel &kernel)
  {
    // Process all coefficients with a smaller linear index first.
    typedef triangular_assignment_loop<Kernel, Mode, UnrollCount-1, SetOpposite> PreviousStep;
    PreviousStep::run(kernel);

    const bool onDiagonal   = (row==col);
    const bool inReferenced = ((Mode&Lower) && row>col) || ((Mode&Upper) && row<col);

    if(onDiagonal)
    {
      kernel.assignDiagonalCoeff(row);
    }
    else if(inReferenced)
    {
      kernel.assignCoeff(row,col);
    }
    else if(SetOpposite)
    {
      kernel.assignOppositeCoeff(row,col);
    }
  }
};
// prevent buggy user code from causing an infinite recursion
// Specialization for UnrollCount==0: run() is empty, which ends the
// UnrollCount-1 recursion performed by the unrolled primary template.
template<typename Kernel, unsigned int Mode, bool SetOpposite>
struct triangular_assignment_loop<Kernel, Mode, 0, SetOpposite>
{
EIGEN_DEVICE_FUNC
static inline void run(Kernel &) {}
};
// TODO: experiment with a recursive assignment procedure splitting the current
// triangular part into one rectangular and two triangular parts.
/** \internal
  * Run-time (non-unrolled) triangular assignment.
  *
  * The destination is traversed column by column. Within a column the rows above the
  * diagonal, the diagonal coefficient, and the rows below the diagonal are handled in
  * that order; each off-diagonal range is either assigned, zeroed through
  * assignOppositeCoeff (only if SetOpposite), or skipped, depending on \a Mode.
  */
template<typename Kernel, unsigned int Mode, bool SetOpposite>
struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
{
  typedef typename Kernel::Scalar Scalar;

  EIGEN_DEVICE_FUNC
  static inline void run(Kernel &kernel)
  {
    const Index nbRows = kernel.rows();
    const Index nbCols = kernel.cols();

    for(Index c = 0; c < nbCols; ++c)
    {
      // First row that is not strictly above the diagonal (clamped for wide matrices).
      const Index diagRow = numext::mini(c, nbRows);

      // Rows strictly above the diagonal.
      if(Mode&Upper)
      {
        for(Index r = 0; r < diagRow; ++r)
          kernel.assignCoeff(r, c);
      }
      else if((Mode&Lower) && SetOpposite)
      {
        for(Index r = 0; r < diagRow; ++r)
          kernel.assignOppositeCoeff(r, c);
      }

      // Diagonal coefficient, if this column has one (then r==c).
      Index r = diagRow;
      if(r < nbRows)
      {
        kernel.assignDiagonalCoeff(r);
        ++r;
      }

      // Rows strictly below the diagonal.
      if(Mode&Lower)
      {
        for(; r < nbRows; ++r)
          kernel.assignCoeff(r, c);
      }
      else if((Mode&Upper) && SetOpposite)
      {
        for(; r < nbRows; ++r)
          kernel.assignOppositeCoeff(r, c);
      }
    }
  }
};
} // end namespace internal
/** Assigns a triangular or selfadjoint matrix to a dense matrix.
  * If the matrix is triangular, the opposite part is set to zero.
  *
  * \a other is first resized to the dimensions of \c *this, then filled from the
  * nested expression according to \c Derived::Mode. */
template<typename Derived>
template<typename DenseDerived>
void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const
{
  // The opposite part is only zeroed when the SelfAdjoint bit is not set.
  const bool setOpposite = (Derived::Mode&SelfAdjoint)==0;

  DenseDerived& dst = other.derived();
  dst.resize(this->rows(), this->cols());
  internal::call_triangular_assignment_loop<Derived::Mode, setOpposite>(dst, derived().nestedExpression());
}
namespace internal {
// Triangular = Product
// Resizes the destination to the product's dimensions when they differ, then
// delegates to dst._assignProduct(src, 1, 0).
// NOTE(review): the trailing (1, 0) arguments presumably mean "scale by 1, do not
// accumulate into dst" - confirm against _assignProduct's declaration.
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{
  typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
  {
    const Index productRows = src.rows();
    const Index productCols = src.cols();
    const bool shapeMatches = (dst.rows()==productRows) && (dst.cols()==productCols);
    if(!shapeMatches)
      dst.resize(productRows, productCols);

    dst._assignProduct(src, 1, 0);
  }
};
// Triangular += Product
// Delegates to dst._assignProduct(src, 1, 1); unlike the plain-assignment
// specialization, the destination is not resized here.
// NOTE(review): the trailing (1, 1) arguments presumably mean "scale by +1 and
// accumulate into dst" - confirm against _assignProduct's declaration.
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
{
dst._assignProduct(src, 1, 1);
}
};
// Triangular -= Product
// Delegates to dst._assignProduct(src, -1, 1); the destination is not resized here.
// NOTE(review): the trailing (-1, 1) arguments presumably mean "scale by -1 and
// accumulate into dst" - confirm against _assignProduct's declaration.
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
{
dst._assignProduct(src, -1, 1);
}
};
} // end namespace internal
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_TRIANGULARMATRIX_H #endif // EIGEN_TRIANGULARMATRIX_H
...@@ -13,13 +13,23 @@ ...@@ -13,13 +13,23 @@
namespace Eigen { namespace Eigen {
namespace internal {
// Traits of VectorBlock<VectorType,Size>: inherited unchanged from the traits
// of the underlying Block type, which is 1 x Size when VectorType has
// RowMajorBit set in its Flags and Size x 1 otherwise.
template<typename VectorType, int Size>
struct traits<VectorBlock<VectorType, Size> >
: public traits<Block<VectorType,
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}
/** \class VectorBlock /** \class VectorBlock
* \ingroup Core_Module * \ingroup Core_Module
* *
* \brief Expression of a fixed-size or dynamic-size sub-vector * \brief Expression of a fixed-size or dynamic-size sub-vector
* *
* \param VectorType the type of the object in which we are taking a sub-vector * \tparam VectorType the type of the object in which we are taking a sub-vector
* \param Size size of the sub-vector we are taking at compile time (optional) * \tparam Size size of the sub-vector we are taking at compile time (optional)
* *
* This class represents an expression of either a fixed-size or dynamic-size sub-vector. * This class represents an expression of either a fixed-size or dynamic-size sub-vector.
* It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and * It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment<int>(Index) and
...@@ -43,17 +53,6 @@ namespace Eigen { ...@@ -43,17 +53,6 @@ namespace Eigen {
* *
* \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index) * \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index)
*/ */
namespace internal {
// Traits of VectorBlock<VectorType,Size>: inherited unchanged from the traits
// of the underlying Block type, which is 1 x Size when VectorType has
// RowMajorBit set in its Flags and Size x 1 otherwise.
template<typename VectorType, int Size>
struct traits<VectorBlock<VectorType, Size> >
: public traits<Block<VectorType,
traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
traits<VectorType>::Flags & RowMajorBit ? Size : 1> >
{
};
}
template<typename VectorType, int Size> class VectorBlock template<typename VectorType, int Size> class VectorBlock
: public Block<VectorType, : public Block<VectorType,
internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size, internal::traits<VectorType>::Flags & RowMajorBit ? 1 : Size,
...@@ -72,6 +71,7 @@ template<typename VectorType, int Size> class VectorBlock ...@@ -72,6 +71,7 @@ template<typename VectorType, int Size> class VectorBlock
/** Dynamic-size constructor /** Dynamic-size constructor
*/ */
EIGEN_DEVICE_FUNC
inline VectorBlock(VectorType& vector, Index start, Index size) inline VectorBlock(VectorType& vector, Index start, Index size)
: Base(vector, : Base(vector,
IsColVector ? start : 0, IsColVector ? 0 : start, IsColVector ? start : 0, IsColVector ? 0 : start,
...@@ -82,6 +82,7 @@ template<typename VectorType, int Size> class VectorBlock ...@@ -82,6 +82,7 @@ template<typename VectorType, int Size> class VectorBlock
/** Fixed-size constructor /** Fixed-size constructor
*/ */
EIGEN_DEVICE_FUNC
inline VectorBlock(VectorType& vector, Index start) inline VectorBlock(VectorType& vector, Index start)
: Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start) : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
{ {
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#ifndef EIGEN_PARTIAL_REDUX_H #ifndef EIGEN_PARTIAL_REDUX_H
#define EIGEN_PARTIAL_REDUX_H #define EIGEN_PARTIAL_REDUX_H
namespace Eigen { namespace Eigen {
/** \class PartialReduxExpr /** \class PartialReduxExpr
* \ingroup Core_Module * \ingroup Core_Module
...@@ -41,64 +41,43 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> > ...@@ -41,64 +41,43 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
typedef typename traits<MatrixType>::StorageKind StorageKind; typedef typename traits<MatrixType>::StorageKind StorageKind;
typedef typename traits<MatrixType>::XprKind XprKind; typedef typename traits<MatrixType>::XprKind XprKind;
typedef typename MatrixType::Scalar InputScalar; typedef typename MatrixType::Scalar InputScalar;
typedef typename nested<MatrixType>::type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum { enum {
RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime, RowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::RowsAtCompileTime,
ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime, ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime, MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits, Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
}; };
#if EIGEN_GNUC_AT_LEAST(3,4)
typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
#else
typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
#endif
enum {
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
: TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
};
}; };
} }
template< typename MatrixType, typename MemberOp, int Direction> template< typename MatrixType, typename MemberOp, int Direction>
class PartialReduxExpr : internal::no_assignment_operator, class PartialReduxExpr : public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type,
public internal::dense_xpr_base< PartialReduxExpr<MatrixType, MemberOp, Direction> >::type internal::no_assignment_operator
{ {
public: public:
typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base; typedef typename internal::dense_xpr_base<PartialReduxExpr>::type Base;
EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr) EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr)
typedef typename internal::traits<PartialReduxExpr>::MatrixTypeNested MatrixTypeNested;
typedef typename internal::traits<PartialReduxExpr>::_MatrixTypeNested _MatrixTypeNested;
PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp()) EIGEN_DEVICE_FUNC
explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp())
: m_matrix(mat), m_functor(func) {} : m_matrix(mat), m_functor(func) {}
EIGEN_DEVICE_FUNC
Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); } Index rows() const { return (Direction==Vertical ? 1 : m_matrix.rows()); }
EIGEN_DEVICE_FUNC
Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); } Index cols() const { return (Direction==Horizontal ? 1 : m_matrix.cols()); }
EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const EIGEN_DEVICE_FUNC
{ typename MatrixType::Nested nestedExpression() const { return m_matrix; }
if (Direction==Vertical)
return m_functor(m_matrix.col(j));
else
return m_functor(m_matrix.row(i));
}
const Scalar coeff(Index index) const EIGEN_DEVICE_FUNC
{ const MemberOp& functor() const { return m_functor; }
if (Direction==Vertical)
return m_functor(m_matrix.col(index));
else
return m_functor(m_matrix.row(index));
}
protected: protected:
MatrixTypeNested m_matrix; typename MatrixType::Nested m_matrix;
const MemberOp m_functor; const MemberOp m_functor;
}; };
...@@ -110,7 +89,8 @@ class PartialReduxExpr : internal::no_assignment_operator, ...@@ -110,7 +89,8 @@ class PartialReduxExpr : internal::no_assignment_operator,
template<typename Scalar, int Size> struct Cost \ template<typename Scalar, int Size> struct Cost \
{ enum { value = COST }; }; \ { enum { value = COST }; }; \
template<typename XprType> \ template<typename XprType> \
EIGEN_STRONG_INLINE ResultType operator()(const XprType& mat) const \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
ResultType operator()(const XprType& mat) const \
{ return mat.MEMBER(); } \ { return mat.MEMBER(); } \
} }
...@@ -130,17 +110,27 @@ EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost); ...@@ -130,17 +110,27 @@ EIGEN_MEMBER_FUNCTOR(any, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost); EIGEN_MEMBER_FUNCTOR(count, (Size-1)*NumTraits<Scalar>::AddCost);
EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost); EIGEN_MEMBER_FUNCTOR(prod, (Size-1)*NumTraits<Scalar>::MulCost);
// Member functor returning the l^p norm of the expression it is applied to,
// i.e. mat.template lpNorm<p>().
// \tparam p          order of the norm, forwarded as the template argument of lpNorm
// \tparam ResultType type returned by operator() (also exposed as result_type)
template <int p, typename ResultType>
struct member_lpnorm {
typedef ResultType result_type;
// Cost estimate for a sub-vector of Size coefficients:
// (Size+5) multiplications plus (Size-1) additions.
template<typename Scalar, int Size> struct Cost
{ enum { value = (Size+5) * NumTraits<Scalar>::MulCost + (Size-1)*NumTraits<Scalar>::AddCost }; };
EIGEN_DEVICE_FUNC member_lpnorm() {}
// Applies lpNorm<p>() to the given (sub-)expression.
template<typename XprType>
EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const
{ return mat.template lpNorm<p>(); }
};
template <typename BinaryOp, typename Scalar> template <typename BinaryOp, typename Scalar>
struct member_redux { struct member_redux {
typedef typename result_of< typedef typename result_of<
BinaryOp(Scalar) BinaryOp(const Scalar&,const Scalar&)
>::type result_type; >::type result_type;
template<typename _Scalar, int Size> struct Cost template<typename _Scalar, int Size> struct Cost
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; }; { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
member_redux(const BinaryOp func) : m_functor(func) {} EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {}
template<typename Derived> template<typename Derived>
inline result_type operator()(const DenseBase<Derived>& mat) const EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase<Derived>& mat) const
{ return mat.redux(m_functor); } { return mat.redux(m_functor); }
const BinaryOp m_functor; const BinaryOp m_functor;
}; };
...@@ -151,8 +141,8 @@ struct member_redux { ...@@ -151,8 +141,8 @@ struct member_redux {
* *
* \brief Pseudo expression providing partial reduction operations * \brief Pseudo expression providing partial reduction operations
* *
* \param ExpressionType the type of the object on which to do partial reductions * \tparam ExpressionType the type of the object on which to do partial reductions
* \param Direction indicates the direction of the redux (#Vertical or #Horizontal) * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal)
* *
* This class represents a pseudo expression with partial reduction features. * This class represents a pseudo expression with partial reduction features.
* It is the return type of DenseBase::colwise() and DenseBase::rowwise() * It is the return type of DenseBase::colwise() and DenseBase::rowwise()
...@@ -169,16 +159,15 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -169,16 +159,15 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
typedef typename ExpressionType::Scalar Scalar; typedef typename ExpressionType::Scalar Scalar;
typedef typename ExpressionType::RealScalar RealScalar; typedef typename ExpressionType::RealScalar RealScalar;
typedef typename ExpressionType::Index Index; typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret, typedef typename internal::ref_selector<ExpressionType>::non_const_type ExpressionTypeNested;
ExpressionType, ExpressionType&>::type ExpressionTypeNested;
typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned; typedef typename internal::remove_all<ExpressionTypeNested>::type ExpressionTypeNestedCleaned;
template<template<typename _Scalar> class Functor, template<template<typename _Scalar> class Functor,
typename Scalar=typename internal::traits<ExpressionType>::Scalar> struct ReturnType typename Scalar_=Scalar> struct ReturnType
{ {
typedef PartialReduxExpr<ExpressionType, typedef PartialReduxExpr<ExpressionType,
Functor<Scalar>, Functor<Scalar_>,
Direction Direction
> Type; > Type;
}; };
...@@ -186,23 +175,24 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -186,23 +175,24 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
template<typename BinaryOp> struct ReduxReturnType template<typename BinaryOp> struct ReduxReturnType
{ {
typedef PartialReduxExpr<ExpressionType, typedef PartialReduxExpr<ExpressionType,
internal::member_redux<BinaryOp,typename internal::traits<ExpressionType>::Scalar>, internal::member_redux<BinaryOp,Scalar>,
Direction Direction
> Type; > Type;
}; };
enum { enum {
IsVertical = (Direction==Vertical) ? 1 : 0, isVertical = (Direction==Vertical) ? 1 : 0,
IsHorizontal = (Direction==Horizontal) ? 1 : 0 isHorizontal = (Direction==Horizontal) ? 1 : 0
}; };
protected: protected:
/** \internal typedef typename internal::conditional<isVertical,
* \returns the i-th subvector according to the \c Direction */
typedef typename internal::conditional<Direction==Vertical,
typename ExpressionType::ColXpr, typename ExpressionType::ColXpr,
typename ExpressionType::RowXpr>::type SubVector; typename ExpressionType::RowXpr>::type SubVector;
/** \internal
* \returns the i-th subvector according to the \c Direction */
EIGEN_DEVICE_FUNC
SubVector subVector(Index i) SubVector subVector(Index i)
{ {
return SubVector(m_matrix.derived(),i); return SubVector(m_matrix.derived(),i);
...@@ -210,58 +200,62 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -210,58 +200,62 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** \internal /** \internal
* \returns the number of subvectors in the direction \c Direction */ * \returns the number of subvectors in the direction \c Direction */
EIGEN_DEVICE_FUNC
Index subVectors() const Index subVectors() const
{ return Direction==Vertical?m_matrix.cols():m_matrix.rows(); } { return isVertical?m_matrix.cols():m_matrix.rows(); }
template<typename OtherDerived> struct ExtendedType { template<typename OtherDerived> struct ExtendedType {
typedef Replicate<OtherDerived, typedef Replicate<OtherDerived,
Direction==Vertical ? 1 : ExpressionType::RowsAtCompileTime, isVertical ? 1 : ExpressionType::RowsAtCompileTime,
Direction==Horizontal ? 1 : ExpressionType::ColsAtCompileTime> Type; isHorizontal ? 1 : ExpressionType::ColsAtCompileTime> Type;
}; };
/** \internal /** \internal
* Replicates a vector to match the size of \c *this */ * Replicates a vector to match the size of \c *this */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename ExtendedType<OtherDerived>::Type typename ExtendedType<OtherDerived>::Type
extendedTo(const DenseBase<OtherDerived>& other) const extendedTo(const DenseBase<OtherDerived>& other) const
{ {
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxColsAtCompileTime==1), EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxColsAtCompileTime==1),
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED) YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxRowsAtCompileTime==1), EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxRowsAtCompileTime==1),
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED) YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
return typename ExtendedType<OtherDerived>::Type return typename ExtendedType<OtherDerived>::Type
(other.derived(), (other.derived(),
Direction==Vertical ? 1 : m_matrix.rows(), isVertical ? 1 : m_matrix.rows(),
Direction==Horizontal ? 1 : m_matrix.cols()); isHorizontal ? 1 : m_matrix.cols());
} }
template<typename OtherDerived> struct OppositeExtendedType { template<typename OtherDerived> struct OppositeExtendedType {
typedef Replicate<OtherDerived, typedef Replicate<OtherDerived,
Direction==Horizontal ? 1 : ExpressionType::RowsAtCompileTime, isHorizontal ? 1 : ExpressionType::RowsAtCompileTime,
Direction==Vertical ? 1 : ExpressionType::ColsAtCompileTime> Type; isVertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
}; };
/** \internal /** \internal
* Replicates a vector in the opposite direction to match the size of \c *this */ * Replicates a vector in the opposite direction to match the size of \c *this */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename OppositeExtendedType<OtherDerived>::Type typename OppositeExtendedType<OtherDerived>::Type
extendedToOpposite(const DenseBase<OtherDerived>& other) const extendedToOpposite(const DenseBase<OtherDerived>& other) const
{ {
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1), EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isHorizontal, OtherDerived::MaxColsAtCompileTime==1),
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED) YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1), EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(isVertical, OtherDerived::MaxRowsAtCompileTime==1),
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED) YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
return typename OppositeExtendedType<OtherDerived>::Type return typename OppositeExtendedType<OtherDerived>::Type
(other.derived(), (other.derived(),
Direction==Horizontal ? 1 : m_matrix.rows(), isHorizontal ? 1 : m_matrix.rows(),
Direction==Vertical ? 1 : m_matrix.cols()); isVertical ? 1 : m_matrix.cols());
} }
public: public:
EIGEN_DEVICE_FUNC
inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {} explicit inline VectorwiseOp(ExpressionType& matrix) : m_matrix(matrix) {}
/** \internal */ /** \internal */
EIGEN_DEVICE_FUNC
inline const ExpressionType& _expression() const { return m_matrix; } inline const ExpressionType& _expression() const { return m_matrix; }
/** \returns a row or column vector expression of \c *this reduxed by \a func /** \returns a row or column vector expression of \c *this reduxed by \a func
...@@ -272,80 +266,126 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -272,80 +266,126 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* \sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise() * \sa class VectorwiseOp, DenseBase::colwise(), DenseBase::rowwise()
*/ */
template<typename BinaryOp> template<typename BinaryOp>
EIGEN_DEVICE_FUNC
const typename ReduxReturnType<BinaryOp>::Type const typename ReduxReturnType<BinaryOp>::Type
redux(const BinaryOp& func = BinaryOp()) const redux(const BinaryOp& func = BinaryOp()) const
{ return typename ReduxReturnType<BinaryOp>::Type(_expression(), func); } { return typename ReduxReturnType<BinaryOp>::Type(_expression(), internal::member_redux<BinaryOp,Scalar>(func)); }
// Named return types of the partial-reduction members declared further down
// (minCoeff(), maxCoeff(), squaredNorm(), norm(), ...). Each one is the ::Type
// produced by the ReturnType helper for the matching internal::member_* functor.
typedef typename ReturnType<internal::member_minCoeff>::Type MinCoeffReturnType;
typedef typename ReturnType<internal::member_maxCoeff>::Type MaxCoeffReturnType;
// The norm-type reductions pass RealScalar to ReturnType; their documentation
// below states that the result has real entries even for complex matrices.
typedef typename ReturnType<internal::member_squaredNorm,RealScalar>::Type SquaredNormReturnType;
typedef typename ReturnType<internal::member_norm,RealScalar>::Type NormReturnType;
typedef typename ReturnType<internal::member_blueNorm,RealScalar>::Type BlueNormReturnType;
typedef typename ReturnType<internal::member_stableNorm,RealScalar>::Type StableNormReturnType;
typedef typename ReturnType<internal::member_hypotNorm,RealScalar>::Type HypotNormReturnType;
typedef typename ReturnType<internal::member_sum>::Type SumReturnType;
typedef typename ReturnType<internal::member_mean>::Type MeanReturnType;
typedef typename ReturnType<internal::member_all>::Type AllReturnType;
typedef typename ReturnType<internal::member_any>::Type AnyReturnType;
// count() is spelled out directly as a PartialReduxExpr over member_count<Index>
// instead of going through ReturnType.
typedef PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> CountReturnType;
typedef typename ReturnType<internal::member_prod>::Type ProdReturnType;
// Return types of reverse() const and of the writable reverse(), respectively.
typedef Reverse<const ExpressionType, Direction> ConstReverseReturnType;
typedef Reverse<ExpressionType, Direction> ReverseReturnType;
// Return type of lpNorm<p>(); wrapped in a struct template because it depends on
// the compile-time exponent p.
template<int p> struct LpNormReturnType {
typedef PartialReduxExpr<ExpressionType, internal::member_lpnorm<p,RealScalar>,Direction> Type;
};
/** \returns a row (or column) vector expression of the smallest coefficient /** \returns a row (or column) vector expression of the smallest coefficient
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
* *
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* Example: \include PartialRedux_minCoeff.cpp * Example: \include PartialRedux_minCoeff.cpp
* Output: \verbinclude PartialRedux_minCoeff.out * Output: \verbinclude PartialRedux_minCoeff.out
* *
* \sa DenseBase::minCoeff() */ * \sa DenseBase::minCoeff() */
const typename ReturnType<internal::member_minCoeff>::Type minCoeff() const EIGEN_DEVICE_FUNC
{ return _expression(); } const MinCoeffReturnType minCoeff() const
{ return MinCoeffReturnType(_expression()); }
/** \returns a row (or column) vector expression of the largest coefficient /** \returns a row (or column) vector expression of the largest coefficient
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
* *
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* Example: \include PartialRedux_maxCoeff.cpp * Example: \include PartialRedux_maxCoeff.cpp
* Output: \verbinclude PartialRedux_maxCoeff.out * Output: \verbinclude PartialRedux_maxCoeff.out
* *
* \sa DenseBase::maxCoeff() */ * \sa DenseBase::maxCoeff() */
const typename ReturnType<internal::member_maxCoeff>::Type maxCoeff() const EIGEN_DEVICE_FUNC
{ return _expression(); } const MaxCoeffReturnType maxCoeff() const
{ return MaxCoeffReturnType(_expression()); }
/** \returns a row (or column) vector expression of the squared norm /** \returns a row (or column) vector expression of the squared norm
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
* This is a vector with real entries, even if the original matrix has complex entries.
* *
* Example: \include PartialRedux_squaredNorm.cpp * Example: \include PartialRedux_squaredNorm.cpp
* Output: \verbinclude PartialRedux_squaredNorm.out * Output: \verbinclude PartialRedux_squaredNorm.out
* *
* \sa DenseBase::squaredNorm() */ * \sa DenseBase::squaredNorm() */
const typename ReturnType<internal::member_squaredNorm,RealScalar>::Type squaredNorm() const EIGEN_DEVICE_FUNC
{ return _expression(); } const SquaredNormReturnType squaredNorm() const
{ return SquaredNormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression.
* This is a vector with real entries, even if the original matrix has complex entries.
*
* Example: \include PartialRedux_norm.cpp
* Output: \verbinclude PartialRedux_norm.out
*
* \sa DenseBase::norm(), lpNorm() */
EIGEN_DEVICE_FUNC
const NormReturnType norm() const
{ return NormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the norm /** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
* This is a vector with real entries, even if the original matrix has complex entries.
* *
* Example: \include PartialRedux_norm.cpp * Example: \include PartialRedux_norm.cpp
* Output: \verbinclude PartialRedux_norm.out * Output: \verbinclude PartialRedux_norm.out
* *
* \sa DenseBase::norm() */ * \sa DenseBase::norm() */
const typename ReturnType<internal::member_norm,RealScalar>::Type norm() const template<int p>
{ return _expression(); } EIGEN_DEVICE_FUNC
const typename LpNormReturnType<p>::Type lpNorm() const
{ return typename LpNormReturnType<p>::Type(_expression()); }
/** \returns a row (or column) vector expression of the norm /** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression, using * of each column (or row) of the referenced expression, using
* blue's algorithm. * Blue's algorithm.
* This is a vector with real entries, even if the original matrix has complex entries.
* *
* \sa DenseBase::blueNorm() */ * \sa DenseBase::blueNorm() */
const typename ReturnType<internal::member_blueNorm,RealScalar>::Type blueNorm() const EIGEN_DEVICE_FUNC
{ return _expression(); } const BlueNormReturnType blueNorm() const
{ return BlueNormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the norm /** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression, avoiding * of each column (or row) of the referenced expression, avoiding
* underflow and overflow. * underflow and overflow.
* This is a vector with real entries, even if the original matrix has complex entries.
* *
* \sa DenseBase::stableNorm() */ * \sa DenseBase::stableNorm() */
const typename ReturnType<internal::member_stableNorm,RealScalar>::Type stableNorm() const EIGEN_DEVICE_FUNC
{ return _expression(); } const StableNormReturnType stableNorm() const
{ return StableNormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the norm /** \returns a row (or column) vector expression of the norm
* of each column (or row) of the referenced expression, avoiding * of each column (or row) of the referenced expression, avoiding
* underflow and overflow using a concatenation of hypot() calls. * underflow and overflow using a concatenation of hypot() calls.
* This is a vector with real entries, even if the original matrix has complex entries.
* *
* \sa DenseBase::hypotNorm() */ * \sa DenseBase::hypotNorm() */
const typename ReturnType<internal::member_hypotNorm,RealScalar>::Type hypotNorm() const EIGEN_DEVICE_FUNC
{ return _expression(); } const HypotNormReturnType hypotNorm() const
{ return HypotNormReturnType(_expression()); }
/** \returns a row (or column) vector expression of the sum /** \returns a row (or column) vector expression of the sum
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
...@@ -354,39 +394,48 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -354,39 +394,48 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_sum.out * Output: \verbinclude PartialRedux_sum.out
* *
* \sa DenseBase::sum() */ * \sa DenseBase::sum() */
const typename ReturnType<internal::member_sum>::Type sum() const EIGEN_DEVICE_FUNC
{ return _expression(); } const SumReturnType sum() const
{ return SumReturnType(_expression()); }
/** \returns a row (or column) vector expression of the mean /** \returns a row (or column) vector expression of the mean
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
* *
* \sa DenseBase::mean() */ * \sa DenseBase::mean() */
const typename ReturnType<internal::member_mean>::Type mean() const EIGEN_DEVICE_FUNC
{ return _expression(); } const MeanReturnType mean() const
{ return MeanReturnType(_expression()); }
/** \returns a row (or column) vector expression representing /** \returns a row (or column) vector expression representing
* whether \b all coefficients of each respective column (or row) are \c true. * whether \b all coefficients of each respective column (or row) are \c true.
* This expression can be assigned to a vector with entries of type \c bool.
* *
* \sa DenseBase::all() */ * \sa DenseBase::all() */
const typename ReturnType<internal::member_all>::Type all() const EIGEN_DEVICE_FUNC
{ return _expression(); } const AllReturnType all() const
{ return AllReturnType(_expression()); }
/** \returns a row (or column) vector expression representing /** \returns a row (or column) vector expression representing
* whether \b at \b least one coefficient of each respective column (or row) is \c true. * whether \b at \b least one coefficient of each respective column (or row) is \c true.
* This expression can be assigned to a vector with entries of type \c bool.
* *
* \sa DenseBase::any() */ * \sa DenseBase::any() */
const typename ReturnType<internal::member_any>::Type any() const EIGEN_DEVICE_FUNC
{ return _expression(); } const AnyReturnType any() const
{ return AnyReturnType(_expression()); }
/** \returns a row (or column) vector expression representing /** \returns a row (or column) vector expression representing
* the number of \c true coefficients of each respective column (or row). * the number of \c true coefficients of each respective column (or row).
* This expression can be assigned to a vector whose entries have the same type as is used to
* index entries of the original matrix; for dense matrices, this is \c std::ptrdiff_t .
* *
* Example: \include PartialRedux_count.cpp * Example: \include PartialRedux_count.cpp
* Output: \verbinclude PartialRedux_count.out * Output: \verbinclude PartialRedux_count.out
* *
* \sa DenseBase::count() */ * \sa DenseBase::count() */
const PartialReduxExpr<ExpressionType, internal::member_count<Index>, Direction> count() const EIGEN_DEVICE_FUNC
{ return _expression(); } const CountReturnType count() const
{ return CountReturnType(_expression()); }
/** \returns a row (or column) vector expression of the product /** \returns a row (or column) vector expression of the product
* of each column (or row) of the referenced expression. * of each column (or row) of the referenced expression.
...@@ -395,8 +444,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -395,8 +444,9 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude PartialRedux_prod.out * Output: \verbinclude PartialRedux_prod.out
* *
* \sa DenseBase::prod() */ * \sa DenseBase::prod() */
const typename ReturnType<internal::member_prod>::Type prod() const EIGEN_DEVICE_FUNC
{ return _expression(); } const ProdReturnType prod() const
{ return ProdReturnType(_expression()); }
/** \returns a matrix expression /** \returns a matrix expression
...@@ -406,10 +456,20 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -406,10 +456,20 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* Output: \verbinclude Vectorwise_reverse.out * Output: \verbinclude Vectorwise_reverse.out
* *
* \sa DenseBase::reverse() */ * \sa DenseBase::reverse() */
const Reverse<ExpressionType, Direction> reverse() const EIGEN_DEVICE_FUNC
{ return Reverse<ExpressionType, Direction>( _expression() ); } const ConstReverseReturnType reverse() const
{ return ConstReverseReturnType( _expression() ); }
typedef Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1> ReplicateReturnType; /** \returns a writable matrix expression
* where each column (or row) are reversed.
*
* \sa reverse() const */
EIGEN_DEVICE_FUNC
ReverseReturnType reverse()
{ return ReverseReturnType( _expression() ); }
typedef Replicate<ExpressionType,(isVertical?Dynamic:1),(isHorizontal?Dynamic:1)> ReplicateReturnType;
EIGEN_DEVICE_FUNC
const ReplicateReturnType replicate(Index factor) const; const ReplicateReturnType replicate(Index factor) const;
/** /**
...@@ -421,17 +481,20 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -421,17 +481,20 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
* \sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate * \sa VectorwiseOp::replicate(Index), DenseBase::replicate(), class Replicate
*/ */
// NOTE implemented here because of sunstudio's compilation errors // NOTE implemented here because of sunstudio's compilation errors
template<int Factor> const Replicate<ExpressionType,(IsVertical?Factor:1),(IsHorizontal?Factor:1)> // isVertical*Factor+isHorizontal instead of (isVertical?Factor:1) to handle CUDA bug with ternary operator
template<int Factor> const Replicate<ExpressionType,isVertical*Factor+isHorizontal,isHorizontal*Factor+isVertical>
EIGEN_DEVICE_FUNC
replicate(Index factor = Factor) const replicate(Index factor = Factor) const
{ {
return Replicate<ExpressionType,Direction==Vertical?Factor:1,Direction==Horizontal?Factor:1> return Replicate<ExpressionType,(isVertical?Factor:1),(isHorizontal?Factor:1)>
(_expression(),Direction==Vertical?factor:1,Direction==Horizontal?factor:1); (_expression(),isVertical?factor:1,isHorizontal?factor:1);
} }
/////////// Arithmetic operators /////////// /////////// Arithmetic operators ///////////
/** Copies the vector \a other to each subvector of \c *this */ /** Copies the vector \a other to each subvector of \c *this */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator=(const DenseBase<OtherDerived>& other) ExpressionType& operator=(const DenseBase<OtherDerived>& other)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
...@@ -442,6 +505,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -442,6 +505,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Adds the vector \a other to each subvector of \c *this */ /** Adds the vector \a other to each subvector of \c *this */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator+=(const DenseBase<OtherDerived>& other) ExpressionType& operator+=(const DenseBase<OtherDerived>& other)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
...@@ -451,6 +515,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -451,6 +515,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Subtracts the vector \a other from each subvector of \c *this */ /** Subtracts the vector \a other from each subvector of \c *this */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator-=(const DenseBase<OtherDerived>& other) ExpressionType& operator-=(const DenseBase<OtherDerived>& other)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
...@@ -460,6 +525,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -460,6 +525,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Multiplies each subvector of \c *this by the vector \a other */ /** Multiplies each subvector of \c *this by the vector \a other */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator*=(const DenseBase<OtherDerived>& other) ExpressionType& operator*=(const DenseBase<OtherDerived>& other)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
...@@ -471,6 +537,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -471,6 +537,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Divides each subvector of \c *this by the vector \a other */ /** Divides each subvector of \c *this by the vector \a other */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
ExpressionType& operator/=(const DenseBase<OtherDerived>& other) ExpressionType& operator/=(const DenseBase<OtherDerived>& other)
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
...@@ -481,8 +548,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -481,8 +548,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
} }
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */ /** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_sum_op<Scalar>, CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
const ExpressionTypeNestedCleaned, const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type> const typename ExtendedType<OtherDerived>::Type>
operator+(const DenseBase<OtherDerived>& other) const operator+(const DenseBase<OtherDerived>& other) const
...@@ -494,7 +561,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -494,7 +561,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */ /** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
template<typename OtherDerived> template<typename OtherDerived>
CwiseBinaryOp<internal::scalar_difference_op<Scalar>, EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
const ExpressionTypeNestedCleaned, const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type> const typename ExtendedType<OtherDerived>::Type>
operator-(const DenseBase<OtherDerived>& other) const operator-(const DenseBase<OtherDerived>& other) const
...@@ -506,10 +574,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -506,10 +574,11 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression where each subvector is the product of the vector \a other /** Returns the expression where each subvector is the product of the vector \a other
* by the corresponding subvector of \c *this */ * by the corresponding subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_product_op<Scalar>, CwiseBinaryOp<internal::scalar_product_op<Scalar>,
const ExpressionTypeNestedCleaned, const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type> const typename ExtendedType<OtherDerived>::Type>
EIGEN_DEVICE_FUNC
operator*(const DenseBase<OtherDerived>& other) const operator*(const DenseBase<OtherDerived>& other) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
...@@ -521,6 +590,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -521,6 +590,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression where each subvector is the quotient of the corresponding /** Returns the expression where each subvector is the quotient of the corresponding
* subvector of \c *this by the vector \a other */ * subvector of \c *this by the vector \a other */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
const ExpressionTypeNestedCleaned, const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type> const typename ExtendedType<OtherDerived>::Type>
...@@ -531,32 +601,36 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -531,32 +601,36 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived) EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix / extendedTo(other.derived()); return m_matrix / extendedTo(other.derived());
} }
/** \returns an expression where each column of row of the referenced matrix are normalized. /** \returns an expression where each column (or row) of the referenced matrix are normalized.
* The referenced matrix is \b not modified. * The referenced matrix is \b not modified.
* \sa MatrixBase::normalized(), normalize() * \sa MatrixBase::normalized(), normalize()
*/ */
EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
const ExpressionTypeNestedCleaned, const ExpressionTypeNestedCleaned,
const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type> const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); } normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }
/** Normalize in-place each row or column of the referenced matrix. /** Normalize in-place each row or column of the referenced matrix.
* \sa MatrixBase::normalize(), normalized() * \sa MatrixBase::normalize(), normalized()
*/ */
void normalize() { EIGEN_DEVICE_FUNC void normalize() {
m_matrix = this->normalized(); m_matrix = this->normalized();
} }
EIGEN_DEVICE_FUNC inline void reverseInPlace();
/////////// Geometry module /////////// /////////// Geometry module ///////////
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
Homogeneous<ExpressionType,Direction> homogeneous() const; EIGEN_DEVICE_FUNC
#endif HomogeneousReturnType homogeneous() const;
typedef typename ExpressionType::PlainObject CrossReturnType; typedef typename ExpressionType::PlainObject CrossReturnType;
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const; const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
enum { enum {
...@@ -581,25 +655,15 @@ template<typename ExpressionType, int Direction> class VectorwiseOp ...@@ -581,25 +655,15 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Horizontal ? HNormalized_SizeMinusOne : 1> > Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
HNormalizedReturnType; HNormalizedReturnType;
EIGEN_DEVICE_FUNC
const HNormalizedReturnType hnormalized() const; const HNormalizedReturnType hnormalized() const;
protected: protected:
ExpressionTypeNested m_matrix; ExpressionTypeNested m_matrix;
}; };
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations //const colwise moved to DenseBase.h due to CUDA compiler bug
*
* Example: \include MatrixBase_colwise.cpp
* Output: \verbinclude MatrixBase_colwise.out
*
* \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstColwiseReturnType
DenseBase<Derived>::colwise() const
{
return derived();
}
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
* *
...@@ -609,22 +673,11 @@ template<typename Derived> ...@@ -609,22 +673,11 @@ template<typename Derived>
inline typename DenseBase<Derived>::ColwiseReturnType inline typename DenseBase<Derived>::ColwiseReturnType
DenseBase<Derived>::colwise() DenseBase<Derived>::colwise()
{ {
return derived(); return ColwiseReturnType(derived());
} }
/** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations //const rowwise moved to DenseBase.h due to CUDA compiler bug
*
* Example: \include MatrixBase_rowwise.cpp
* Output: \verbinclude MatrixBase_rowwise.out
*
* \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
*/
template<typename Derived>
inline const typename DenseBase<Derived>::ConstRowwiseReturnType
DenseBase<Derived>::rowwise() const
{
return derived();
}
/** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations /** \returns a writable VectorwiseOp wrapper of *this providing additional partial reduction operations
* *
...@@ -634,7 +687,7 @@ template<typename Derived> ...@@ -634,7 +687,7 @@ template<typename Derived>
inline typename DenseBase<Derived>::RowwiseReturnType inline typename DenseBase<Derived>::RowwiseReturnType
DenseBase<Derived>::rowwise() DenseBase<Derived>::rowwise()
{ {
return derived(); return RowwiseReturnType(derived());
} }
} // end namespace Eigen } // end namespace Eigen
......
...@@ -22,6 +22,7 @@ struct visitor_impl ...@@ -22,6 +22,7 @@ struct visitor_impl
row = (UnrollCount-1) % Derived::RowsAtCompileTime row = (UnrollCount-1) % Derived::RowsAtCompileTime
}; };
EIGEN_DEVICE_FUNC
static inline void run(const Derived &mat, Visitor& visitor) static inline void run(const Derived &mat, Visitor& visitor)
{ {
visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor); visitor_impl<Visitor, Derived, UnrollCount-1>::run(mat, visitor);
...@@ -32,6 +33,7 @@ struct visitor_impl ...@@ -32,6 +33,7 @@ struct visitor_impl
template<typename Visitor, typename Derived> template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, 1> struct visitor_impl<Visitor, Derived, 1>
{ {
EIGEN_DEVICE_FUNC
static inline void run(const Derived &mat, Visitor& visitor) static inline void run(const Derived &mat, Visitor& visitor)
{ {
return visitor.init(mat.coeff(0, 0), 0, 0); return visitor.init(mat.coeff(0, 0), 0, 0);
...@@ -41,7 +43,7 @@ struct visitor_impl<Visitor, Derived, 1> ...@@ -41,7 +43,7 @@ struct visitor_impl<Visitor, Derived, 1>
template<typename Visitor, typename Derived> template<typename Visitor, typename Derived>
struct visitor_impl<Visitor, Derived, Dynamic> struct visitor_impl<Visitor, Derived, Dynamic>
{ {
typedef typename Derived::Index Index; EIGEN_DEVICE_FUNC
static inline void run(const Derived& mat, Visitor& visitor) static inline void run(const Derived& mat, Visitor& visitor)
{ {
visitor.init(mat.coeff(0,0), 0, 0); visitor.init(mat.coeff(0,0), 0, 0);
...@@ -53,6 +55,33 @@ struct visitor_impl<Visitor, Derived, Dynamic> ...@@ -53,6 +55,33 @@ struct visitor_impl<Visitor, Derived, Dynamic>
} }
}; };
// Evaluator adaptor.
//
// Bundles the evaluator of an expression with a reference to the expression
// itself, and exposes the small read-only interface (rows/cols/size/coeff)
// so that DenseBase::visit() can hand it to visitor_impl in place of the
// expression. Only a reference to the expression is stored, so the
// expression must outlive this object.
template<typename XprType>
class visitor_evaluator
{
public:
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;

  enum {
    RowsAtCompileTime = XprType::RowsAtCompileTime,
    CoeffReadCost = internal::evaluator<XprType>::CoeffReadCost
  };

  /** Creates the evaluator of \a xpr and remembers \a xpr for size queries. */
  EIGEN_DEVICE_FUNC
  explicit visitor_evaluator(const XprType &xpr)
    : m_evaluator(xpr),
      m_xpr(xpr)
  {}

  /** \returns the number of rows of the wrapped expression. */
  EIGEN_DEVICE_FUNC
  Index rows() const
  {
    return m_xpr.rows();
  }

  /** \returns the number of columns of the wrapped expression. */
  EIGEN_DEVICE_FUNC
  Index cols() const
  {
    return m_xpr.cols();
  }

  /** \returns the number of coefficients of the wrapped expression. */
  EIGEN_DEVICE_FUNC
  Index size() const
  {
    return m_xpr.size();
  }

  /** \returns the coefficient at (\a row, \a col), read through the evaluator. */
  EIGEN_DEVICE_FUNC
  CoeffReturnType coeff(Index row, Index col) const
  {
    return m_evaluator.coeff(row, col);
  }

protected:
  // Declaration order matters: it fixes the order in which the constructor
  // initializes these two members.
  internal::evaluator<XprType> m_evaluator;
  const XprType &m_xpr;
};
} // end namespace internal } // end namespace internal
/** Applies the visitor \a visitor to all the coefficients of the matrix or vector. /** Applies the visitor \a visitor to all the coefficients of the matrix or vector.
...@@ -74,19 +103,17 @@ struct visitor_impl<Visitor, Derived, Dynamic> ...@@ -74,19 +103,17 @@ struct visitor_impl<Visitor, Derived, Dynamic>
*/ */
template<typename Derived> template<typename Derived>
template<typename Visitor> template<typename Visitor>
EIGEN_DEVICE_FUNC
void DenseBase<Derived>::visit(Visitor& visitor) const void DenseBase<Derived>::visit(Visitor& visitor) const
{ {
typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested; typedef typename internal::visitor_evaluator<Derived> ThisEvaluator;
typename Derived::Nested thisNested(derived()); ThisEvaluator thisEval(derived());
enum { unroll = SizeAtCompileTime != Dynamic enum {
&& CoeffReadCost != Dynamic unroll = SizeAtCompileTime != Dynamic
&& (SizeAtCompileTime == 1 || internal::functor_traits<Visitor>::Cost != Dynamic) && SizeAtCompileTime * ThisEvaluator::CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost <= EIGEN_UNROLLING_LIMIT
&& SizeAtCompileTime * CoeffReadCost + (SizeAtCompileTime-1) * internal::functor_traits<Visitor>::Cost };
<= EIGEN_UNROLLING_LIMIT }; return internal::visitor_impl<Visitor, ThisEvaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(thisEval, visitor);
return internal::visitor_impl<Visitor, ThisNested,
unroll ? int(SizeAtCompileTime) : Dynamic
>::run(thisNested, visitor);
} }
namespace internal { namespace internal {
...@@ -97,10 +124,10 @@ namespace internal { ...@@ -97,10 +124,10 @@ namespace internal {
template <typename Derived> template <typename Derived>
struct coeff_visitor struct coeff_visitor
{ {
typedef typename Derived::Index Index;
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
Index row, col; Index row, col;
Scalar res; Scalar res;
EIGEN_DEVICE_FUNC
inline void init(const Scalar& value, Index i, Index j) inline void init(const Scalar& value, Index i, Index j)
{ {
res = value; res = value;
...@@ -117,8 +144,8 @@ struct coeff_visitor ...@@ -117,8 +144,8 @@ struct coeff_visitor
template <typename Derived> template <typename Derived>
struct min_coeff_visitor : coeff_visitor<Derived> struct min_coeff_visitor : coeff_visitor<Derived>
{ {
typedef typename Derived::Index Index;
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
EIGEN_DEVICE_FUNC
void operator() (const Scalar& value, Index i, Index j) void operator() (const Scalar& value, Index i, Index j)
{ {
if(value < this->res) if(value < this->res)
...@@ -145,8 +172,8 @@ struct functor_traits<min_coeff_visitor<Scalar> > { ...@@ -145,8 +172,8 @@ struct functor_traits<min_coeff_visitor<Scalar> > {
template <typename Derived> template <typename Derived>
struct max_coeff_visitor : coeff_visitor<Derived> struct max_coeff_visitor : coeff_visitor<Derived>
{ {
typedef typename Derived::Index Index; typedef typename Derived::Scalar Scalar;
typedef typename Derived::Scalar Scalar; EIGEN_DEVICE_FUNC
void operator() (const Scalar& value, Index i, Index j) void operator() (const Scalar& value, Index i, Index j)
{ {
if(value > this->res) if(value > this->res)
...@@ -167,13 +194,15 @@ struct functor_traits<max_coeff_visitor<Scalar> > { ...@@ -167,13 +194,15 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
} // end namespace internal } // end namespace internal
/** \returns the minimum of all coefficients of *this and puts in *row and *col its location. /** \fn DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
* \returns the minimum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff() * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>
EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
{ {
...@@ -187,27 +216,30 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const ...@@ -187,27 +216,30 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
/** \returns the minimum of all coefficients of *this and puts in *index its location. /** \returns the minimum of all coefficients of *this and puts in *index its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff() * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>
EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff(IndexType* index) const DenseBase<Derived>::minCoeff(IndexType* index) const
{ {
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
internal::min_coeff_visitor<Derived> minVisitor; internal::min_coeff_visitor<Derived> minVisitor;
this->visit(minVisitor); this->visit(minVisitor);
*index = (RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row; *index = IndexType((RowsAtCompileTime==1) ? minVisitor.col : minVisitor.row);
return minVisitor.res; return minVisitor.res;
} }
/** \returns the maximum of all coefficients of *this and puts in *row and *col its location. /** \fn DenseBase<Derived>::maxCoeff(IndexType* rowId, IndexType* colId) const
* \returns the maximum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff() * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>
EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
{ {
...@@ -225,6 +257,7 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const ...@@ -225,6 +257,7 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>
EIGEN_DEVICE_FUNC
typename internal::traits<Derived>::Scalar typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff(IndexType* index) const DenseBase<Derived>::maxCoeff(IndexType* index) const
{ {
......
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_COMPLEX_AVX_H
#define EIGEN_COMPLEX_AVX_H
namespace Eigen {
namespace internal {
//---------- float ----------
// Thin wrapper around one 256-bit AVX float register. It is a distinct type so
// that complex<float> packet operations can be specialized separately from the
// plain float packet operations.
struct Packet4cf
{
  // Raw register contents.
  __m256 v;

  // Default construction leaves v uninitialized.
  EIGEN_STRONG_INLINE Packet4cf() {}

  // Wraps an existing register; explicit so a bare __m256 is never silently
  // reinterpreted as a complex packet.
  EIGEN_STRONG_INLINE explicit Packet4cf(const __m256& a) : v(a) {}
};
// Vectorization traits for std::complex<float> under AVX: the full packet type
// is Packet4cf (size 4) and the half packet type is Packet2cf.
template<> struct packet_traits<std::complex<float> > : default_packet_traits
{
typedef Packet4cf type;
typedef Packet2cf half;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size = 4,
HasHalfPacket = 1,
// Operations flagged as available for this packet type.
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
// Operations flagged as unavailable for this packet type.
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
// Reverse lookup from Packet4cf to its scalar type, element count, required
// alignment and half-sized packet type.
template<> struct unpacket_traits<Packet4cf>
{
  typedef std::complex<float> type;
  typedef Packet2cf half;
  enum {
    size = 4,
    alignment = Aligned32
  };
};
// Element-wise sum of two Packet4cf; adding the underlying floats lane by lane
// adds real and imaginary parts independently.
template<> EIGEN_STRONG_INLINE Packet4cf padd<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
  const __m256 sum = _mm256_add_ps(a.v, b.v);
  return Packet4cf(sum);
}
// Element-wise difference a - b of two Packet4cf, computed on the underlying
// float lanes.
template<> EIGEN_STRONG_INLINE Packet4cf psub<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
  const __m256 difference = _mm256_sub_ps(a.v, b.v);
  return Packet4cf(difference);
}
// Negates a Packet4cf by delegating to the pnegate overload that accepts the
// raw register a.v.
template<> EIGEN_STRONG_INLINE Packet4cf pnegate(const Packet4cf& a)
{
  const __m256 negated = pnegate(a.v);
  return Packet4cf(negated);
}
// Complex conjugate of a Packet4cf: XOR with a mask whose odd-indexed 32-bit
// elements are 0x80000000 flips the sign bit of every second float (the
// imaginary slots) and leaves the others untouched.
template<> EIGEN_STRONG_INLINE Packet4cf pconj(const Packet4cf& a)
{
  const __m256 imag_sign_bits = _mm256_castsi256_ps(
      _mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
  const __m256 conjugated = _mm256_xor_ps(a.v, imag_sign_bits);
  return Packet4cf(conjugated);
}
// Complex product of two Packet4cf.
// For a = ar + i*ai and b = br + i*bi the result is
// (ar*br - ai*bi) + i*(ar*bi + ai*br).
template<> EIGEN_STRONG_INLINE Packet4cf pmul<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
  // moveldup copies each even-indexed float (ar) into the following odd slot.
  const __m256 a_real_dup = _mm256_moveldup_ps(a.v);
  // movehdup copies each odd-indexed float (ai) into the preceding even slot.
  const __m256 a_imag_dup = _mm256_movehdup_ps(a.v);
  // Swap the two floats of every complex in b: (br, bi) -> (bi, br).
  const __m256 b_swapped = _mm256_permute_ps(b.v, _MM_SHUFFLE(2,3,0,1));

  const __m256 real_scaled = _mm256_mul_ps(a_real_dup, b.v);      // (ar*br, ar*bi)
  const __m256 imag_scaled = _mm256_mul_ps(a_imag_dup, b_swapped); // (ai*bi, ai*br)

  // addsub subtracts in even slots and adds in odd slots.
  return Packet4cf(_mm256_addsub_ps(real_scaled, imag_scaled));
}
// Bitwise AND of the raw register contents of two Packet4cf.
template<> EIGEN_STRONG_INLINE Packet4cf pand <Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
  const __m256 bits = _mm256_and_ps(a.v, b.v);
  return Packet4cf(bits);
}
// Bitwise OR of the raw register contents of two Packet4cf.
template<> EIGEN_STRONG_INLINE Packet4cf por <Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
  const __m256 bits = _mm256_or_ps(a.v, b.v);
  return Packet4cf(bits);
}
// Bitwise XOR of the raw register contents of two Packet4cf.
template<> EIGEN_STRONG_INLINE Packet4cf pxor <Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
  const __m256 bits = _mm256_xor_ps(a.v, b.v);
  return Packet4cf(bits);
}
// Forwards (a.v, b.v) to _mm256_andnot_ps, in that argument order, and wraps
// the result.
template<> EIGEN_STRONG_INLINE Packet4cf pandnot<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
  const __m256 bits = _mm256_andnot_ps(a.v, b.v);
  return Packet4cf(bits);
}
// Aligned load of four consecutive complex<float> values, performed as an
// aligned load of eight floats starting at the real part of *from.
template<> EIGEN_STRONG_INLINE Packet4cf pload <Packet4cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  const float* scalars = &numext::real_ref(*from);
  return Packet4cf(pload<Packet8f>(scalars));
}
// Unaligned load of four consecutive complex<float> values, performed as an
// unaligned load of eight floats starting at the real part of *from.
template<> EIGEN_STRONG_INLINE Packet4cf ploadu<Packet4cf>(const std::complex<float>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  const float* scalars = &numext::real_ref(*from);
  return Packet4cf(ploadu<Packet8f>(scalars));
}
// Broadcasts one complex<float> to all four slots. The 64 bits of the complex
// value are read through a double pointer so a single 64-bit broadcast fills
// the register.
template<> EIGEN_STRONG_INLINE Packet4cf pset1<Packet4cf>(const std::complex<float>& from)
{
  const double* as_double = (const double*)(const void*)&from;
  const __m256d replicated = _mm256_broadcast_sd(as_double);
  return Packet4cf(_mm256_castpd_ps(replicated));
}
// Builds a packet from two Packet2cf ploaddup results: the one for from[0]
// fills the lower 128 bits and the one for from[1] fills the upper 128 bits.
template<> EIGEN_STRONG_INLINE Packet4cf ploaddup<Packet4cf>(const std::complex<float>* from)
{
  // FIXME The following might be optimized using _mm256_movedup_pd
  const Packet2cf lower = ploaddup<Packet2cf>(from);
  const Packet2cf upper = ploaddup<Packet2cf>(from+1);
  const __m256 widened = _mm256_castps128_ps256(lower.v);
  return Packet4cf(_mm256_insertf128_ps(widened, upper.v, 1));
}
// Aligned store of a Packet4cf, performed as an aligned store of the raw float
// register starting at the real part of *to.
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float>* to, const Packet4cf& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  float* scalars = &numext::real_ref(*to);
  pstore(scalars, from.v);
}
// Unaligned store of a Packet4cf, performed as an unaligned store of the raw
// float register starting at the real part of *to.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float>* to, const Packet4cf& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  float* scalars = &numext::real_ref(*to);
  pstoreu(scalars, from.v);
}
// Gathers four complex<float> values located at from[0], from[stride],
// from[2*stride] and from[3*stride] into one packet, element 0 first.
template<> EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(const std::complex<float>* from, Index stride)
{
  const std::complex<float>& c0 = from[0*stride];
  const std::complex<float>& c1 = from[1*stride];
  const std::complex<float>& c2 = from[2*stride];
  const std::complex<float>& c3 = from[3*stride];
  // _mm256_set_ps takes its arguments from the highest element down to the lowest.
  const __m256 gathered = _mm256_set_ps(std::imag(c3), std::real(c3),
                                        std::imag(c2), std::real(c2),
                                        std::imag(c1), std::real(c1),
                                        std::imag(c0), std::real(c0));
  return Packet4cf(gathered);
}
// Scatters the four complex<float> values of a packet to to[0], to[stride],
// to[2*stride] and to[3*stride], in that order. Each float is brought to
// element 0 with a shuffle and then read with _mm_cvtss_f32.
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index stride)
{
  // Lower 128 bits hold complex 0 and 1.
  __m128 lower_half = _mm256_extractf128_ps(from.v, 0);
  const float re0 = _mm_cvtss_f32(_mm_shuffle_ps(lower_half, lower_half, 0));
  const float im0 = _mm_cvtss_f32(_mm_shuffle_ps(lower_half, lower_half, 1));
  to[stride*0] = std::complex<float>(re0, im0);
  const float re1 = _mm_cvtss_f32(_mm_shuffle_ps(lower_half, lower_half, 2));
  const float im1 = _mm_cvtss_f32(_mm_shuffle_ps(lower_half, lower_half, 3));
  to[stride*1] = std::complex<float>(re1, im1);

  // Upper 128 bits hold complex 2 and 3.
  __m128 upper_half = _mm256_extractf128_ps(from.v, 1);
  const float re2 = _mm_cvtss_f32(_mm_shuffle_ps(upper_half, upper_half, 0));
  const float im2 = _mm_cvtss_f32(_mm_shuffle_ps(upper_half, upper_half, 1));
  to[stride*2] = std::complex<float>(re2, im2);
  const float re3 = _mm_cvtss_f32(_mm_shuffle_ps(upper_half, upper_half, 2));
  const float im3 = _mm_cvtss_f32(_mm_shuffle_ps(upper_half, upper_half, 3));
  to[stride*3] = std::complex<float>(re3, im3);
}
// Returns the first complex<float> of the packet by taking the lower 128 bits
// as a Packet2cf and delegating to its pfirst.
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet4cf>(const Packet4cf& a)
{
  const Packet2cf lower_half(_mm256_castps256_ps128(a.v));
  return pfirst(lower_half);
}
// Reverses the order of the four complex<float> values. Each 128-bit half is
// viewed as two 64-bit elements (one complex each) and swapped internally,
// then the two halves trade places.
template<> EIGEN_STRONG_INLINE Packet4cf preverse(const Packet4cf& a) {
  const __m128d lower_pairs = _mm_castps_pd(_mm256_extractf128_ps(a.v, 0));
  const __m128d upper_pairs = _mm_castps_pd(_mm256_extractf128_ps(a.v, 1));

  // Immediate 0x1 picks the high element of the first operand and the low
  // element of the second, i.e. it swaps the two complex values.
  const __m128 lower_swapped = _mm_castpd_ps(_mm_shuffle_pd(lower_pairs, lower_pairs, 0x1));
  const __m128 upper_swapped = _mm_castpd_ps(_mm_shuffle_pd(upper_pairs, upper_pairs, 0x1));

  __m256 reversed = _mm256_setzero_ps();
  reversed = _mm256_insertf128_ps(reversed, lower_swapped, 1);
  reversed = _mm256_insertf128_ps(reversed, upper_swapped, 0);
  return Packet4cf(reversed);
}
// Sum of the four complex<float> values: adds the two 128-bit halves as
// Packet2cf and reduces the resulting half packet.
template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet4cf>(const Packet4cf& a)
{
  const Packet2cf lower_half(_mm256_extractf128_ps(a.v,0));
  const Packet2cf upper_half(_mm256_extractf128_ps(a.v,1));
  return predux(padd(lower_half, upper_half));
}
// Reduces four packets at once using per-packet shuffles, horizontal adds and
// 128-bit lane permutes, returning a single packet built from vecs[0..3].
// NOTE(review): presumably element k of the result is the complex sum of
// vecs[k]; confirm against the generic preduxp contract.
template<> EIGEN_STRONG_INLINE Packet4cf preduxp<Packet4cf>(const Packet4cf* vecs)
{
  // Within each 128-bit lane reorder (re0, im0, re1, im1) -> (re0, re1, im0, im1).
  const Packet8f grouped0 = _mm256_shuffle_ps(vecs[0].v, vecs[0].v, _MM_SHUFFLE(3, 1, 2 ,0));
  const Packet8f grouped1 = _mm256_shuffle_ps(vecs[1].v, vecs[1].v, _MM_SHUFFLE(3, 1, 2 ,0));
  const Packet8f pair01 = _mm256_hadd_ps(grouped0, grouped1);

  const Packet8f grouped2 = _mm256_shuffle_ps(vecs[2].v, vecs[2].v, _MM_SHUFFLE(3, 1, 2 ,0));
  const Packet8f grouped3 = _mm256_shuffle_ps(vecs[3].v, vecs[3].v, _MM_SHUFFLE(3, 1, 2 ,0));
  const Packet8f pair23 = _mm256_hadd_ps(grouped2, grouped3);

  // Collect the low lanes of both partial results, then the high lanes, and add them.
  const Packet8f low_lanes  = _mm256_permute2f128_ps(pair01, pair23, 0 + (2<<4));
  const Packet8f high_lanes = _mm256_permute2f128_ps(pair01, pair23, 1 + (3<<4));
  return Packet4cf(_mm256_add_ps(low_lanes, high_lanes));
}
// Product of the four complex<float> values: multiplies the two 128-bit halves
// as Packet2cf and reduces the resulting half packet with predux_mul.
template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet4cf>(const Packet4cf& a)
{
  const Packet2cf lower_half(_mm256_extractf128_ps(a.v, 0));
  const Packet2cf upper_half(_mm256_extractf128_ps(a.v, 1));
  return predux_mul(pmul(lower_half, upper_half));
}
// palign for Packet4cf: forwards to the Packet8f implementation with the
// offset doubled, since every complex<float> occupies two floats. An offset
// of zero leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet4cf>
{
  static EIGEN_STRONG_INLINE void run(Packet4cf& first, const Packet4cf& second)
  {
    if (Offset != 0)
    {
      palign_impl<Offset*2,Packet8f>::run(first.v, second.v);
    }
  }
};
// conj_helper for (Packet4cf, Packet4cf) with flags <false,true>:
// the product is a * conj(b).
template<> struct conj_helper<Packet4cf, Packet4cf, false,true>
{
  // Returns a * conj(b).
  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
  {
    const Packet4cf b_conj = pconj(b);
    return internal::pmul(a, b_conj);
  }

  // Returns x * conj(y) + c.
  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
  {
    return padd(this->pmul(x, y), c);
  }
};
// conj_helper for (Packet4cf, Packet4cf) with flags <true,false>:
// the product is conj(a) * b.
template<> struct conj_helper<Packet4cf, Packet4cf, true,false>
{
  // Returns conj(a) * b.
  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
  {
    const Packet4cf a_conj = pconj(a);
    return internal::pmul(a_conj, b);
  }

  // Returns conj(x) * y + c.
  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
  {
    return padd(this->pmul(x, y), c);
  }
};
// conj_helper for (Packet4cf, Packet4cf) with flags <true,true>:
// the product is conj(a * b).
template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
{
  // Returns conj(a * b).
  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& a, const Packet4cf& b) const
  {
    const Packet4cf product = internal::pmul(a, b);
    return pconj(product);
  }

  // Returns conj(x * y) + c.
  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet4cf& y, const Packet4cf& c) const
  {
    return padd(this->pmul(x, y), c);
  }
};
// conj_helper for a real packet (Packet8f) times a complex packet (Packet4cf),
// no conjugation: the raw float registers are multiplied lane by lane.
template<> struct conj_helper<Packet8f, Packet4cf, false,false>
{
  // Lane-wise product of x with the raw floats of y.
  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
  {
    const Packet8f scaled = Eigen::internal::pmul(x, y.v);
    return Packet4cf(scaled);
  }

  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
  {
    return padd(c, this->pmul(x, y));
  }
};
// conj_helper for a complex packet (Packet4cf) times a real packet (Packet8f),
// no conjugation: the raw float registers are multiplied lane by lane.
template<> struct conj_helper<Packet4cf, Packet8f, false,false>
{
  // Lane-wise product of the raw floats of x with y.
  EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
  {
    const Packet8f scaled = Eigen::internal::pmul(x.v, y);
    return Packet4cf(scaled);
  }

  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
  {
    return padd(c, this->pmul(x, y));
  }
};
// Complex division a / b computed as (a * conj(b)) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
  const Packet4cf numerator = pmul(a, pconj(b));
  // Squares of every float in b: (br^2, bi^2, ...).
  const __m256 squares = _mm256_mul_ps(b.v, b.v);
  // 0xB1 swaps the two floats of each complex, giving (bi^2, br^2, ...).
  const __m256 squares_swapped = _mm256_shuffle_ps(squares, squares, 0xB1);
  // Both slots of each complex now hold br^2 + bi^2.
  const __m256 norm2 = _mm256_add_ps(squares, squares_swapped);
  return Packet4cf(_mm256_div_ps(numerator.v, norm2));
}
// Swaps the two floats of every complex value: (re, im) -> (im, re).
template<> EIGEN_STRONG_INLINE Packet4cf pcplxflip<Packet4cf>(const Packet4cf& x)
{
  const __m256 flipped = _mm256_shuffle_ps(x.v, x.v, _MM_SHUFFLE(2, 3, 0 ,1));
  return Packet4cf(flipped);
}
//---------- double ----------
// Thin wrapper around one 256-bit AVX double register. It is a distinct type
// so that complex<double> packet operations can be specialized separately from
// the plain double packet operations.
struct Packet2cd
{
  // Raw register contents.
  __m256d v;

  // Default construction leaves v uninitialized.
  EIGEN_STRONG_INLINE Packet2cd() {}

  // Wraps an existing register; explicit so a bare __m256d is never silently
  // reinterpreted as a complex packet.
  EIGEN_STRONG_INLINE explicit Packet2cd(const __m256d& a) : v(a) {}
};
// Vectorization traits for std::complex<double> under AVX: the full packet
// type is Packet2cd (size 2) and the half packet type is Packet1cd.
template<> struct packet_traits<std::complex<double> > : default_packet_traits
{
typedef Packet2cd type;
typedef Packet1cd half;
enum {
Vectorizable = 1,
// Note: 0 here, whereas the complex<float> traits in this file set it to 1.
AlignedOnScalar = 0,
size = 2,
HasHalfPacket = 1,
// Operations flagged as available for this packet type.
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasDiv = 1,
HasNegate = 1,
// Operations flagged as unavailable for this packet type.
HasAbs = 0,
HasAbs2 = 0,
HasMin = 0,
HasMax = 0,
HasSetLinear = 0
};
};
// Reverse lookup from Packet2cd to its scalar type, element count, required
// alignment and half-sized packet type.
template<> struct unpacket_traits<Packet2cd>
{
  typedef std::complex<double> type;
  typedef Packet1cd half;
  enum {
    size = 2,
    alignment = Aligned32
  };
};
// Element-wise sum of two Packet2cd; adding the underlying doubles lane by
// lane adds real and imaginary parts independently.
template<> EIGEN_STRONG_INLINE Packet2cd padd<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
  const __m256d sum = _mm256_add_pd(a.v, b.v);
  return Packet2cd(sum);
}
// Element-wise difference a - b of two Packet2cd, computed on the underlying
// double lanes.
template<> EIGEN_STRONG_INLINE Packet2cd psub<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
  const __m256d difference = _mm256_sub_pd(a.v, b.v);
  return Packet2cd(difference);
}
// Negates a Packet2cd by delegating to the pnegate overload that accepts the
// raw register a.v.
template<> EIGEN_STRONG_INLINE Packet2cd pnegate(const Packet2cd& a)
{
  const __m256d negated = pnegate(a.v);
  return Packet2cd(negated);
}
// Complex conjugate of a Packet2cd. The mask sets only the top bit of the
// second and fourth 64-bit elements (the imaginary slots), so the XOR flips
// the sign of the imaginary parts and leaves the real parts untouched.
template<> EIGEN_STRONG_INLINE Packet2cd pconj(const Packet2cd& a)
{
  // _mm256_set_epi32 lists 32-bit elements from the highest down to the lowest.
  const __m256d imag_sign_bits =
      _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
  const __m256d conjugated = _mm256_xor_pd(a.v, imag_sign_bits);
  return Packet2cd(conjugated);
}
// Complex product of two Packet2cd.
// For a = ar + i*ai and b = br + i*bi the result is
// (ar*br - ai*bi) + i*(ar*bi + ai*br).
template<> EIGEN_STRONG_INLINE Packet2cd pmul<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
  // Immediate 0x0 replicates the low double of each 128-bit lane: (ar, ar).
  const __m256d a_real_dup = _mm256_shuffle_pd(a.v,a.v,0x0);
  // Immediate 0xF replicates the high double of each 128-bit lane: (ai, ai).
  const __m256d a_imag_dup = _mm256_shuffle_pd(a.v,a.v,0xF);
  // Immediate 0x5 swaps the two doubles of each lane: (bi, br).
  const __m256d b_swapped = _mm256_shuffle_pd(b.v,b.v,0x5);

  const __m256d real_scaled = _mm256_mul_pd(a_real_dup, b.v);       // (ar*br, ar*bi)
  const __m256d imag_scaled = _mm256_mul_pd(a_imag_dup, b_swapped); // (ai*bi, ai*br)

  // addsub subtracts in even slots and adds in odd slots.
  return Packet2cd(_mm256_addsub_pd(real_scaled, imag_scaled));
}
// Bitwise AND of the raw register contents of two Packet2cd.
template<> EIGEN_STRONG_INLINE Packet2cd pand <Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
  const __m256d bits = _mm256_and_pd(a.v, b.v);
  return Packet2cd(bits);
}
// Bitwise OR of the raw register contents of two Packet2cd.
template<> EIGEN_STRONG_INLINE Packet2cd por <Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
  const __m256d bits = _mm256_or_pd(a.v, b.v);
  return Packet2cd(bits);
}
// Bitwise XOR of the raw register contents of two Packet2cd.
template<> EIGEN_STRONG_INLINE Packet2cd pxor <Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
  const __m256d bits = _mm256_xor_pd(a.v, b.v);
  return Packet2cd(bits);
}
// Forwards (a.v, b.v) to _mm256_andnot_pd, in that argument order, and wraps
// the result.
template<> EIGEN_STRONG_INLINE Packet2cd pandnot<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
  const __m256d bits = _mm256_andnot_pd(a.v, b.v);
  return Packet2cd(bits);
}
// Aligned load of two consecutive complex<double> values, performed as an
// aligned load of four doubles from the same address.
template<> EIGEN_STRONG_INLINE Packet2cd pload <Packet2cd>(const std::complex<double>* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  const double* scalars = reinterpret_cast<const double*>(from);
  return Packet2cd(pload<Packet4d>(scalars));
}
// Unaligned load of two consecutive complex<double> values, performed as an
// unaligned load of four doubles from the same address.
template<> EIGEN_STRONG_INLINE Packet2cd ploadu<Packet2cd>(const std::complex<double>* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  const double* scalars = reinterpret_cast<const double*>(from);
  return Packet2cd(ploadu<Packet4d>(scalars));
}
// Broadcasts one complex<double> to both slots by treating its 128 bits as a
// __m128d and replicating that into both halves of the register.
template<> EIGEN_STRONG_INLINE Packet2cd pset1<Packet2cd>(const std::complex<double>& from)
{
  // in case casting to a __m128d* is really not safe, then we can still fallback to this version: (much slower though)
  // return Packet2cd(_mm256_loadu2_m128d((const double*)&from,(const double*)&from));
  const __m128d* as_pair = (const __m128d*)(const void*)&from;
  return Packet2cd(_mm256_broadcast_pd(as_pair));
}
// With only two complex slots, duplicating the first element is the same as
// broadcasting *from to the whole packet.
template<> EIGEN_STRONG_INLINE Packet2cd ploaddup<Packet2cd>(const std::complex<double>* from)
{
  const std::complex<double>& first = *from;
  return pset1<Packet2cd>(first);
}
// Aligned store of a Packet2cd, performed as an aligned store of the raw
// double register to the same address.
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet2cd& from)
{
  EIGEN_DEBUG_ALIGNED_STORE
  double* scalars = reinterpret_cast<double*>(to);
  pstore(scalars, from.v);
}
// Unaligned store of a Packet2cd, performed as an unaligned store of the raw
// double register to the same address.
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet2cd& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  double* scalars = reinterpret_cast<double*>(to);
  pstoreu(scalars, from.v);
}
// Gathers the two complex<double> values at from[0] and from[stride] into one
// packet, element 0 first.
template<> EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(const std::complex<double>* from, Index stride)
{
  const std::complex<double>& c0 = from[0*stride];
  const std::complex<double>& c1 = from[1*stride];
  // _mm256_set_pd takes its arguments from the highest element down to the lowest.
  const __m256d gathered = _mm256_set_pd(std::imag(c1), std::real(c1),
                                         std::imag(c0), std::real(c0));
  return Packet2cd(gathered);
}
// Scatters the two complex<double> values of a packet to to[0] and to[stride],
// in that order.
template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from, Index stride)
{
  // Lower 128 bits: first complex. The shuffle with immediate 1 moves the
  // imaginary part into element 0 so _mm_cvtsd_f64 can read it.
  __m128d first = _mm256_extractf128_pd(from.v, 0);
  const double re0 = _mm_cvtsd_f64(first);
  const double im0 = _mm_cvtsd_f64(_mm_shuffle_pd(first, first, 1));
  to[stride*0] = std::complex<double>(re0, im0);

  // Upper 128 bits: second complex.
  __m128d second = _mm256_extractf128_pd(from.v, 1);
  const double re1 = _mm_cvtsd_f64(second);
  const double im1 = _mm_cvtsd_f64(_mm_shuffle_pd(second, second, 1));
  to[stride*1] = std::complex<double>(re1, im1);
}
// Returns the first complex<double> of the packet by spilling the lower
// 128 bits to a 16-byte-aligned two-element buffer.
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet2cd>(const Packet2cd& a)
{
  EIGEN_ALIGN16 double parts[2];
  const __m128d lower_half = _mm256_extractf128_pd(a.v, 0);
  _mm_store_pd(parts, lower_half);
  return std::complex<double>(parts[0], parts[1]);
}
// Reverses the two complex<double> values by exchanging the 128-bit halves of
// the register.
template<> EIGEN_STRONG_INLINE Packet2cd preverse(const Packet2cd& a) {
  const __m256d halves_swapped = _mm256_permute2f128_pd(a.v, a.v, 1);
  return Packet2cd(halves_swapped);
}
// Sum of the two complex<double> values: adds the two 128-bit halves as
// Packet1cd and reduces the result.
template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet2cd>(const Packet2cd& a)
{
  const Packet1cd lower_half(_mm256_extractf128_pd(a.v,0));
  const Packet1cd upper_half(_mm256_extractf128_pd(a.v,1));
  return predux(padd(lower_half, upper_half));
}
// Reduces two packets at once: the result's lower half is the sum of both
// halves of vecs[0], and its upper half is the sum of both halves of vecs[1].
template<> EIGEN_STRONG_INLINE Packet2cd preduxp<Packet2cd>(const Packet2cd* vecs)
{
  // (low lane of vecs[0], low lane of vecs[1])
  const Packet4d low_lanes  = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 0 + (2<<4));
  // (high lane of vecs[0], high lane of vecs[1])
  const Packet4d high_lanes = _mm256_permute2f128_pd(vecs[0].v,vecs[1].v, 1 + (3<<4));
  return Packet2cd(_mm256_add_pd(low_lanes, high_lanes));
}
// Product of the two complex<double> values: multiplies the two 128-bit halves
// as Packet1cd and passes the single-element result through predux.
template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet2cd>(const Packet2cd& a)
{
  const Packet1cd lower_half(_mm256_extractf128_pd(a.v,0));
  const Packet1cd upper_half(_mm256_extractf128_pd(a.v,1));
  return predux(pmul(lower_half, upper_half));
}
// palign for Packet2cd: forwards to the Packet4d implementation with the
// offset doubled, since every complex<double> occupies two doubles. An offset
// of zero leaves `first` unchanged.
template<int Offset>
struct palign_impl<Offset,Packet2cd>
{
  static EIGEN_STRONG_INLINE void run(Packet2cd& first, const Packet2cd& second)
  {
    if (Offset != 0)
    {
      palign_impl<Offset*2,Packet4d>::run(first.v, second.v);
    }
  }
};
// conj_helper for (Packet2cd, Packet2cd) with flags <false,true>:
// the product is a * conj(b).
template<> struct conj_helper<Packet2cd, Packet2cd, false,true>
{
  // Returns a * conj(b).
  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
  {
    const Packet2cd b_conj = pconj(b);
    return internal::pmul(a, b_conj);
  }

  // Returns x * conj(y) + c.
  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
  {
    return padd(this->pmul(x, y), c);
  }
};
// conj_helper for (Packet2cd, Packet2cd) with flags <true,false>:
// the product is conj(a) * b.
template<> struct conj_helper<Packet2cd, Packet2cd, true,false>
{
  // Returns conj(a) * b.
  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
  {
    const Packet2cd a_conj = pconj(a);
    return internal::pmul(a_conj, b);
  }

  // Returns conj(x) * y + c.
  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
  {
    return padd(this->pmul(x, y), c);
  }
};
// conj_helper for (Packet2cd, Packet2cd) with flags <true,true>:
// the product is conj(a * b).
template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
{
  // Returns conj(a * b).
  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& a, const Packet2cd& b) const
  {
    const Packet2cd product = internal::pmul(a, b);
    return pconj(product);
  }

  // Returns conj(x * y) + c.
  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet2cd& y, const Packet2cd& c) const
  {
    return padd(this->pmul(x, y), c);
  }
};
// conj_helper for a real packet (Packet4d) times a complex packet (Packet2cd),
// no conjugation: the raw double registers are multiplied lane by lane.
template<> struct conj_helper<Packet4d, Packet2cd, false,false>
{
  // Lane-wise product of x with the raw doubles of y.
  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
  {
    const Packet4d scaled = Eigen::internal::pmul(x, y.v);
    return Packet2cd(scaled);
  }

  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
  {
    return padd(c, this->pmul(x, y));
  }
};
// conj_helper for a complex packet (Packet2cd) times a real packet (Packet4d),
// no conjugation: the raw double registers are multiplied lane by lane.
template<> struct conj_helper<Packet2cd, Packet4d, false,false>
{
  // Lane-wise product of the raw doubles of x with y.
  EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
  {
    const Packet4d scaled = Eigen::internal::pmul(x.v, y);
    return Packet2cd(scaled);
  }

  // Returns c + x * y.
  EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
  {
    return padd(c, this->pmul(x, y));
  }
};
// Complex division a / b computed as (a * conj(b)) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
  const Packet2cd numerator = pmul(a, pconj(b));
  // Squares of every double in b: (br^2, bi^2) per complex.
  const __m256d squares = _mm256_mul_pd(b.v, b.v);
  // Horizontal add of the register with itself puts br^2 + bi^2 in both slots
  // of each 128-bit lane.
  const __m256d norm2 = _mm256_hadd_pd(squares, squares);
  return Packet2cd(_mm256_div_pd(numerator.v, norm2));
}
// Swaps the two doubles of every complex value: (re, im) -> (im, re).
template<> EIGEN_STRONG_INLINE Packet2cd pcplxflip<Packet2cd>(const Packet2cd& x)
{
  const __m256d flipped = _mm256_shuffle_pd(x.v, x.v, 0x5);
  return Packet2cd(flipped);
}
// In-place transpose of a 4x4 block of complex<float> held in four Packet4cf.
// Each complex<float> is 64 bits wide, so the registers are reinterpreted as
// packed doubles and shuffled as whole 64-bit elements.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4cf,4>& kernel) {
  const __m256d row0 = _mm256_castps_pd(kernel.packet[0].v);
  const __m256d row1 = _mm256_castps_pd(kernel.packet[1].v);
  const __m256d row2 = _mm256_castps_pd(kernel.packet[2].v);
  const __m256d row3 = _mm256_castps_pd(kernel.packet[3].v);

  // Immediate 15 interleaves the odd-indexed elements of the two inputs,
  // immediate 0 interleaves the even-indexed ones.
  const __m256d odd01  = _mm256_shuffle_pd(row0, row1, 15);
  const __m256d even01 = _mm256_shuffle_pd(row0, row1, 0);
  const __m256d odd23  = _mm256_shuffle_pd(row2, row3, 15);
  const __m256d even23 = _mm256_shuffle_pd(row2, row3, 0);

  // Immediate 32 joins the low 128-bit lanes of both inputs, 49 the high lanes.
  kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(odd01, odd23, 32));
  kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(odd01, odd23, 49));
  kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(even01, even23, 32));
  kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(even01, even23, 49));
}
// In-place transpose of a 2x2 block of complex<double> held in two Packet2cd.
// Both output rows are computed from the original inputs before either packet
// is overwritten.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet2cd,2>& kernel) {
  // New row 0: low lanes of both packets.
  const __m256d low_lanes = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
  // New row 1: high lanes of both packets.
  const __m256d high_lanes = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
  kernel.packet[1].v = high_lanes;
  kernel.packet[0].v = low_lanes;
}
// Returns a copy of a with its first complex<float> replaced by b. Blend mask
// bits 0 and 1 select the two lowest floats from the broadcast of b.
template<> EIGEN_STRONG_INLINE Packet4cf pinsertfirst(const Packet4cf& a, std::complex<float> b)
{
  const Packet4cf replicated = pset1<Packet4cf>(b);
  return Packet4cf(_mm256_blend_ps(a.v, replicated.v, 1|2));
}
// Returns a copy of a with its first complex<double> replaced by b. Blend mask
// bits 0 and 1 select the two lowest doubles from the broadcast of b.
template<> EIGEN_STRONG_INLINE Packet2cd pinsertfirst(const Packet2cd& a, std::complex<double> b)
{
  const Packet2cd replicated = pset1<Packet2cd>(b);
  return Packet2cd(_mm256_blend_pd(a.v, replicated.v, 1|2));
}
// Returns a copy of a with its last complex<float> replaced by b. Blend mask
// bits 6 and 7 select the two highest floats from the broadcast of b.
template<> EIGEN_STRONG_INLINE Packet4cf pinsertlast(const Packet4cf& a, std::complex<float> b)
{
  const Packet4cf replicated = pset1<Packet4cf>(b);
  return Packet4cf(_mm256_blend_ps(a.v, replicated.v, (1<<7)|(1<<6)));
}
// Returns a copy of a with its last complex<double> replaced by b. Blend mask
// bits 2 and 3 select the two highest doubles from the broadcast of b.
template<> EIGEN_STRONG_INLINE Packet2cd pinsertlast(const Packet2cd& a, std::complex<double> b)
{
  const Packet2cd replicated = pset1<Packet2cd>(b);
  return Packet2cd(_mm256_blend_pd(a.v, replicated.v, (1<<3)|(1<<2)));
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_COMPLEX_AVX_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_MATH_FUNCTIONS_AVX_H
#define EIGEN_MATH_FUNCTIONS_AVX_H
/* The sin, cos, exp, and log functions of this file are loosely derived from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/
namespace Eigen {
namespace internal {
// Shifts each of the eight 32-bit integers in v left by n bits. With AVX2 this
// is a single 256-bit shift; otherwise the two 128-bit halves are shifted
// separately and recombined.
inline Packet8i pshiftleft(Packet8i v, int n)
{
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_slli_epi32(v, n);
#else
  const __m128i lower_shifted = _mm_slli_epi32(_mm256_extractf128_si256(v, 0), n);
  const __m128i upper_shifted = _mm_slli_epi32(_mm256_extractf128_si256(v, 1), n);
  const __m256i widened = _mm256_castsi128_si256(lower_shifted);
  return _mm256_insertf128_si256(widened, upper_shifted, 1);
#endif
}
// Reinterprets the eight floats in v as 32-bit integers, shifts each right
// (logical shift) by n bits, and converts the resulting integers to float
// values. Note that the result is a numeric conversion, not a bit cast.
inline Packet8f pshiftright(Packet8f v, int n)
{
  const __m256i raw_bits = _mm256_castps_si256(v);
#ifdef EIGEN_VECTORIZE_AVX2
  return _mm256_cvtepi32_ps(_mm256_srli_epi32(raw_bits, n));
#else
  const __m128i lower_shifted = _mm_srli_epi32(_mm256_extractf128_si256(raw_bits, 0), n);
  const __m128i upper_shifted = _mm_srli_epi32(_mm256_extractf128_si256(raw_bits, 1), n);
  const __m256i combined = _mm256_insertf128_si256(_mm256_castsi128_si256(lower_shifted), upper_shifted, 1);
  return _mm256_cvtepi32_ps(combined);
#endif
}
// Sine function
// Computes sin(x) by wrapping x to the interval [-Pi/4,3*Pi/4] and
// evaluating interpolants in [-Pi/4,Pi/4] or [Pi/4,3*Pi/4]. The interpolants
// are (anti-)symmetric and thus have only odd/even coefficients
//
// Takes a packet of eight floats (_x) and returns the packet of approximated
// sines. The statement order below is part of the numerical scheme; do not
// reorder the floating-point operations.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
psin<Packet8f>(const Packet8f& _x) {
Packet8f x = _x;
// Some useful values.
_EIGEN_DECLARE_CONST_Packet8i(one, 1);
_EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
_EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
_EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
_EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
// The three neg_pi_* constants add up to approximately -Pi; they are applied
// one after another below (presumably to limit rounding error in the range
// reduction).
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
_EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
_EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);
// Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period.
Packet8f z = pmul(x, p8f_one_over_pi);
// shift = floor(x/Pi + 1/4): the number of multiples of Pi removed from x.
Packet8f shift = _mm256_floor_ps(padd(z, p8f_one_over_four));
x = pmadd(shift, p8f_neg_pi_first, x);
x = pmadd(shift, p8f_neg_pi_second, x);
x = pmadd(shift, p8f_neg_pi_third, x);
z = pmul(x, p8f_four_over_pi);
// Make a mask for the entries that need flipping, i.e. wherever the shift
// is odd.
Packet8i shift_ints = _mm256_cvtps_epi32(shift);
Packet8i shift_isodd = _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
// Moving the parity bit to bit 31 turns it into a float sign-bit mask.
Packet8i sign_flip_mask = pshiftleft(shift_isodd, 31);
// Create a mask for which interpolant to use, i.e. if z > 1, then the mask
// is set to ones for that entry.
Packet8f ival_mask = _mm256_cmp_ps(z, p8f_one, _CMP_GT_OQ);
// Evaluate the polynomial for the interval [1,3] in z.
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
_EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
Packet8f z_minus_two = psub(z, p8f_two);
Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
// Horner evaluation of an even polynomial in (z - 2).
Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
right = pmadd(right, z_minus_two2, p8f_coeff_right_2);
right = pmadd(right, z_minus_two2, p8f_coeff_right_0);
// Evaluate the polynomial for the interval [-1,1] in z.
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
_EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
Packet8f z2 = pmul(z, z);
// Horner evaluation of an odd polynomial in z (the final pmul supplies the
// odd factor).
Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
left = pmadd(left, z2, p8f_coeff_left_3);
left = pmadd(left, z2, p8f_coeff_left_1);
left = pmul(left, z);
// Assemble the results, i.e. select the left and right polynomials.
left = _mm256_andnot_ps(ival_mask, left);
right = _mm256_and_ps(ival_mask, right);
Packet8f res = _mm256_or_ps(left, right);
// Flip the sign on the odd intervals and return the result.
res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
return res;
}
// Natural logarithm
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
// be easily approximated by a polynomial centered on m=1 for stability.
//
// Special inputs, per lane:
//   - x < 0 or NaN  -> NaN (all bits set)
//   - x == 0        -> -inf
//   - x == +inf     -> +inf
//   - 0 < x < smallest positive normal float -> treated as that smallest normal
//
// TODO(gonnet): Further reduce the interval allowing for lower-degree
// polynomial interpolants -> ... -> profit!
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
plog<Packet8f>(const Packet8f& _x) {
  Packet8f x = _x;
  _EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
  _EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
  _EIGEN_DECLARE_CONST_Packet8f(126f, 126.0f);

  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inv_mant_mask, ~0x7f800000);

  // The smallest non denormalized float number.
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(min_norm_pos, 0x00800000);
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(minus_inf, 0xff800000);
  // Bit pattern of +infinity, used to pass +inf inputs through unchanged.
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(pos_inf, 0x7f800000);

  // Polynomial coefficients.
  _EIGEN_DECLARE_CONST_Packet8f(cephes_SQRTHF, 0.707106781186547524f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p0, 7.0376836292E-2f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p1, -1.1514610310E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p2, 1.1676998740E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p3, -1.2420140846E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p4, +1.4249322787E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p5, -1.6668057665E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p6, +2.0000714765E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p7, -2.4999993993E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_p8, +3.3333331174E-1f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q1, -2.12194440e-4f);
  _EIGEN_DECLARE_CONST_Packet8f(cephes_log_q2, 0.693359375f);

  // Classify the special inputs before x is modified.
  Packet8f invalid_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_NGE_UQ); // not greater equal is true if x is NaN
  Packet8f iszero_mask = _mm256_cmp_ps(x, _mm256_setzero_ps(), _CMP_EQ_OQ);
  // Without this mask a +inf input would be decomposed like a finite number
  // (exponent field 255, mantissa 0.5) and yield roughly 128*log(2).
  Packet8f pos_inf_mask = _mm256_cmp_ps(x, p8f_pos_inf, _CMP_EQ_OQ);

  // Truncate input values to the minimum positive normal.
  x = pmax(x, p8f_min_norm_pos);

  // Extract the biased exponent field as a float and remove the bias (minus
  // one, because the mantissa is normalized to [0.5,1) below).
  Packet8f emm0 = pshiftright(x,23);
  Packet8f e = _mm256_sub_ps(emm0, p8f_126f);

  // Set the exponents to -1, i.e. x are in the range [0.5,1).
  x = _mm256_and_ps(x, p8f_inv_mant_mask);
  x = _mm256_or_ps(x, p8f_half);

  // part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
  // and shift by -1. The values are then centered around 0, which improves
  // the stability of the polynomial evaluation.
  //   if( x < SQRTHF ) {
  //     e -= 1;
  //     x = x + x - 1.0;
  //   } else { x = x - 1.0; }
  Packet8f mask = _mm256_cmp_ps(x, p8f_cephes_SQRTHF, _CMP_LT_OQ);
  Packet8f tmp = _mm256_and_ps(x, mask);
  x = psub(x, p8f_1);
  e = psub(e, _mm256_and_ps(p8f_1, mask));
  x = padd(x, tmp);

  Packet8f x2 = pmul(x, x);
  Packet8f x3 = pmul(x2, x);

  // Evaluate the polynomial approximant of degree 8 in three parts, probably
  // to improve instruction-level parallelism.
  Packet8f y, y1, y2;
  y = pmadd(p8f_cephes_log_p0, x, p8f_cephes_log_p1);
  y1 = pmadd(p8f_cephes_log_p3, x, p8f_cephes_log_p4);
  y2 = pmadd(p8f_cephes_log_p6, x, p8f_cephes_log_p7);
  y = pmadd(y, x, p8f_cephes_log_p2);
  y1 = pmadd(y1, x, p8f_cephes_log_p5);
  y2 = pmadd(y2, x, p8f_cephes_log_p8);
  y = pmadd(y, x3, y1);
  y = pmadd(y, x3, y2);
  y = pmul(y, x3);

  // Add the logarithm of the exponent back to the result of the interpolation.
  // log(2) is applied as q1 + q2 (a small correction plus a coarse part).
  y1 = pmul(e, p8f_cephes_log_q1);
  tmp = pmul(x2, p8f_half);
  y = padd(y, y1);
  x = psub(x, tmp);
  y2 = pmul(e, p8f_cephes_log_q2);
  x = padd(x, y);
  x = padd(x, y2);

  // log(+inf) = +inf: replace the polynomial result in those lanes.
  x = _mm256_or_ps(_mm256_andnot_ps(pos_inf_mask, x),
                   _mm256_and_ps(pos_inf_mask, p8f_pos_inf));

  // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
  return _mm256_or_ps(
      _mm256_andnot_ps(iszero_mask, _mm256_or_ps(x, invalid_mask)),
      _mm256_and_ps(iszero_mask, p8f_minus_inf));
}
// Exponential function. Works by writing "x = m*log(2) + r" where
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
// "exp(x) = 2^m*exp(r)"; by the choice of m, r lies within about
// +/- log(2)/2, where exp(r) is approximated by a polynomial.
//
// Takes a packet of eight floats (_x) and returns the packet of approximated
// exponentials. The input is clamped to [exp_lo, exp_hi] before evaluation.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
pexp<Packet8f>(const Packet8f& _x) {
_EIGEN_DECLARE_CONST_Packet8f(1, 1.0f);
_EIGEN_DECLARE_CONST_Packet8f(half, 0.5f);
_EIGEN_DECLARE_CONST_Packet8f(127, 127.0f);
_EIGEN_DECLARE_CONST_Packet8f(exp_hi, 88.3762626647950f);
_EIGEN_DECLARE_CONST_Packet8f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet8f(cephes_LOG2EF, 1.44269504088896341f);
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p0, 1.9875691500E-4f);
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p1, 1.3981999507E-3f);
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p2, 8.3334519073E-3f);
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p3, 4.1665795894E-2f);
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p4, 1.6666665459E-1f);
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_p5, 5.0000001201E-1f);
// Clamp x.
Packet8f x = pmax(pmin(_x, p8f_exp_hi), p8f_exp_lo);
// Express exp(x) as exp(m*ln(2) + r), start by extracting
// m = floor(x/ln(2) + 0.5).
Packet8f m = _mm256_floor_ps(pmadd(x, p8f_cephes_LOG2EF, p8f_half));
// Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is
// subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating
// truncation errors. Note that we don't use the "pmadd" function here to
// ensure that a precision-preserving FMA instruction is used.
#ifdef EIGEN_VECTORIZE_FMA
_EIGEN_DECLARE_CONST_Packet8f(nln2, -0.6931471805599453f);
Packet8f r = _mm256_fmadd_ps(m, p8f_nln2, x);
#else
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C1, 0.693359375f);
_EIGEN_DECLARE_CONST_Packet8f(cephes_exp_C2, -2.12194440e-4f);
Packet8f r = psub(x, pmul(m, p8f_cephes_exp_C1));
r = psub(r, pmul(m, p8f_cephes_exp_C2));
#endif
Packet8f r2 = pmul(r, r);
// TODO(gonnet): Split into odd/even polynomials and try to exploit
// instruction-level parallelism.
// Horner evaluation of the degree-5 polynomial in r; the last two steps form
// y = poly(r)*r^2 + r + 1.
Packet8f y = p8f_cephes_exp_p0;
y = pmadd(y, r, p8f_cephes_exp_p1);
y = pmadd(y, r, p8f_cephes_exp_p2);
y = pmadd(y, r, p8f_cephes_exp_p3);
y = pmadd(y, r, p8f_cephes_exp_p4);
y = pmadd(y, r, p8f_cephes_exp_p5);
y = pmadd(y, r2, r);
y = padd(y, p8f_1);
// Build emm0 = 2^m.
// (m + 127) is the biased exponent; shifting it left by 23 places it in the
// exponent field of a float whose mantissa bits are zero.
Packet8i emm0 = _mm256_cvttps_epi32(padd(m, p8f_127));
emm0 = pshiftleft(emm0, 23);
// Return 2^m * exp(r).
// NOTE(review): the outer pmax against the unclamped input _x presumably
// exists to pass very large or NaN inputs through; confirm.
return pmax(pmul(y, _mm256_castsi256_ps(emm0)), _x);
}
// Hyperbolic tangent for a packet of eight floats; the work is delegated to
// internal::generic_fast_tanh_float.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
ptanh<Packet8f>(const Packet8f& x) {
  const Packet8f tanh_of_x = internal::generic_fast_tanh_float(x);
  return tanh_of_x;
}
// Exponential of a packet of four doubles.
//
// The input is clamped to [exp_lo, exp_hi], split as x = g + n*log(2) with
// n = floor(x*log2(e) + 0.5), exp(g) is evaluated with a rational
// approximation, and the result is scaled by 2^n built directly in the
// exponent bits. The final max() against the unclamped input is there to
// catch non-finite inputs (see the comment on the return statement).
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
pexp<Packet4d>(const Packet4d& _x) {
Packet4d x = _x;
_EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
_EIGEN_DECLARE_CONST_Packet4d(2, 2.0);
_EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
// Clamping bounds for the argument.
_EIGEN_DECLARE_CONST_Packet4d(exp_hi, 709.437);
_EIGEN_DECLARE_CONST_Packet4d(exp_lo, -709.436139303);
// log2(e).
_EIGEN_DECLARE_CONST_Packet4d(cephes_LOG2EF, 1.4426950408889634073599);
// Numerator coefficients (polynomial in g^2, later multiplied by g).
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p0, 1.26177193074810590878e-4);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p1, 3.02994407707441961300e-2);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_p2, 9.99999999999999999910e-1);
// Denominator coefficients (polynomial in g^2).
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q0, 3.00198505138664455042e-6);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q1, 2.52448340349684104192e-3);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q2, 2.27265548208155028766e-1);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_q3, 2.00000000000000000009e0);
// log(2) split into a high part C1 and a low part C2 (C1 + C2 = log(2)).
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C1, 0.693145751953125);
_EIGEN_DECLARE_CONST_Packet4d(cephes_exp_C2, 1.42860682030941723212e-6);
// Exponent bias added to n before it is shifted into the exponent field.
_EIGEN_DECLARE_CONST_Packet4i(1023, 1023);
Packet4d tmp, fx;
// clamp x
x = pmax(pmin(x, p4d_exp_hi), p4d_exp_lo);
// Express exp(x) as exp(g + n*log(2)).
fx = pmadd(p4d_cephes_LOG2EF, x, p4d_half);
// Get the integer modulus of log(2), i.e. the "n" described above.
fx = _mm256_floor_pd(fx);
// Get the remainder modulo log(2), i.e. the "g" described above. Subtract
// n*log(2) out in two steps, i.e. n*C1 + n*C2, C1+C2=log2 to get the last
// digits right.
tmp = pmul(fx, p4d_cephes_exp_C1);
Packet4d z = pmul(fx, p4d_cephes_exp_C2);
x = psub(x, tmp);
x = psub(x, z);
Packet4d x2 = pmul(x, x);
// Evaluate the numerator polynomial of the rational interpolant:
// px = g * (p0*g^4 + p1*g^2 + p2).
Packet4d px = p4d_cephes_exp_p0;
px = pmadd(px, x2, p4d_cephes_exp_p1);
px = pmadd(px, x2, p4d_cephes_exp_p2);
px = pmul(px, x);
// Evaluate the denominator polynomial of the rational interpolant:
// qx = q0*g^6 + q1*g^4 + q2*g^2 + q3.
Packet4d qx = p4d_cephes_exp_q0;
qx = pmadd(qx, x2, p4d_cephes_exp_q1);
qx = pmadd(qx, x2, p4d_cephes_exp_q2);
qx = pmadd(qx, x2, p4d_cephes_exp_q3);
// Combine both polynomials: exp(g) ~= 1 + 2*px/(qx - px), which is the same
// as (qx + px)/(qx - px). This form was copied from the SSE2 routines.
// TODO(gonnet): perhaps find a better rational interpolant?
x = _mm256_div_pd(px, psub(qx, px));
x = pmadd(p4d_2, x, p4d_1);
// Build e=2^n by constructing the exponents in a 128-bit vector and
// shifting them to where they belong in double-precision values.
// emm0 holds the four biased exponents as 32-bit integers [n0,n1,n2,n3].
__m128i emm0 = _mm256_cvtpd_epi32(fx);
emm0 = _mm_add_epi32(emm0, p4i_1023);
// Reorder to [n0,n2,n1,n3] so each 64-bit lane holds one exponent for the
// low half (in its low 32 bits) and one for the high half (in its high 32
// bits).
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
// Shifting left by 52 keeps only the low 32-bit value of each 64-bit lane,
// positioned in the exponent field: exponents for elements 0 and 1.
__m128i lo = _mm_slli_epi64(emm0, 52);
// Bring the high 32-bit values down first: exponents for elements 2 and 3.
__m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
__m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
e = _mm256_insertf128_si256(e, hi, 1);
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
// non-finite values in the input.
return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
}
// Functions for sqrt.
// The EIGEN_FAST_MATH version uses the _mm256_rsqrt_ps approximation and one step
// of Newton's method, at a cost of 1-2 bits of precision as opposed to the
// exact solution. It does not handle +inf, or denormalized numbers correctly.
// The main advantage of this approach is not just speed, but also the fact that
// it can be inlined and pipelined with other computations, further reducing its
// effective latency. This is similar to Quake3's fast inverse square root.
// For detail see here: http://www.beyond3d.com/content/articles/8/
#if EIGEN_FAST_MATH
// Fast approximate square root of eight floats: x * rsqrt(x), with rsqrt(x)
// refined by one Newton step. Lanes with 0 <= x < FLT_MIN (zero and
// denormals) are forced to zero in the result.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
psqrt<Packet8f>(const Packet8f& _x) {
  // Mask of lanes whose result must be flushed to zero.
  const Packet8f below_min_normal = _mm256_cmp_ps(
      _x, pset1<Packet8f>((std::numeric_limits<float>::min)()), _CMP_LT_OQ);
  const Packet8f non_negative =
      _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_GE_OQ);
  const Packet8f flush_mask = _mm256_and_ps(below_min_normal, non_negative);

  // Initial estimate y0 ~= 1/sqrt(x).
  const Packet8f half_x = pmul(_x, pset1<Packet8f>(.5f));
  const Packet8f y0 = _mm256_rsqrt_ps(_x);

  // Newton refinement: y1 = y0 * (1.5 - 0.5 * x * y0^2).
  const Packet8f y1 =
      pmul(y0, psub(pset1<Packet8f>(1.5f), pmul(half_x, pmul(y0, y0))));

  // sqrt(x) = x * (1/sqrt(x)); clear the flushed lanes.
  return _mm256_andnot_ps(flush_mask, pmul(_x, y1));
}
#else
// Square root of eight floats using the hardware sqrt instruction.
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet8f psqrt<Packet8f>(const Packet8f& x) {
  const Packet8f root = _mm256_sqrt_ps(x);
  return root;
}
#endif
// Square root of four doubles using the hardware sqrt instruction.
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4d psqrt<Packet4d>(const Packet4d& x) {
  const Packet4d root = _mm256_sqrt_pd(x);
  return root;
}
#if EIGEN_FAST_MATH
// Fast approximate reciprocal square root of eight floats: hardware rsqrt
// estimate plus one Newton step. Negative lanes yield NaN; lanes with
// 0 <= x < FLT_MIN (zero and denormals) yield +inf.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
  _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
  _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);

  // Classify the lanes that do not take the regular path.
  const Packet8f below_min_normal = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
  const Packet8f negative = _mm256_cmp_ps(_x, _mm256_setzero_ps(), _CMP_LT_OQ);
  const Packet8f tiny_non_negative = _mm256_andnot_ps(negative, below_min_normal);

  // Replacement values for those lanes: NaN for negatives, +inf otherwise.
  const Packet8f special_values =
      _mm256_or_ps(_mm256_and_ps(negative, p8f_nan),
                   _mm256_and_ps(tiny_non_negative, p8f_inf));

  // Hardware estimate, zeroed in every special lane so the OR below inserts
  // the replacement value unchanged.
  const Packet8f y0 = _mm256_andnot_ps(below_min_normal, _mm256_rsqrt_ps(_x));

  // Newton refinement: y1 = y0 * (1.5 - 0.5 * x * y0^2).
  const Packet8f minus_half_x = pmul(_x, p8f_minus_half);
  const Packet8f y1 =
      pmul(y0, pmadd(minus_half_x, pmul(y0, y0), p8f_one_point_five));

  return _mm256_or_ps(y1, special_values);
}
#else
// Reciprocal square root of eight floats computed as 1 / sqrt(x).
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet8f prsqrt<Packet8f>(const Packet8f& x) {
  const Packet8f numerator = pset1<Packet8f>(1.0f);
  const Packet8f root = _mm256_sqrt_ps(x);
  return _mm256_div_ps(numerator, root);
}
#endif
// Reciprocal square root of four doubles computed as 1 / sqrt(x).
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4d prsqrt<Packet4d>(const Packet4d& x) {
  const Packet4d numerator = pset1<Packet4d>(1.0);
  const Packet4d root = _mm256_sqrt_pd(x);
  return _mm256_div_pd(numerator, root);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_AVX_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner (benoit.steiner.goog@gmail.com)
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_PACKET_MATH_AVX_H
#define EIGEN_PACKET_MATH_AVX_H
namespace Eigen {
namespace internal {
// Default value (8) for EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD; an earlier
// definition takes precedence.
#ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif
// Default register count: 2*sizeof(void*), i.e. 16 when pointers are 8 bytes
// wide and 8 when they are 4 bytes wide; an earlier definition takes
// precedence.
#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
// When the compiler defines __FMA__, make sure
// EIGEN_HAS_SINGLE_INSTRUCTION_MADD is defined as well.
#ifdef __FMA__
#ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif
#endif
// AVX packet types: eight floats, eight 32-bit ints and four doubles, each
// stored in one 256-bit register type.
typedef __m256 Packet8f;
typedef __m256i Packet8i;
typedef __m256d Packet4d;
// Mark the raw register types as arithmetic.
template<> struct is_arithmetic<__m256> { enum { value = true }; };
template<> struct is_arithmetic<__m256i> { enum { value = true }; };
template<> struct is_arithmetic<__m256d> { enum { value = true }; };
// Helpers that declare a local constant packet with every lane set to X.
// The resulting variable is named p8f_<NAME>, p4d_<NAME> or p8i_<NAME>.

// Eight floats, all equal to X.
#define _EIGEN_DECLARE_CONST_Packet8f(NAME,X) \
const Packet8f p8f_##NAME = pset1<Packet8f>(X)
// Four doubles, all equal to X.
#define _EIGEN_DECLARE_CONST_Packet4d(NAME,X) \
const Packet4d p4d_##NAME = pset1<Packet4d>(X)
// Eight floats whose bit pattern is the 32-bit integer X (bit cast, no
// numeric conversion).
#define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))
// Eight 32-bit integers, all equal to X.
#define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
const Packet8i p8i_##NAME = pset1<Packet8i>(X)
// Use the packet_traits defined in AVX512/PacketMath.h instead if we're going
// to leverage AVX512 instructions.
#ifndef EIGEN_VECTORIZE_AVX512
// Vectorization traits for float: the full packet is Packet8f (eight lanes)
// and the half packet is Packet4f. Every enumerator has an explicit value,
// so they are grouped by theme below.
template<> struct packet_traits<float> : default_packet_traits
{
  typedef Packet8f type;
  typedef Packet4f half;
  enum {
    // Packet geometry.
    Vectorizable    = 1,
    AlignedOnScalar = 1,
    size            = 8,
    HasHalfPacket   = 1,

    // Arithmetic and selection.
    HasDiv   = 1,
    HasBlend = 1,

    // Rounding.
    HasRound = 1,
    HasFloor = 1,
    HasCeil  = 1,

    // Always-available math functions.
    HasSqrt  = 1,
    HasRsqrt = 1,
    HasExp   = 1,
    HasLog   = 1,

    // Math functions that are only enabled with EIGEN_FAST_MATH.
    HasSin   = EIGEN_FAST_MATH,
    HasTanh  = EIGEN_FAST_MATH,

    // Not provided.
    HasCos   = 0
  };
};
// Vectorization traits for double: the full packet is Packet4d (four lanes)
// and the half packet is Packet2d. Every enumerator has an explicit value,
// so they are grouped by theme below.
template<> struct packet_traits<double> : default_packet_traits
{
  typedef Packet4d type;
  typedef Packet2d half;
  enum {
    // Packet geometry.
    Vectorizable    = 1,
    AlignedOnScalar = 1,
    size            = 4,
    HasHalfPacket   = 1,

    // Arithmetic and selection.
    HasDiv   = 1,
    HasBlend = 1,

    // Rounding.
    HasRound = 1,
    HasFloor = 1,
    HasCeil  = 1,

    // Math functions.
    HasExp   = 1,
    HasSqrt  = 1,
    HasRsqrt = 1
  };
};
#endif
// Cost assigned to a vectorized division: 14 for float, 16 for double.
template<> struct scalar_div_cost<float,true>
{
  enum { value = 14 };
};
template<> struct scalar_div_cost<double,true>
{
  enum { value = 16 };
};
/* Proper support for integers is only provided by AVX2. In the meantime, we'll
use SSE instructions and packets to deal with integers.
template<> struct packet_traits<int> : default_packet_traits
{
typedef Packet8i type;
enum {
Vectorizable = 1,
AlignedOnScalar = 1,
size=8
};
};
*/
// Reverse mapping from each AVX packet type to its scalar type, half packet,
// lane count and required alignment.
template<> struct unpacket_traits<Packet8f>
{
  typedef float    type;
  typedef Packet4f half;
  enum { size = 8, alignment = Aligned32 };
};
template<> struct unpacket_traits<Packet4d>
{
  typedef double   type;
  typedef Packet2d half;
  enum { size = 4, alignment = Aligned32 };
};
template<> struct unpacket_traits<Packet8i>
{
  typedef int      type;
  typedef Packet4i half;
  enum { size = 8, alignment = Aligned32 };
};
// Broadcast a scalar value into every lane of a packet.
template<> EIGEN_STRONG_INLINE Packet8f pset1<Packet8f>(const float& from)
{
  return _mm256_set1_ps(from);
}
template<> EIGEN_STRONG_INLINE Packet4d pset1<Packet4d>(const double& from)
{
  return _mm256_set1_pd(from);
}
template<> EIGEN_STRONG_INLINE Packet8i pset1<Packet8i>(const int& from)
{
  return _mm256_set1_epi32(from);
}
// Load one scalar from memory and broadcast it into every lane.
template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from)
{
  const Packet8f broadcast = _mm256_broadcast_ss(from);
  return broadcast;
}
template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from)
{
  const Packet4d broadcast = _mm256_broadcast_sd(from);
  return broadcast;
}
// Build the linear sequence {a, a+1, a+2, ...} with lane 0 holding a.
template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a)
{
  // _mm256_set_ps lists lanes from highest to lowest.
  const Packet8f lane_offsets = _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0);
  const Packet8f base = _mm256_set1_ps(a);
  return _mm256_add_ps(base, lane_offsets);
}
template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a)
{
  // _mm256_set_pd lists lanes from highest to lowest.
  const Packet4d lane_offsets = _mm256_set_pd(3.0,2.0,1.0,0.0);
  const Packet4d base = _mm256_set1_pd(a);
  return _mm256_add_pd(base, lane_offsets);
}
// Lane-wise addition a + b.
template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  const Packet8f sum = _mm256_add_ps(a, b);
  return sum;
}
template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  const Packet4d sum = _mm256_add_pd(a, b);
  return sum;
}
// Lane-wise subtraction a - b.
template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  const Packet8f difference = _mm256_sub_ps(a, b);
  return difference;
}
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  const Packet4d difference = _mm256_sub_pd(a, b);
  return difference;
}
// Lane-wise negation.
//
// The sign bit is flipped with an XOR instead of computing 0 - a: the
// subtraction returns +0 for a == +0 (the scalar -a gives -0) and does not
// flip the sign of NaN. The XOR is an exact sign flip for every input.
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
{
  // Sign bit of each 32-bit float lane.
  const Packet8f sign_mask =
      _mm256_castsi256_ps(_mm256_set1_epi32(static_cast<int>(0x80000000u)));
  return _mm256_xor_ps(a, sign_mask);
}
template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
{
  // Sign bit of each 64-bit double lane: it lives in the upper 32-bit word,
  // i.e. the odd 32-bit elements (same layout as the pabs<Packet4d> mask).
  const int sign_word = static_cast<int>(0x80000000u);
  const Packet4d sign_mask = _mm256_castsi256_pd(
      _mm256_setr_epi32(0, sign_word, 0, sign_word, 0, sign_word, 0, sign_word));
  return _mm256_xor_pd(a, sign_mask);
}
// Complex conjugate of real-valued packets: the identity.
template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a)
{
  return a;
}
template<> EIGEN_STRONG_INLINE Packet4d pconj(const Packet4d& a)
{
  return a;
}
template<> EIGEN_STRONG_INLINE Packet8i pconj(const Packet8i& a)
{
  return a;
}
// Lane-wise multiplication a * b.
template<> EIGEN_STRONG_INLINE Packet8f pmul<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  const Packet8f product = _mm256_mul_ps(a, b);
  return product;
}
template<> EIGEN_STRONG_INLINE Packet4d pmul<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  const Packet4d product = _mm256_mul_pd(a, b);
  return product;
}
// Lane-wise division a / b.
template<> EIGEN_STRONG_INLINE Packet8f pdiv<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  const Packet8f quotient = _mm256_div_ps(a, b);
  return quotient;
}
template<> EIGEN_STRONG_INLINE Packet4d pdiv<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  const Packet4d quotient = _mm256_div_pd(a, b);
  return quotient;
}
// Integer packet division is not available: this always trips the assertion
// and, when assertions are disabled, returns a packet of zeros.
template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, const Packet8i& /*b*/)
{
  eigen_assert(false && "packet integer division are not supported by AVX");
  const Packet8i zeros = pset1<Packet8i>(0);
  return zeros;
}
#ifdef __FMA__
// Fused multiply-add on eight floats: returns a*b + c.
// Only compiled when __FMA__ is defined (see the enclosing #ifdef).
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
// On strict GCC and on clang before 3.8 the intrinsic is bypassed:
// clang generates a vfmadd213ps instruction plus some vmovaps on registers,
// and gcc generates a vfmadd132ps instruction,
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
// the result of the product.
// The accumulator starts as a copy of c and is updated in place ("+x").
Packet8f res = c;
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
return res;
#else
return _mm256_fmadd_ps(a,b,c);
#endif
}
// Fused multiply-add on four doubles: returns a*b + c.
// Only compiled when __FMA__ is defined (see the enclosing #ifdef).
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
// Same workaround as the Packet8f overload: on strict GCC and on clang
// before 3.8, emit vfmadd231pd explicitly so the product accumulates into
// the register that holds c.
Packet4d res = c;
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
return res;
#else
return _mm256_fmadd_pd(a,b,c);
#endif
}
#endif
// Lane-wise minimum, using the hardware min instruction.
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  const Packet8f smaller = _mm256_min_ps(a, b);
  return smaller;
}
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  const Packet4d smaller = _mm256_min_pd(a, b);
  return smaller;
}
// Lane-wise maximum, using the hardware max instruction.
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  const Packet8f larger = _mm256_max_ps(a, b);
  return larger;
}
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  const Packet4d larger = _mm256_max_pd(a, b);
  return larger;
}
// Lane-wise rounding to an integral value using the current rounding
// direction (_MM_FROUND_CUR_DIRECTION).
template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a)
{
  return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION);
}
template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a)
{
  return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION);
}
// Lane-wise ceiling.
template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a)
{
  return _mm256_ceil_ps(a);
}
template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a)
{
  return _mm256_ceil_pd(a);
}
// Lane-wise floor.
template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a)
{
  return _mm256_floor_ps(a);
}
template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a)
{
  return _mm256_floor_pd(a);
}
// Bitwise AND of the raw packet bits.
template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  return _mm256_and_ps(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  return _mm256_and_pd(a, b);
}
// Bitwise OR of the raw packet bits.
template<> EIGEN_STRONG_INLINE Packet8f por<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  return _mm256_or_ps(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4d por<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  return _mm256_or_pd(a, b);
}
// Bitwise XOR of the raw packet bits.
template<> EIGEN_STRONG_INLINE Packet8f pxor<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  return _mm256_xor_ps(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4d pxor<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  return _mm256_xor_pd(a, b);
}
// Forwards (a, b) unchanged to the hardware and-not instruction.
template<> EIGEN_STRONG_INLINE Packet8f pandnot<Packet8f>(const Packet8f& a, const Packet8f& b)
{
  return _mm256_andnot_ps(a, b);
}
template<> EIGEN_STRONG_INLINE Packet4d pandnot<Packet4d>(const Packet4d& a, const Packet4d& b)
{
  return _mm256_andnot_pd(a, b);
}
// Aligned packet loads: `from` must satisfy the alignment required by the
// aligned load instruction.
template<> EIGEN_STRONG_INLINE Packet8f pload<Packet8f>(const float* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return _mm256_load_ps(from);
}
template<> EIGEN_STRONG_INLINE Packet4d pload<Packet4d>(const double* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return _mm256_load_pd(from);
}
template<> EIGEN_STRONG_INLINE Packet8i pload<Packet8i>(const int* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  const __m256i* source = reinterpret_cast<const __m256i*>(from);
  return _mm256_load_si256(source);
}
// Unaligned packet loads: no alignment requirement on `from`.
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return _mm256_loadu_ps(from);
}
template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return _mm256_loadu_pd(from);
}
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  const __m256i* source = reinterpret_cast<const __m256i*>(from);
  return _mm256_loadu_si256(source);
}
// Loads 4 floats from memory and returns the packet {a0, a0, a1, a1, a2, a2, a3, a3}.
template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from)
{
  // TODO try to find a way to avoid the need of a temporary register
  // Alternative kept for reference (not used because _mm256_insertf128_ps is
  // very slow on Haswell):
  //   Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from));
  //   tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1);
  //   return _mm256_unpacklo_ps(tmp,tmp);

  // Step 1: both 128-bit halves hold {a0, a1, a2, a3}.
  const Packet8f both_halves = _mm256_broadcast_ps((const __m128*)(const void*)from);

  // Step 2: rewrite only the lower half as {a0, a1, a0, a1}; the blend with
  // mask 15 mimics an "in-place" permutation of the lower 128 bits.
  const Packet8f lower_fixed = _mm256_castps128_ps256(
      _mm_permute_ps(_mm256_castps256_ps128(both_halves), _MM_SHUFFLE(1,0,1,0)));
  const Packet8f staged = _mm256_blend_ps(both_halves, lower_fixed, 15);

  // Step 3: the same per-half permutation now duplicates each element:
  // lower half -> {a0, a0, a1, a1}, upper half -> {a2, a2, a3, a3}.
  return _mm256_permute_ps(staged, _MM_SHUFFLE(3,3,2,2));
}
// Loads 2 doubles from memory and returns the packet {a0, a0, a1, a1}.
template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from)
{
  // Both 128-bit halves hold {a0, a1}.
  const Packet4d both_halves = _mm256_broadcast_pd((const __m128d*)(const void*)from);
  // Control 0xC (binary 1100): the lower half picks its low element twice,
  // the upper half picks its high element twice.
  return _mm256_permute_pd(both_halves, 0xC);
}
// Loads 2 floats from memory and returns the packet {a0, a0, a0, a0, a1, a1, a1, a1}.
template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from)
{
  const __m128 first_x4  = _mm_broadcast_ss(from);
  const __m128 second_x4 = _mm_broadcast_ss(from+1);
  // The lower half comes from the cast; the upper half is then inserted.
  const Packet8f lower_only = _mm256_castps128_ps256(first_x4);
  return _mm256_insertf128_ps(lower_only, second_x4, 1);
}
// Aligned packet stores: `to` must satisfy the alignment required by the
// aligned store instruction.
template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); }
template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet4d& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_pd(to, from); }
// The integer overload uses the aligned store as well, matching the
// float/double overloads above, pload<Packet8i> and the
// EIGEN_DEBUG_ALIGNED_STORE annotation (it previously issued an unaligned
// store; pstoreu is the entry point for unaligned destinations).
template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet8i& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_si256(reinterpret_cast<__m256i*>(to), from); }
// Unaligned packet stores: no alignment requirement on `to`.
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet8f& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  _mm256_storeu_ps(to, from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet4d& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  _mm256_storeu_pd(to, from);
}
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet8i& from)
{
  EIGEN_DEBUG_UNALIGNED_STORE
  __m256i* destination = reinterpret_cast<__m256i*>(to);
  _mm256_storeu_si256(destination, from);
}
// NOTE: leverage _mm256_i32gather_ps and _mm256_i32gather_pd if AVX2 instructions are available
// NOTE: for the record the following seems to be slower: return _mm256_i32gather_ps(from, _mm256_set1_epi32(stride), 4);
// Strided gather: lane i is read from from[i*stride].
template<> EIGEN_DEVICE_FUNC inline Packet8f pgather<float, Packet8f>(const float* from, Index stride)
{
  const float e0 = from[0*stride];
  const float e1 = from[1*stride];
  const float e2 = from[2*stride];
  const float e3 = from[3*stride];
  const float e4 = from[4*stride];
  const float e5 = from[5*stride];
  const float e6 = from[6*stride];
  const float e7 = from[7*stride];
  // _mm256_set_ps lists lanes from highest to lowest.
  return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0);
}
template<> EIGEN_DEVICE_FUNC inline Packet4d pgather<double, Packet4d>(const double* from, Index stride)
{
  const double e0 = from[0*stride];
  const double e1 = from[1*stride];
  const double e2 = from[2*stride];
  const double e3 = from[3*stride];
  // _mm256_set_pd lists lanes from highest to lowest.
  return _mm256_set_pd(e3, e2, e1, e0);
}
// Strided scatter: lane i is written to to[stride*i]. Each lane is moved to
// position 0 of a 128-bit register with a shuffle and then extracted.
template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet8f>(float* to, const Packet8f& from, Index stride)
{
  const __m128 lanes_0_3 = _mm256_extractf128_ps(from, 0);
  const __m128 lanes_4_7 = _mm256_extractf128_ps(from, 1);

  to[stride*0] = _mm_cvtss_f32(lanes_0_3);
  to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(lanes_0_3, lanes_0_3, 1));
  to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(lanes_0_3, lanes_0_3, 2));
  to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(lanes_0_3, lanes_0_3, 3));

  to[stride*4] = _mm_cvtss_f32(lanes_4_7);
  to[stride*5] = _mm_cvtss_f32(_mm_shuffle_ps(lanes_4_7, lanes_4_7, 1));
  to[stride*6] = _mm_cvtss_f32(_mm_shuffle_ps(lanes_4_7, lanes_4_7, 2));
  to[stride*7] = _mm_cvtss_f32(_mm_shuffle_ps(lanes_4_7, lanes_4_7, 3));
}
template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, const Packet4d& from, Index stride)
{
  const __m128d lanes_0_1 = _mm256_extractf128_pd(from, 0);
  const __m128d lanes_2_3 = _mm256_extractf128_pd(from, 1);

  to[stride*0] = _mm_cvtsd_f64(lanes_0_1);
  to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(lanes_0_1, lanes_0_1, 1));

  to[stride*2] = _mm_cvtsd_f64(lanes_2_3);
  to[stride*3] = _mm_cvtsd_f64(_mm_shuffle_pd(lanes_2_3, lanes_2_3, 1));
}
// Broadcast the scalar `a` into a full packet and write it to `to` through
// pstore.
template<> EIGEN_STRONG_INLINE void pstore1<Packet8f>(float* to, const float& a)
{
  pstore(to, pset1<Packet8f>(a));
}
template<> EIGEN_STRONG_INLINE void pstore1<Packet4d>(double* to, const double& a)
{
  pstore(to, pset1<Packet4d>(a));
}
template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
{
  pstore(to, pset1<Packet8i>(a));
}
#ifndef EIGEN_VECTORIZE_AVX512
// Issue a prefetch for `addr` with the _MM_HINT_T0 hint.
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr)
{
  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
}
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr)
{
  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
}
template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr)
{
  _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
}
#endif
// Extract lane 0 of a packet as a scalar.
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a)
{
  const __m128 lower_half = _mm256_castps256_ps128(a);
  return _mm_cvtss_f32(lower_half);
}
template<> EIGEN_STRONG_INLINE double pfirst<Packet4d>(const Packet4d& a)
{
  const __m128d lower_half = _mm256_castpd256_pd128(a);
  return _mm_cvtsd_f64(lower_half);
}
template<> EIGEN_STRONG_INLINE int pfirst<Packet8i>(const Packet8i& a)
{
  const __m128i lower_half = _mm256_castsi256_si128(a);
  return _mm_cvtsi128_si32(lower_half);
}
// Reverse the order of the eight float lanes.
template<> EIGEN_STRONG_INLINE Packet8f preverse(const Packet8f& a)
{
  // Reverse the four elements inside each 128-bit half
  // (_MM_SHUFFLE(0,1,2,3) == 0x1b) ...
  const __m256 reversed_in_halves = _mm256_shuffle_ps(a, a, _MM_SHUFFLE(0,1,2,3));
  // ... then swap the two halves.
  return _mm256_permute2f128_ps(reversed_in_halves, reversed_in_halves, 1);
}
// Reverse the order of the four double lanes.
template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
{
  // Swap the two elements inside each 128-bit half ...
  const __m256d swapped_in_halves = _mm256_shuffle_pd(a, a, 5);
  // ... then swap the two halves.
  return _mm256_permute2f128_pd(swapped_in_halves, swapped_in_halves, 1);
  // Note: an alternative that swaps the halves first and then permutes with
  // _mm256_permute_pd(.., 5) used to follow the return statement above as
  // unreachable code; it has been removed.
}
// pabs should be ok
// Lane-wise absolute value: clear the sign bit of every lane.
template<> EIGEN_STRONG_INLINE Packet8f pabs(const Packet8f& a)
{
  // All bits set except the sign bit of each 32-bit lane.
  const Packet8f keep_magnitude = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
  return _mm256_and_ps(a, keep_magnitude);
}
template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a)
{
  // Each double spans two 32-bit words; the sign bit is the top bit of the
  // second word of each pair.
  const Packet4d keep_magnitude = _mm256_castsi256_pd(
      _mm256_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,
                        0xFFFFFFFF,0x7FFFFFFF,
                        0xFFFFFFFF,0x7FFFFFFF,
                        0xFFFFFFFF,0x7FFFFFFF));
  return _mm256_and_pd(a, keep_magnitude);
}
// preduxp should be ok
// FIXME: why is this ok? why isn't the simply implementation working as expected?
// Reduces eight packets at once: lane i of the result is the sum of all
// eight lanes of vecs[i].
template<> EIGEN_STRONG_INLINE Packet8f preduxp<Packet8f>(const Packet8f* vecs)
{
// First round of horizontal adds: pairwise sums inside each 128-bit half,
// interleaving two input packets per result.
__m256 hsum1 = _mm256_hadd_ps(vecs[0], vecs[1]);
__m256 hsum2 = _mm256_hadd_ps(vecs[2], vecs[3]);
__m256 hsum3 = _mm256_hadd_ps(vecs[4], vecs[5]);
__m256 hsum4 = _mm256_hadd_ps(vecs[6], vecs[7]);
// Second round: each 128-bit half now holds the per-half totals of its two
// input packets, laid out as {A, B, A, B}.
__m256 hsum5 = _mm256_hadd_ps(hsum1, hsum1);
__m256 hsum6 = _mm256_hadd_ps(hsum2, hsum2);
__m256 hsum7 = _mm256_hadd_ps(hsum3, hsum3);
__m256 hsum8 = _mm256_hadd_ps(hsum4, hsum4);
// Swap the 128-bit halves (both sources are the same register) ...
__m256 perm1 = _mm256_permute2f128_ps(hsum5, hsum5, 0x23);
__m256 perm2 = _mm256_permute2f128_ps(hsum6, hsum6, 0x23);
__m256 perm3 = _mm256_permute2f128_ps(hsum7, hsum7, 0x23);
__m256 perm4 = _mm256_permute2f128_ps(hsum8, hsum8, 0x23);
// ... and add them, so every half holds the full totals {S, T, S, T} of its
// two input packets.
__m256 sum1 = _mm256_add_ps(perm1, hsum5);
__m256 sum2 = _mm256_add_ps(perm2, hsum6);
__m256 sum3 = _mm256_add_ps(perm3, hsum7);
__m256 sum4 = _mm256_add_ps(perm4, hsum8);
// Mask 0xcc takes elements 2,3 of each half from the second operand:
// blend1 = {S0,S1,S2,S3 | S0,S1,S2,S3}, blend2 = {S4,S5,S6,S7 | S4,S5,S6,S7}.
__m256 blend1 = _mm256_blend_ps(sum1, sum2, 0xcc);
__m256 blend2 = _mm256_blend_ps(sum3, sum4, 0xcc);
// Mask 0xf0 takes the upper half from blend2: {S0,...,S7}.
__m256 final = _mm256_blend_ps(blend1, blend2, 0xf0);
return final;
}
// Reduces four packets at once: lane i of the result is the sum of all four
// lanes of vecs[i].
template<> EIGEN_STRONG_INLINE Packet4d preduxp<Packet4d>(const Packet4d* vecs)
{
  // Pairwise sums inside each 128-bit half for vecs[0] and vecs[1]; adding
  // the half-swapped copy leaves {S0, S1} in both halves.
  const Packet4d pairs01 = _mm256_hadd_pd(vecs[0], vecs[1]);
  const Packet4d sums01 = _mm256_add_pd(pairs01, _mm256_permute2f128_pd(pairs01, pairs01, 1));

  // Same for vecs[2] and vecs[3]: {S2, S3} in both halves.
  const Packet4d pairs23 = _mm256_hadd_pd(vecs[2], vecs[3]);
  const Packet4d sums23 = _mm256_add_pd(pairs23, _mm256_permute2f128_pd(pairs23, pairs23, 1));

  // Lower half from sums01, upper half from sums23.
  return _mm256_blend_pd(sums01, sums23, 0xC);
}
// Sum of all lanes: add the two 128-bit halves together, then let the
// half-packet predux finish the reduction.
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
{
  const Packet4f folded = _mm_add_ps(_mm256_castps256_ps128(a), _mm256_extractf128_ps(a,1));
  return predux(folded);
}
template<> EIGEN_STRONG_INLINE double predux<Packet4d>(const Packet4d& a)
{
  const Packet2d folded = _mm_add_pd(_mm256_castpd256_pd128(a), _mm256_extractf128_pd(a,1));
  return predux(folded);
}
// Partial reduction to four lanes: lane i of the result is a[i] + a[i+4].
template<> EIGEN_STRONG_INLINE Packet4f predux_downto4<Packet8f>(const Packet8f& a)
{
  const __m128 lower_half = _mm256_castps256_ps128(a);
  const __m128 upper_half = _mm256_extractf128_ps(a,1);
  return _mm_add_ps(lower_half, upper_half);
}
// Product of all lanes, computed by repeatedly multiplying the packet with a
// shuffled copy of itself and reading lane 0.
template<> EIGEN_STRONG_INLINE float predux_mul<Packet8f>(const Packet8f& a)
{
  // Combine the two 128-bit halves.
  const Packet8f halves = _mm256_mul_ps(a, _mm256_permute2f128_ps(a,a,1));
  // Combine elements {0,1} with {2,3} inside each half.
  const Packet8f pairs = _mm256_mul_ps(halves, _mm256_shuffle_ps(halves,halves,_MM_SHUFFLE(1,0,3,2)));
  // Combine element 0 with element 1.
  const Packet8f all = _mm256_mul_ps(pairs, _mm256_shuffle_ps(pairs,pairs,1));
  return pfirst(all);
}
template<> EIGEN_STRONG_INLINE double predux_mul<Packet4d>(const Packet4d& a)
{
  // Combine the two 128-bit halves.
  const Packet4d halves = _mm256_mul_pd(a, _mm256_permute2f128_pd(a,a,1));
  // Combine element 0 with element 1.
  const Packet4d all = _mm256_mul_pd(halves, _mm256_shuffle_pd(halves,halves,1));
  return pfirst(all);
}
// Minimum over all lanes, computed by repeatedly taking the minimum of the
// packet and a shuffled copy of itself and reading lane 0.
template<> EIGEN_STRONG_INLINE float predux_min<Packet8f>(const Packet8f& a)
{
  // Combine the two 128-bit halves.
  const Packet8f halves = _mm256_min_ps(a, _mm256_permute2f128_ps(a,a,1));
  // Combine elements {0,1} with {2,3} inside each half.
  const Packet8f pairs = _mm256_min_ps(halves, _mm256_shuffle_ps(halves,halves,_MM_SHUFFLE(1,0,3,2)));
  // Combine element 0 with element 1.
  const Packet8f all = _mm256_min_ps(pairs, _mm256_shuffle_ps(pairs,pairs,1));
  return pfirst(all);
}
template<> EIGEN_STRONG_INLINE double predux_min<Packet4d>(const Packet4d& a)
{
  // Combine the two 128-bit halves.
  const Packet4d halves = _mm256_min_pd(a, _mm256_permute2f128_pd(a,a,1));
  // Combine element 0 with element 1.
  const Packet4d all = _mm256_min_pd(halves, _mm256_shuffle_pd(halves, halves, 1));
  return pfirst(all);
}
// Maximum over all lanes, computed by repeatedly taking the maximum of the
// packet and a shuffled copy of itself and reading lane 0.
template<> EIGEN_STRONG_INLINE float predux_max<Packet8f>(const Packet8f& a)
{
  // Combine the two 128-bit halves.
  const Packet8f halves = _mm256_max_ps(a, _mm256_permute2f128_ps(a,a,1));
  // Combine elements {0,1} with {2,3} inside each half.
  const Packet8f pairs = _mm256_max_ps(halves, _mm256_shuffle_ps(halves,halves,_MM_SHUFFLE(1,0,3,2)));
  // Combine element 0 with element 1.
  const Packet8f all = _mm256_max_ps(pairs, _mm256_shuffle_ps(pairs,pairs,1));
  return pfirst(all);
}
template<> EIGEN_STRONG_INLINE double predux_max<Packet4d>(const Packet4d& a)
{
  // Combine the two 128-bit halves.
  const Packet4d halves = _mm256_max_pd(a, _mm256_permute2f128_pd(a,a,1));
  // Combine element 0 with element 1.
  const Packet4d all = _mm256_max_pd(halves, _mm256_shuffle_pd(halves, halves, 1));
  return pfirst(all);
}
// Packet alignment helper for eight floats.
//
// run() overwrites `first` with {first[Offset], ..., first[7],
// second[0], ..., second[Offset-1]}. Offsets outside 1..7 leave `first`
// untouched.
//
// Every branch follows the same recipe: blend the first Offset lanes of
// `second` over `first`, then rotate the eight lanes left by Offset using
// per-half permutes, half swaps and a final blend.
template<int Offset>
struct palign_impl<Offset,Packet8f>
{
static EIGEN_STRONG_INLINE void run(Packet8f& first, const Packet8f& second)
{
if (Offset==1)
{
// Lane 0 comes from `second`.
first = _mm256_blend_ps(first, second, 1);
// Rotate each half left by one; tmp2 is the same with halves swapped.
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
// The last lane of each half must come from the other half.
first = _mm256_blend_ps(tmp1, tmp2, 0x88);
}
else if (Offset==2)
{
// Lanes 0-1 come from `second`.
first = _mm256_blend_ps(first, second, 3);
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
// The last two lanes of each half come from the other half.
first = _mm256_blend_ps(tmp1, tmp2, 0xcc);
}
else if (Offset==3)
{
// Lanes 0-2 come from `second`.
first = _mm256_blend_ps(first, second, 7);
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
// The last three lanes of each half come from the other half.
first = _mm256_blend_ps(tmp1, tmp2, 0xee);
}
else if (Offset==4)
{
// Lanes 0-3 come from `second`; the result is then just a half swap.
first = _mm256_blend_ps(first, second, 15);
// _MM_SHUFFLE(3,2,1,0) is the identity permutation.
Packet8f tmp1 = _mm256_permute_ps (first, _MM_SHUFFLE(3,2,1,0));
Packet8f tmp2 = _mm256_permute2f128_ps (tmp1, tmp1, 1);
first = _mm256_permute_ps(tmp2, _MM_SHUFFLE(3,2,1,0));
}
else if (Offset==5)
{
// Lanes 0-4 come from `second`; swap halves (rotate by four) and then
// proceed as for Offset==1.
first = _mm256_blend_ps(first, second, 31);
first = _mm256_permute2f128_ps(first, first, 1);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(0,3,2,1));
first = _mm256_permute2f128_ps(tmp, tmp, 1);
first = _mm256_blend_ps(tmp, first, 0x88);
}
else if (Offset==6)
{
// Lanes 0-5 come from `second`; swap halves and then proceed as for
// Offset==2.
first = _mm256_blend_ps(first, second, 63);
first = _mm256_permute2f128_ps(first, first, 1);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(1,0,3,2));
first = _mm256_permute2f128_ps(tmp, tmp, 1);
first = _mm256_blend_ps(tmp, first, 0xcc);
}
else if (Offset==7)
{
// Lanes 0-6 come from `second`; swap halves and then proceed as for
// Offset==3.
first = _mm256_blend_ps(first, second, 127);
first = _mm256_permute2f128_ps(first, first, 1);
Packet8f tmp = _mm256_permute_ps (first, _MM_SHUFFLE(2,1,0,3));
first = _mm256_permute2f128_ps(tmp, tmp, 1);
first = _mm256_blend_ps(tmp, first, 0xee);
}
}
};
// Packet alignment helper for four doubles.
//
// run() overwrites `first` with {first[Offset], ..., first[3],
// second[0], ..., second[Offset-1]}. Offsets outside 1..3 leave `first`
// untouched.
template<int Offset>
struct palign_impl<Offset,Packet4d>
{
static EIGEN_STRONG_INLINE void run(Packet4d& first, const Packet4d& second)
{
if (Offset==1)
{
// Lane 0 comes from `second`.
first = _mm256_blend_pd(first, second, 1);
// Swap the two elements inside each half, then build the half-swapped copy.
__m256d tmp = _mm256_permute_pd(first, 5);
first = _mm256_permute2f128_pd(tmp, tmp, 1);
// Odd lanes (mask 0xA) come from the half-swapped copy.
first = _mm256_blend_pd(tmp, first, 0xA);
}
else if (Offset==2)
{
// Lanes 0-1 come from `second`; the rotation by two is a plain half swap.
first = _mm256_blend_pd(first, second, 3);
first = _mm256_permute2f128_pd(first, first, 1);
}
else if (Offset==3)
{
// Lanes 0-2 come from `second`.
first = _mm256_blend_pd(first, second, 7);
__m256d tmp = _mm256_permute_pd(first, 5);
first = _mm256_permute2f128_pd(tmp, tmp, 1);
// Even lanes (mask 5) come from the half-swapped copy.
first = _mm256_blend_pd(tmp, first, 5);
}
}
};
// In-place transpose of an 8x8 block of floats held in eight packets.
// Three stages: interleave pairs of rows, gather 4-element column fragments
// inside each 128-bit half, then stitch matching halves together.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,8>& kernel) {
// Stage 1: interleave rows (0,1), (2,3), (4,5), (6,7) element by element.
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
__m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
__m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
__m256 T4 = _mm256_unpacklo_ps(kernel.packet[4], kernel.packet[5]);
__m256 T5 = _mm256_unpackhi_ps(kernel.packet[4], kernel.packet[5]);
__m256 T6 = _mm256_unpacklo_ps(kernel.packet[6], kernel.packet[7]);
__m256 T7 = _mm256_unpackhi_ps(kernel.packet[6], kernel.packet[7]);
// Stage 2: S0..S3 hold column fragments of rows 0-3, S4..S7 those of rows
// 4-7 (lower half: columns 0-3, upper half: columns 4-7).
__m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
__m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
__m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
__m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
__m256 S4 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(1,0,1,0));
__m256 S5 = _mm256_shuffle_ps(T4,T6,_MM_SHUFFLE(3,2,3,2));
__m256 S6 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(1,0,1,0));
__m256 S7 = _mm256_shuffle_ps(T5,T7,_MM_SHUFFLE(3,2,3,2));
// Stage 3: 0x20 joins the two lower halves (columns 0-3), 0x31 joins the two
// upper halves (columns 4-7).
kernel.packet[0] = _mm256_permute2f128_ps(S0, S4, 0x20);
kernel.packet[1] = _mm256_permute2f128_ps(S1, S5, 0x20);
kernel.packet[2] = _mm256_permute2f128_ps(S2, S6, 0x20);
kernel.packet[3] = _mm256_permute2f128_ps(S3, S7, 0x20);
kernel.packet[4] = _mm256_permute2f128_ps(S0, S4, 0x31);
kernel.packet[5] = _mm256_permute2f128_ps(S1, S5, 0x31);
kernel.packet[6] = _mm256_permute2f128_ps(S2, S6, 0x31);
kernel.packet[7] = _mm256_permute2f128_ps(S3, S7, 0x31);
}
// In-place transpose of a block of four 8-float packets (a 4x8 block).
// On return each packet holds two consecutive 4-element columns of the
// input: packet[0] = columns 0,1; packet[1] = columns 2,3;
// packet[2] = columns 4,5; packet[3] = columns 6,7.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet8f,4>& kernel) {
// Interleave rows (0,1) and (2,3) element by element.
__m256 T0 = _mm256_unpacklo_ps(kernel.packet[0], kernel.packet[1]);
__m256 T1 = _mm256_unpackhi_ps(kernel.packet[0], kernel.packet[1]);
__m256 T2 = _mm256_unpacklo_ps(kernel.packet[2], kernel.packet[3]);
__m256 T3 = _mm256_unpackhi_ps(kernel.packet[2], kernel.packet[3]);
// Si holds column i in its lower half and column i+4 in its upper half.
__m256 S0 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(1,0,1,0));
__m256 S1 = _mm256_shuffle_ps(T0,T2,_MM_SHUFFLE(3,2,3,2));
__m256 S2 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(1,0,1,0));
__m256 S3 = _mm256_shuffle_ps(T1,T3,_MM_SHUFFLE(3,2,3,2));
// 0x20 joins the two lower halves, 0x31 joins the two upper halves.
kernel.packet[0] = _mm256_permute2f128_ps(S0, S1, 0x20);
kernel.packet[1] = _mm256_permute2f128_ps(S2, S3, 0x20);
kernel.packet[2] = _mm256_permute2f128_ps(S0, S1, 0x31);
kernel.packet[3] = _mm256_permute2f128_ps(S2, S3, 0x31);
}
// In-place transpose of a 4x4 block of doubles held in four packets.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<Packet4d,4>& kernel) {
  // Pair up rows (0,1) and (2,3): control 0 picks the even elements of each
  // 128-bit half, control 0xF picks the odd ones.
  const __m256d even01 = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0x0);
  const __m256d odd01  = _mm256_shuffle_pd(kernel.packet[0], kernel.packet[1], 0xF);
  const __m256d even23 = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0x0);
  const __m256d odd23  = _mm256_shuffle_pd(kernel.packet[2], kernel.packet[3], 0xF);

  // 0x20 joins the two lower halves, 0x31 joins the two upper halves.
  kernel.packet[0] = _mm256_permute2f128_pd(even01, even23, 0x20);
  kernel.packet[1] = _mm256_permute2f128_pd(odd01,  odd23,  0x20);
  kernel.packet[2] = _mm256_permute2f128_pd(even01, even23, 0x31);
  kernel.packet[3] = _mm256_permute2f128_pd(odd01,  odd23,  0x31);
}
template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
const __m256 zero = _mm256_setzero_ps();
const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
__m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
}
// Per-lane select between two double packets: lane i of the result comes from
// thenPacket when ifPacket.select[i] is non-zero and from elsePacket otherwise.
template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
  // _mm256_set_pd takes its arguments from the highest lane down to the lowest,
  // hence the reversed index order: lane i ends up holding select[i].
  const __m256d selector_lanes = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2],
                                               ifPacket.select[1], ifPacket.select[0]);
  // Mask is set in the lanes whose selector compares equal to zero.
  const __m256d take_else = _mm256_cmp_pd(selector_lanes, _mm256_setzero_pd(), _CMP_EQ_UQ);
  // blendv picks its second operand wherever the mask is set.
  return _mm256_blendv_pd(thenPacket, elsePacket, take_else);
}
// Returns a copy of `a` whose first (lowest) lane is replaced by `b`.
template<> EIGEN_STRONG_INLINE Packet8f pinsertfirst(const Packet8f& a, float b)
{
  const Packet8f replicated = pset1<Packet8f>(b);
  // Blend mask bit 0 set: only lane 0 is taken from the second operand.
  return _mm256_blend_ps(a, replicated, 0x01);
}
// Returns a copy of `a` whose first (lowest) lane is replaced by `b`.
template<> EIGEN_STRONG_INLINE Packet4d pinsertfirst(const Packet4d& a, double b)
{
  const Packet4d replicated = pset1<Packet4d>(b);
  // Blend mask bit 0 set: only lane 0 is taken from the second operand.
  return _mm256_blend_pd(a, replicated, 0x1);
}
// Returns a copy of `a` whose last (highest, index 7) lane is replaced by `b`.
template<> EIGEN_STRONG_INLINE Packet8f pinsertlast(const Packet8f& a, float b)
{
  const Packet8f replicated = pset1<Packet8f>(b);
  // 0x80 == 1<<7: only lane 7 is taken from the second operand.
  return _mm256_blend_ps(a, replicated, 0x80);
}
// Returns a copy of `a` whose last (highest, index 3) lane is replaced by `b`.
template<> EIGEN_STRONG_INLINE Packet4d pinsertlast(const Packet4d& a, double b)
{
  const Packet4d replicated = pset1<Packet4d>(b);
  // 0x8 == 1<<3: only lane 3 is taken from the second operand.
  return _mm256_blend_pd(a, replicated, 0x8);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_PACKET_MATH_AVX_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TYPE_CASTING_AVX_H
#define EIGEN_TYPE_CASTING_AVX_H
namespace Eigen {
namespace internal {
// For now we use SSE to handle integers, so we can't use AVX instructions to
// cast between float and int: the float -> int traits below therefore do not
// advertise a vectorized cast.
template <>
struct type_casting_traits<float, int> {
  enum {
    VectorizedCast = 0,  // packet-level casting disabled for this pair
    // NOTE(review): the ratios presumably describe how many source packets map
    // to how many target packets -- confirm against the generic traits.
    SrcCoeffRatio = 1,
    TgtCoeffRatio = 1
  };
};
// int -> float casting traits: no vectorized cast is advertised for this pair.
template <>
struct type_casting_traits<int, float> {
  enum {
    VectorizedCast = 0,  // packet-level casting disabled for this pair
    // NOTE(review): the ratios presumably describe how many source packets map
    // to how many target packets -- confirm against the generic traits.
    SrcCoeffRatio = 1,
    TgtCoeffRatio = 1
  };
};
// Converts each of the 8 floats in `a` to a 32-bit signed integer.
// The truncating conversion (cvttps) is used on purpose: it rounds toward
// zero, which is what a scalar C++ float -> int conversion does. The
// non-truncating _mm256_cvtps_epi32 follows the current MXCSR rounding mode
// (round-to-nearest-even by default), so e.g. 1.7f would become 2 instead of 1.
template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8f, Packet8i>(const Packet8f& a) {
  return _mm256_cvttps_epi32(a);
}
// Converts each of the 8 packed 32-bit signed integers in `a` to a float.
template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8i, Packet8f>(const Packet8i& a) {
  const Packet8f as_float = _mm256_cvtepi32_ps(a);
  return as_float;
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TYPE_CASTING_AVX_H