a394b22a · a394b22a · a394b22a · a394b22a · a394b22a · a394b22a
--- a/external/eigen3/Eigen/src/Core/CwiseTernaryOp.h
+++ b/external/eigen3/Eigen/src/Core/CwiseTernaryOp.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CWISE_TERNARY_OP_H
+#define EIGEN_CWISE_TERNARY_OP_H
+
+namespace Eigen {
+
+namespace internal {
+template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3>
+struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
+  // we must not inherit from traits<Arg1> since it has
+  // the potential to cause problems with MSVC
+  typedef typename remove_all<Arg1>::type Ancestor;
+  typedef typename traits<Ancestor>::XprKind XprKind;
+  enum {
+    RowsAtCompileTime = traits<Ancestor>::RowsAtCompileTime,
+    ColsAtCompileTime = traits<Ancestor>::ColsAtCompileTime,
+    MaxRowsAtCompileTime = traits<Ancestor>::MaxRowsAtCompileTime,
+    MaxColsAtCompileTime = traits<Ancestor>::MaxColsAtCompileTime
+  };
+
+  // even though we require Arg1, Arg2, and Arg3 to have the same scalar type
+  // (see CwiseTernaryOp constructor),
+  // we still want to handle the case when the result type is different.
+  typedef typename result_of<TernaryOp(
+      const typename Arg1::Scalar&, const typename Arg2::Scalar&,
+      const typename Arg3::Scalar&)>::type Scalar;
+
+  typedef typename internal::traits<Arg1>::StorageKind StorageKind;
+  typedef typename internal::traits<Arg1>::StorageIndex StorageIndex;
+
+  typedef typename Arg1::Nested Arg1Nested;
+  typedef typename Arg2::Nested Arg2Nested;
+  typedef typename Arg3::Nested Arg3Nested;
+  typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
+  typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
+  typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
+  enum { Flags = _Arg1Nested::Flags & RowMajorBit };
+};
+}  // end namespace internal
+
+template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
+          typename StorageKind>
+class CwiseTernaryOpImpl;
+
+/** \class CwiseTernaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression where a coefficient-wise ternary operator is
+ * applied to two expressions
+  *
+  * \tparam TernaryOp template functor implementing the operator
+  * \tparam Arg1Type the type of the first argument
+  * \tparam Arg2Type the type of the second argument
+  * \tparam Arg3Type the type of the third argument
+  *
+  * This class represents an expression where a coefficient-wise ternary
+ * operator is applied to three expressions.
+  * It is the return type of ternary operators, by which we mean only those
+ * ternary operators where
+  * all three arguments are Eigen expressions.
+  * For example, the return type of betainc(matrix1, matrix2, matrix3) is a
+ * CwiseTernaryOp.
+  *
+  * Most of the time, this is the only way that it is used, so you typically
+ * don't have to name
+  * CwiseTernaryOp types explicitly.
+  *
+  * \sa MatrixBase::ternaryExpr(const MatrixBase<Argument2> &, const
+ * MatrixBase<Argument3> &, const CustomTernaryOp &) const, class CwiseBinaryOp,
+ * class CwiseUnaryOp, class CwiseNullaryOp
+  */
+template <typename TernaryOp, typename Arg1Type, typename Arg2Type,
+          typename Arg3Type>
+class CwiseTernaryOp : public CwiseTernaryOpImpl<
+                           TernaryOp, Arg1Type, Arg2Type, Arg3Type,
+                           typename internal::traits<Arg1Type>::StorageKind>,
+                       internal::no_assignment_operator
+{
+ public:
+  typedef typename internal::remove_all<Arg1Type>::type Arg1;
+  typedef typename internal::remove_all<Arg2Type>::type Arg2;
+  typedef typename internal::remove_all<Arg3Type>::type Arg3;
+
+  typedef typename CwiseTernaryOpImpl<
+      TernaryOp, Arg1Type, Arg2Type, Arg3Type,
+      typename internal::traits<Arg1Type>::StorageKind>::Base Base;
+  EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp)
+
+  typedef typename internal::ref_selector<Arg1Type>::type Arg1Nested;
+  typedef typename internal::ref_selector<Arg2Type>::type Arg2Nested;
+  typedef typename internal::ref_selector<Arg3Type>::type Arg3Nested;
+  typedef typename internal::remove_reference<Arg1Nested>::type _Arg1Nested;
+  typedef typename internal::remove_reference<Arg2Nested>::type _Arg2Nested;
+  typedef typename internal::remove_reference<Arg3Nested>::type _Arg3Nested;
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2,
+                                     const Arg3& a3,
+                                     const TernaryOp& func = TernaryOp())
+      : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) {
+    // require the sizes to match
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
+    EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
+
+    // The index types should match
+    EIGEN_STATIC_ASSERT((internal::is_same<
+                         typename internal::traits<Arg1Type>::StorageKind,
+                         typename internal::traits<Arg2Type>::StorageKind>::value),
+                        STORAGE_KIND_MUST_MATCH)
+    EIGEN_STATIC_ASSERT((internal::is_same<
+                         typename internal::traits<Arg1Type>::StorageKind,
+                         typename internal::traits<Arg3Type>::StorageKind>::value),
+                        STORAGE_KIND_MUST_MATCH)
+
+    eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
+                 a1.rows() == a3.rows() && a1.cols() == a3.cols());
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE Index rows() const {
+    // return the fixed size type if available to enable compile time
+    // optimizations
+    if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+                RowsAtCompileTime == Dynamic &&
+        internal::traits<typename internal::remove_all<Arg2Nested>::type>::
+                RowsAtCompileTime == Dynamic)
+      return m_arg3.rows();
+    else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+                     RowsAtCompileTime == Dynamic &&
+             internal::traits<typename internal::remove_all<Arg3Nested>::type>::
+                     RowsAtCompileTime == Dynamic)
+      return m_arg2.rows();
+    else
+      return m_arg1.rows();
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE Index cols() const {
+    // return the fixed size type if available to enable compile time
+    // optimizations
+    if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+                ColsAtCompileTime == Dynamic &&
+        internal::traits<typename internal::remove_all<Arg2Nested>::type>::
+                ColsAtCompileTime == Dynamic)
+      return m_arg3.cols();
+    else if (internal::traits<typename internal::remove_all<Arg1Nested>::type>::
+                     ColsAtCompileTime == Dynamic &&
+             internal::traits<typename internal::remove_all<Arg3Nested>::type>::
+                     ColsAtCompileTime == Dynamic)
+      return m_arg2.cols();
+    else
+      return m_arg1.cols();
+  }
+
+  /** \returns the first argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg1Nested& arg1() const { return m_arg1; }
+  /** \returns the first argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg2Nested& arg2() const { return m_arg2; }
+  /** \returns the third argument nested expression */
+  EIGEN_DEVICE_FUNC
+  const _Arg3Nested& arg3() const { return m_arg3; }
+  /** \returns the functor representing the ternary operation */
+  EIGEN_DEVICE_FUNC
+  const TernaryOp& functor() const { return m_functor; }
+
+ protected:
+  Arg1Nested m_arg1;
+  Arg2Nested m_arg2;
+  Arg3Nested m_arg3;
+  const TernaryOp m_functor;
+};
+
+// Generic API dispatcher
+template <typename TernaryOp, typename Arg1, typename Arg2, typename Arg3,
+          typename StorageKind>
+class CwiseTernaryOpImpl
+    : public internal::generic_xpr_base<
+          CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type {
+ public:
+  typedef typename internal::generic_xpr_base<
+      CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> >::type Base;
+};
+
+}  // end namespace Eigen
+
+#endif  // EIGEN_CWISE_TERNARY_OP_H
--- a/external/eigen3/Eigen/src/Core/CwiseUnaryOp.h
+++ b/external/eigen3/Eigen/src/Core/CwiseUnaryOp.h
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -13,41 +13,18 @@

 namespace Eigen { 

-/** \class CwiseUnaryOp
-  * \ingroup Core_Module
-  *
-  * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
-  *
-  * \param UnaryOp template functor implementing the operator
-  * \param XprType the type of the expression to which we are applying the unary operator
-  *
-  * This class represents an expression where a unary operator is applied to an expression.
-  * It is the return type of all operations taking exactly 1 input expression, regardless of the
-  * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
-  * is considered unary, because only the right-hand side is an expression, and its
-  * return type is a specialization of CwiseUnaryOp.
-  *
-  * Most of the time, this is the only way that it is used, so you typically don't have to name
-  * CwiseUnaryOp types explicitly.
-  *
-  * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
-  */
-
 namespace internal {
 template<typename UnaryOp, typename XprType>
 struct traits<CwiseUnaryOp<UnaryOp, XprType> >
 : traits<XprType>
 {
  typedef typename result_of<
-                     UnaryOp(typename XprType::Scalar)
+                     UnaryOp(const typename XprType::Scalar&)
                   >::type Scalar;
  typedef typename XprType::Nested XprTypeNested;
  typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
  enum {
-    Flags = _XprTypeNested::Flags & (
-      HereditaryBits | LinearAccessBit | AlignedBit
-      | (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
-    CoeffReadCost = EIGEN_ADD_COST(_XprTypeNested::CoeffReadCost, functor_traits<UnaryOp>::Cost)
+    Flags = _XprTypeNested::Flags & RowMajorBit 
  };
 };
 }
@@ -55,70 +32,70 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
 template<typename UnaryOp, typename XprType, typename StorageKind>
 class CwiseUnaryOpImpl;

+/** \class CwiseUnaryOp
+  * \ingroup Core_Module
+  *
+  * \brief Generic expression where a coefficient-wise unary operator is applied to an expression
+  *
+  * \tparam UnaryOp template functor implementing the operator
+  * \tparam XprType the type of the expression to which we are applying the unary operator
+  *
+  * This class represents an expression where a unary operator is applied to an expression.
+  * It is the return type of all operations taking exactly 1 input expression, regardless of the
+  * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix
+  * is considered unary, because only the right-hand side is an expression, and its
+  * return type is a specialization of CwiseUnaryOp.
+  *
+  * Most of the time, this is the only way that it is used, so you typically don't have to name
+  * CwiseUnaryOp types explicitly.
+  *
+  * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp
+  */
 template<typename UnaryOp, typename XprType>
-class CwiseUnaryOp : internal::no_assignment_operator,
-  public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>
+class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal::traits<XprType>::StorageKind>, internal::no_assignment_operator
 {
  public:

    typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+    typedef typename internal::ref_selector<XprType>::type XprTypeNested;
+    typedef typename internal::remove_all<XprType>::type NestedExpression;

-    inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
      : m_xpr(xpr), m_functor(func) {}

-    EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
-    EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Index rows() const { return m_xpr.rows(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Index cols() const { return m_xpr.cols(); }

    /** \returns the functor representing the unary operation */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    const UnaryOp& functor() const { return m_functor; }

    /** \returns the nested expression */
-    const typename internal::remove_all<typename XprType::Nested>::type&
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    const typename internal::remove_all<XprTypeNested>::type&
    nestedExpression() const { return m_xpr; }

    /** \returns the nested expression */
-    typename internal::remove_all<typename XprType::Nested>::type&
-    nestedExpression() { return m_xpr.const_cast_derived(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    typename internal::remove_all<XprTypeNested>::type&
+    nestedExpression() { return m_xpr; }

  protected:
-    typename XprType::Nested m_xpr;
+    XprTypeNested m_xpr;
    const UnaryOp m_functor;
 };

-// This is the generic implementation for dense storage.
-// It can be used for any expression types implementing the dense concept.
-template<typename UnaryOp, typename XprType>
-class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
-  : public internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
+// Generic API dispatcher
+template<typename UnaryOp, typename XprType, typename StorageKind>
+class CwiseUnaryOpImpl
+  : public internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type
 {
-  public:
-
-    typedef CwiseUnaryOp<UnaryOp, XprType> Derived;
-    typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
-
-    EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
-    {
-      return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const
-    {
-      return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
-    }
-
-    EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
-    {
-      return derived().functor()(derived().nestedExpression().coeff(index));
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
-    {
-      return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
-    }
+public:
+  typedef typename internal::generic_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
 };

 } // end namespace Eigen

--- a/external/eigen3/Eigen/src/Core/CwiseUnaryView.h
+++ b/external/eigen3/Eigen/src/Core/CwiseUnaryView.h
@@ -12,33 +12,19 @@

 namespace Eigen {

-/** \class CwiseUnaryView
-  * \ingroup Core_Module
-  *
-  * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
-  *
-  * \param ViewOp template functor implementing the view
-  * \param MatrixType the type of the matrix we are applying the unary operator
-  *
-  * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
-  * It is the return type of real() and imag(), and most of the time this is the only way it is used.
-  *
-  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
-  */
-
 namespace internal {
 template<typename ViewOp, typename MatrixType>
 struct traits<CwiseUnaryView<ViewOp, MatrixType> >
 : traits<MatrixType>
 {
  typedef typename result_of<
-                     ViewOp(typename traits<MatrixType>::Scalar)
+                     ViewOp(const typename traits<MatrixType>::Scalar&)
                   >::type Scalar;
  typedef typename MatrixType::Nested MatrixTypeNested;
  typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
  enum {
-    Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
-    CoeffReadCost = EIGEN_ADD_COST(traits<_MatrixTypeNested>::CoeffReadCost, functor_traits<ViewOp>::Cost),
+    FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
+    Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
    MatrixTypeInnerStride =  inner_stride_at_compile_time<MatrixType>::ret,
    // need to cast the sizeof's from size_t to int explicitly, otherwise:
    // "error: no integral type can represent all of the enumerator values
@@ -55,6 +41,19 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
 template<typename ViewOp, typename MatrixType, typename StorageKind>
 class CwiseUnaryViewImpl;

+/** \class CwiseUnaryView
+  * \ingroup Core_Module
+  *
+  * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector
+  *
+  * \tparam ViewOp template functor implementing the view
+  * \tparam MatrixType the type of the matrix we are applying the unary operator
+  *
+  * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector.
+  * It is the return type of real() and imag(), and most of the time this is the only way it is used.
+  *
+  * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp
+  */
 template<typename ViewOp, typename MatrixType>
 class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename internal::traits<MatrixType>::StorageKind>
 {
@@ -62,8 +61,10 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in

    typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
    EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
+    typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
+    typedef typename internal::remove_all<MatrixType>::type NestedExpression;

-    inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp())
+    explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
      : m_matrix(mat), m_functor(func) {}

    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView)
@@ -75,19 +76,27 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
    const ViewOp& functor() const { return m_functor; }

    /** \returns the nested expression */
-    const typename internal::remove_all<typename MatrixType::Nested>::type&
+    const typename internal::remove_all<MatrixTypeNested>::type&
    nestedExpression() const { return m_matrix; }

    /** \returns the nested expression */
-    typename internal::remove_all<typename MatrixType::Nested>::type&
+    typename internal::remove_reference<MatrixTypeNested>::type&
    nestedExpression() { return m_matrix.const_cast_derived(); }

  protected:
-    // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC
-    typename internal::nested<MatrixType>::type m_matrix;
+    MatrixTypeNested m_matrix;
    ViewOp m_functor;
 };

+// Generic API dispatcher
+template<typename ViewOp, typename XprType, typename StorageKind>
+class CwiseUnaryViewImpl
+  : public internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<CwiseUnaryView<ViewOp, XprType> >::type Base;
+};
+
 template<typename ViewOp, typename MatrixType>
 class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
  : public internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type
@@ -100,38 +109,18 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
    EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl)
    
-    inline Scalar* data() { return &coeffRef(0); }
-    inline const Scalar* data() const { return &coeff(0); }
+    EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); }
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); }

-    inline Index innerStride() const
+    EIGEN_DEVICE_FUNC inline Index innerStride() const
    {
      return derived().nestedExpression().innerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
    }

-    inline Index outerStride() const
+    EIGEN_DEVICE_FUNC inline Index outerStride() const
    {
      return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
    }
-
-    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
-    {
-      return derived().functor()(derived().nestedExpression().coeff(row, col));
-    }
-
-    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
-    {
-      return derived().functor()(derived().nestedExpression().coeff(index));
-    }
-
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
-    {
-      return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col));
-    }
-
-    EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
-    {
-      return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index));
-    }
 };

 } // end namespace Eigen

--- a/external/eigen3/Eigen/src/Core/DenseBase.h
+++ b/external/eigen3/Eigen/src/Core/DenseBase.h
@@ -34,37 +34,45 @@ static inline void check_DenseIndex_is_signed() {
  * \tparam Derived is the derived type, e.g., a matrix type or an expression.
  *
  * This class can be extended with the help of the plugin mechanism described on the page
-  * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
+  * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN.
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived> class DenseBase
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-  : public internal::special_scalar_op_base<Derived, typename internal::traits<Derived>::Scalar,
-                                            typename NumTraits<typename internal::traits<Derived>::Scalar>::Real,
-                                            DenseCoeffsBase<Derived> >
-#else
  : public DenseCoeffsBase<Derived>
+#else
+  : public DenseCoeffsBase<Derived,DirectWriteAccessors>
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 {
  public:

-    class InnerIterator;
+    /** Inner iterator type to iterate over the coefficients of a row or column.
+      * \sa class InnerIterator
+      */
+    typedef Eigen::InnerIterator<Derived> InnerIterator;

    typedef typename internal::traits<Derived>::StorageKind StorageKind;

-    /** \brief The type of indices 
-      * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
-      * \sa \ref TopicPreprocessorDirectives.
-      */
-    typedef typename internal::traits<Derived>::Index Index; 
+    /**
+      * \brief The type used to store indices
+      * \details This typedef is relevant for types that store multiple indices such as
+      *          PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index
+      * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase.
+     */
+    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;

+    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc. */
    typedef typename internal::traits<Derived>::Scalar Scalar;
-    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+    
+    /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex<float>, etc.
+      *
+      * It is an alias for the Scalar type */
+    typedef Scalar value_type;
+    
    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef internal::special_scalar_op_base<Derived,Scalar,RealScalar, DenseCoeffsBase<Derived> > Base;
+    typedef DenseCoeffsBase<Derived> Base;

-    using Base::operator*;
    using Base::derived;
    using Base::const_cast_derived;
    using Base::rows;
@@ -74,16 +82,6 @@ template<typename Derived> class DenseBase
    using Base::colIndexByOuterInner;
    using Base::coeff;
    using Base::coeffByOuterInner;
-    using Base::packet;
-    using Base::packetByOuterInner;
-    using Base::writePacket;
-    using Base::writePacketByOuterInner;
-    using Base::coeffRef;
-    using Base::coeffRefByOuterInner;
-    using Base::copyCoeff;
-    using Base::copyCoeffByOuterInner;
-    using Base::copyPacket;
-    using Base::copyPacketByOuterInner;
    using Base::operator();
    using Base::operator[];
    using Base::x;
@@ -169,19 +167,46 @@ template<typename Derived> class DenseBase
      InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime)
                             : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime),

-      CoeffReadCost = internal::traits<Derived>::CoeffReadCost,
-        /**< This is a rough measure of how expensive it is to read one coefficient from
-          * this expression.
-          */
-
      InnerStrideAtCompileTime = internal::inner_stride_at_compile_time<Derived>::ret,
      OuterStrideAtCompileTime = internal::outer_stride_at_compile_time<Derived>::ret
    };
+    
+    typedef typename internal::find_best_packet<Scalar,SizeAtCompileTime>::type PacketScalar;

-    enum { ThisConstantIsPrivateInPlainObjectBase };
+    enum { IsPlainObjectBase = 0 };
+    
+    /** The plain matrix type corresponding to this expression.
+      * \sa PlainObject */
+    typedef Matrix<typename internal::traits<Derived>::Scalar,
+                internal::traits<Derived>::RowsAtCompileTime,
+                internal::traits<Derived>::ColsAtCompileTime,
+                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                internal::traits<Derived>::MaxRowsAtCompileTime,
+                internal::traits<Derived>::MaxColsAtCompileTime
+          > PlainMatrix;
+    
+    /** The plain array type corresponding to this expression.
+      * \sa PlainObject */
+    typedef Array<typename internal::traits<Derived>::Scalar,
+                internal::traits<Derived>::RowsAtCompileTime,
+                internal::traits<Derived>::ColsAtCompileTime,
+                AutoAlign | (internal::traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
+                internal::traits<Derived>::MaxRowsAtCompileTime,
+                internal::traits<Derived>::MaxColsAtCompileTime
+          > PlainArray;
+
+    /** \brief The plain matrix or array type corresponding to this expression.
+      *
+      * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
+      * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
+      * that the return type of eval() is either PlainObject or const PlainObject&.
+      */
+    typedef typename internal::conditional<internal::is_same<typename internal::traits<Derived>::XprKind,MatrixXpr >::value,
+                                 PlainMatrix, PlainArray>::type PlainObject;

    /** \returns the number of nonzero coefficients which is in practice the number
      * of stored coefficients. */
+    EIGEN_DEVICE_FUNC
    inline Index nonZeros() const { return size(); }

    /** \returns the outer size.
@@ -189,6 +214,7 @@ template<typename Derived> class DenseBase
      * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
      * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
      * column-major matrix, and the number of rows for a row-major matrix. */
+    EIGEN_DEVICE_FUNC
    Index outerSize() const
    {
      return IsVectorAtCompileTime ? 1
@@ -200,6 +226,7 @@ template<typename Derived> class DenseBase
      * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
      * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a 
      * column-major matrix, and the number of columns for a row-major matrix. */
+    EIGEN_DEVICE_FUNC
    Index innerSize() const
    {
      return IsVectorAtCompileTime ? this->size()
@@ -210,6 +237,7 @@ template<typename Derived> class DenseBase
      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
      * nothing else.
      */
+    EIGEN_DEVICE_FUNC
    void resize(Index newSize)
    {
      EIGEN_ONLY_USED_FOR_DEBUG(newSize);
@@ -220,22 +248,22 @@ template<typename Derived> class DenseBase
      * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
      * nothing else.
      */
-    void resize(Index nbRows, Index nbCols)
+    EIGEN_DEVICE_FUNC
+    void resize(Index rows, Index cols)
    {
-      EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
-      EIGEN_ONLY_USED_FOR_DEBUG(nbCols);
-      eigen_assert(nbRows == this->rows() && nbCols == this->cols()
+      EIGEN_ONLY_USED_FOR_DEBUG(rows);
+      EIGEN_ONLY_USED_FOR_DEBUG(cols);
+      eigen_assert(rows == this->rows() && cols == this->cols()
                && "DenseBase::resize() does not actually allow to resize.");
    }

 #ifndef EIGEN_PARSED_BY_DOXYGEN
-
    /** \internal Represents a matrix with all coefficients equal to one another*/
-    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,Derived> ConstantReturnType;
-    /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
-    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
+    typedef CwiseNullaryOp<internal::scalar_constant_op<Scalar>,PlainObject> ConstantReturnType;
+    /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> SequentialLinSpacedReturnType;
    /** \internal Represents a vector with linearly spaced coefficients that allows random access. */
-    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
+    typedef CwiseNullaryOp<internal::linspaced_op<Scalar,PacketScalar>,PlainObject> RandomAccessLinSpacedReturnType;
    /** \internal the return type of MatrixBase::eigenvalues() */
    typedef Matrix<typename NumTraits<typename internal::traits<Derived>::Scalar>::Real, internal::traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;

@@ -243,120 +271,133 @@ template<typename Derived> class DenseBase

    /** Copies \a other into *this. \returns a reference to *this. */
    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator=(const DenseBase<OtherDerived>& other);

    /** Special case of the template operator=, in order to prevent the compiler
      * from generating a default operator= (issue hit with g++ 4.1)
      */
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    Derived& operator=(const DenseBase& other);

    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    Derived& operator=(const EigenBase<OtherDerived> &other);

    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    Derived& operator+=(const EigenBase<OtherDerived> &other);

    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    Derived& operator-=(const EigenBase<OtherDerived> &other);

    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    Derived& operator=(const ReturnByValue<OtherDerived>& func);

-    /** \internal Copies \a other into *this without evaluating other. \returns a reference to *this. */
+    /** \internal
+      * Copies \a other into *this without evaluating other. \returns a reference to *this.
+      * \deprecated */
    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    Derived& lazyAssign(const DenseBase<OtherDerived>& other);

-    /** \internal Evaluates \a other into *this. \returns a reference to *this. */
-    template<typename OtherDerived>
-    Derived& lazyAssign(const ReturnByValue<OtherDerived>& other);
-
+    EIGEN_DEVICE_FUNC
    CommaInitializer<Derived> operator<< (const Scalar& s);

+    /** \deprecated it now returns \c *this */
    template<unsigned int Added,unsigned int Removed>
-    const Flagged<Derived, Added, Removed> flagged() const;
+    EIGEN_DEPRECATED
+    const Derived& flagged() const
+    { return derived(); }

    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);

-    Eigen::Transpose<Derived> transpose();
-	typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    typedef Transpose<Derived> TransposeReturnType;
+    EIGEN_DEVICE_FUNC
+    TransposeReturnType transpose();
+    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    EIGEN_DEVICE_FUNC
    ConstTransposeReturnType transpose() const;
+    EIGEN_DEVICE_FUNC
    void transposeInPlace();
-#ifndef EIGEN_NO_DEBUG
-  protected:
-    template<typename OtherDerived>
-    void checkTransposeAliasing(const OtherDerived& other) const;
-  public:
-#endif
-

-    static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
    Constant(Index rows, Index cols, const Scalar& value);
-    static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
    Constant(Index size, const Scalar& value);
-    static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
    Constant(const Scalar& value);

-    static const SequentialLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
    LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
-    static const RandomAccessLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
    LinSpaced(Index size, const Scalar& low, const Scalar& high);
-    static const SequentialLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
    LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
-    static const RandomAccessLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
    LinSpaced(const Scalar& low, const Scalar& high);

-    template<typename CustomNullaryOp>
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
    NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp>
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
    NullaryExpr(Index size, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp>
-    static const CwiseNullaryOp<CustomNullaryOp, Derived>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
+    static const CwiseNullaryOp<CustomNullaryOp, PlainObject>
    NullaryExpr(const CustomNullaryOp& func);

-    static const ConstantReturnType Zero(Index rows, Index cols);
-    static const ConstantReturnType Zero(Index size);
-    static const ConstantReturnType Zero();
-    static const ConstantReturnType Ones(Index rows, Index cols);
-    static const ConstantReturnType Ones(Index size);
-    static const ConstantReturnType Ones();
-
-    void fill(const Scalar& value);
-    Derived& setConstant(const Scalar& value);
-    Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
-    Derived& setLinSpaced(const Scalar& low, const Scalar& high);
-    Derived& setZero();
-    Derived& setOnes();
-    Derived& setRandom();
-
-    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
+
+    EIGEN_DEVICE_FUNC void fill(const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC Derived& setZero();
+    EIGEN_DEVICE_FUNC Derived& setOnes();
+    EIGEN_DEVICE_FUNC Derived& setRandom();
+
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC
    bool isApprox(const DenseBase<OtherDerived>& other,
                  const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC 
    bool isMuchSmallerThan(const RealScalar& other,
                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    template<typename OtherDerived>
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC
    bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
                           const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;

-    bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
    
    inline bool hasNaN() const;
    inline bool allFinite() const;

-    inline Derived& operator*=(const Scalar& other);
-    inline Derived& operator/=(const Scalar& other);
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator*=(const Scalar& other);
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    Derived& operator/=(const Scalar& other);

    typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
    /** \returns the matrix or vector obtained by evaluating this expression.
      *
      * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
      * a const reference, in order to avoid a useless copy.
+      * 
+      * \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink.
      */
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE EvalReturnType eval() const
    {
      // Even though MSVC does not honor strong inlining when the return type
@@ -364,61 +405,78 @@ template<typename Derived> class DenseBase
      // size types on MSVC.
      return typename internal::eval<Derived>::type(derived());
    }
-
+    
    /** swaps *this with the expression \a other.
      *
      */
    template<typename OtherDerived>
-    void swap(const DenseBase<OtherDerived>& other,
-              int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
+    EIGEN_DEVICE_FUNC
+    void swap(const DenseBase<OtherDerived>& other)
    {
-      SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+      EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY);
+      eigen_assert(rows()==other.rows() && cols()==other.cols());
+      call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op<Scalar>());
    }

    /** swaps *this with the matrix or array \a other.
      *
      */
    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    void swap(PlainObjectBase<OtherDerived>& other)
    {
-      SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
+      eigen_assert(rows()==other.rows() && cols()==other.cols());
+      call_assignment(derived(), other.derived(), internal::swap_assign_op<Scalar>());
    }

+    EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
+    EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+    EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
+    template<bool Enable> EIGEN_DEVICE_FUNC
+    inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+    template<bool Enable> EIGEN_DEVICE_FUNC
+    inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();

-    inline const NestByValue<Derived> nestByValue() const;
-    inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
-    inline ForceAlignedAccess<Derived> forceAlignedAccess();
-    template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
-    template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
-
-    Scalar sum() const;
-    Scalar mean() const;
-    Scalar trace() const;
+    EIGEN_DEVICE_FUNC Scalar sum() const;
+    EIGEN_DEVICE_FUNC Scalar mean() const;
+    EIGEN_DEVICE_FUNC Scalar trace() const;

-    Scalar prod() const;
+    EIGEN_DEVICE_FUNC Scalar prod() const;

-    typename internal::traits<Derived>::Scalar minCoeff() const;
-    typename internal::traits<Derived>::Scalar maxCoeff() const;
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;

-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
    typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
    typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
    typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;

    template<typename BinaryOp>
-    typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
-    redux(const BinaryOp& func) const;
+    EIGEN_DEVICE_FUNC
+    Scalar redux(const BinaryOp& func) const;

    template<typename Visitor>
+    EIGEN_DEVICE_FUNC
    void visit(Visitor& func) const;

-    inline const WithFormat<Derived> format(const IOFormat& fmt) const;
+    /** \returns a WithFormat proxy object allowing to print a matrix the with given
+      * format \a fmt.
+      *
+      * See class IOFormat for some examples.
+      *
+      * \sa class IOFormat, class WithFormat
+      */
+    inline const WithFormat<Derived> format(const IOFormat& fmt) const
+    {
+      return WithFormat<Derived>(derived(), fmt);
+    }

    /** \returns the unique coefficient of a 1x1 expression */
+    EIGEN_DEVICE_FUNC
    CoeffReturnType value() const
    {
      EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
@@ -426,23 +484,44 @@ template<typename Derived> class DenseBase
      return derived().coeff(0,0);
    }

-    bool all(void) const;
-    bool any(void) const;
-    Index count() const;
+    EIGEN_DEVICE_FUNC bool all() const;
+    EIGEN_DEVICE_FUNC bool any() const;
+    EIGEN_DEVICE_FUNC Index count() const;

    typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
    typedef const VectorwiseOp<const Derived, Horizontal> ConstRowwiseReturnType;
    typedef VectorwiseOp<Derived, Vertical> ColwiseReturnType;
    typedef const VectorwiseOp<const Derived, Vertical> ConstColwiseReturnType;

-    ConstRowwiseReturnType rowwise() const;
-    RowwiseReturnType rowwise();
-    ConstColwiseReturnType colwise() const;
-    ColwiseReturnType colwise();
+    /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
+    *
+    * Example: \include MatrixBase_rowwise.cpp
+    * Output: \verbinclude MatrixBase_rowwise.out
+    *
+    * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+    */
+    //Code moved here due to a CUDA compiler bug
+    EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const {
+      return ConstRowwiseReturnType(derived());
+    }
+    EIGEN_DEVICE_FUNC RowwiseReturnType rowwise();
+
+    /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations
+    *
+    * Example: \include MatrixBase_colwise.cpp
+    * Output: \verbinclude MatrixBase_colwise.out
+    *
+    * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting
+    */
+    EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const {
+      return ConstColwiseReturnType(derived());
+    }
+    EIGEN_DEVICE_FUNC ColwiseReturnType colwise();

-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index rows, Index cols);
-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random(Index size);
-    static const CwiseNullaryOp<internal::scalar_random_op<Scalar>,Derived> Random();
+    typedef CwiseNullaryOp<internal::scalar_random_op<Scalar>,PlainObject> RandomReturnType;
+    static const RandomReturnType Random(Index rows, Index cols);
+    static const RandomReturnType Random(Index size);
+    static const RandomReturnType Random();

    template<typename ThenDerived,typename ElseDerived>
    const Select<Derived,ThenDerived,ElseDerived>
@@ -460,45 +539,56 @@ template<typename Derived> class DenseBase
    template<int p> RealScalar lpNorm() const;

    template<int RowFactor, int ColFactor>
-    inline const Replicate<Derived,RowFactor,ColFactor> replicate() const;
-    
-    typedef Replicate<Derived,Dynamic,Dynamic> ReplicateReturnType;
-    inline const ReplicateReturnType replicate(Index rowFacor,Index colFactor) const;
+    EIGEN_DEVICE_FUNC
+    const Replicate<Derived,RowFactor,ColFactor> replicate() const;
+    /**
+    * \return an expression of the replication of \c *this
+    *
+    * Example: \include MatrixBase_replicate_int_int.cpp
+    * Output: \verbinclude MatrixBase_replicate_int_int.out
+    *
+    * \sa VectorwiseOp::replicate(), DenseBase::replicate<int,int>(), class Replicate
+    */
+    //Code moved here due to a CUDA compiler bug
+    EIGEN_DEVICE_FUNC
+    const Replicate<Derived, Dynamic, Dynamic> replicate(Index rowFactor, Index colFactor) const
+    {
+      return Replicate<Derived, Dynamic, Dynamic>(derived(), rowFactor, colFactor);
+    }

    typedef Reverse<Derived, BothDirections> ReverseReturnType;
    typedef const Reverse<const Derived, BothDirections> ConstReverseReturnType;
-    ReverseReturnType reverse();
-    ConstReverseReturnType reverse() const;
-    void reverseInPlace();
+    EIGEN_DEVICE_FUNC ReverseReturnType reverse();
+    /** This is the const version of reverse(). */
+    //Code moved here due to a CUDA compiler bug
+    EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const
+    {
+      return ConstReverseReturnType(derived());
+    }
+    EIGEN_DEVICE_FUNC void reverseInPlace();

 #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase
+#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND)
 #   include "../plugins/BlockMethods.h"
 #   ifdef EIGEN_DENSEBASE_PLUGIN
 #     include EIGEN_DENSEBASE_PLUGIN
 #   endif
 #undef EIGEN_CURRENT_STORAGE_BASE_CLASS
-
-#ifdef EIGEN2_SUPPORT
-
-    Block<Derived> corner(CornerType type, Index cRows, Index cCols);
-    const Block<Derived> corner(CornerType type, Index cRows, Index cCols) const;
-    template<int CRows, int CCols>
-    Block<Derived, CRows, CCols> corner(CornerType type);
-    template<int CRows, int CCols>
-    const Block<Derived, CRows, CCols> corner(CornerType type) const;
-
-#endif // EIGEN2_SUPPORT
-
+#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL
+#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF

    // disable the use of evalTo for dense objects with a nice compilation error
-    template<typename Dest> inline void evalTo(Dest& ) const
+    template<typename Dest>
+    EIGEN_DEVICE_FUNC
+    inline void evalTo(Dest& ) const
    {
      EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
    }

  protected:
    /** Default constructor. Do nothing. */
-    DenseBase()
+    EIGEN_DEVICE_FUNC DenseBase()
    {
      /* Just checks for self-consistency of the flags.
       * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
@@ -511,9 +601,9 @@ template<typename Derived> class DenseBase
    }

  private:
-    explicit DenseBase(int);
-    DenseBase(int,int);
-    template<typename OtherDerived> explicit DenseBase(const DenseBase<OtherDerived>&);
+    EIGEN_DEVICE_FUNC explicit DenseBase(int);
+    EIGEN_DEVICE_FUNC DenseBase(int,int);
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
 };

 } // end namespace Eigen

--- a/external/eigen3/Eigen/src/Core/DenseCoeffsBase.h
+++ b/external/eigen3/Eigen/src/Core/DenseCoeffsBase.h
@@ -35,7 +35,6 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
 {
  public:
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;

@@ -61,6 +60,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    using Base::size;
    using Base::derived;

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
    {
      return int(Derived::RowsAtCompileTime) == 1 ? 0
@@ -69,6 +69,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
          : inner;
    }

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
    {
      return int(Derived::ColsAtCompileTime) == 1 ? 0
@@ -91,13 +92,15 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      *
      * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
      */
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
    {
      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      return derived().coeff(row, col);
+                         && col >= 0 && col < cols());
+      return internal::evaluator<Derived>(derived()).coeff(row,col);
    }

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
    {
      return coeff(rowIndexByOuterInner(outer, inner),
@@ -108,11 +111,12 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      *
      * \sa operator()(Index,Index), operator[](Index)
      */
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
    {
      eigen_assert(row >= 0 && row < rows()
          && col >= 0 && col < cols());
-      return derived().coeff(row, col);
+      return coeff(row, col);
    }

    /** Short version: don't use this function, use
@@ -130,11 +134,14 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
      */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    coeff(Index index) const
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      eigen_internal_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return internal::evaluator<Derived>(derived()).coeff(index);
    }


@@ -146,15 +153,14 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * z() const, w() const
      */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    operator[](Index index) const
    {
-      #ifndef EIGEN2_SUPPORT
      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
-      #endif
      eigen_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return coeff(index);
    }

    /** \returns the coefficient at given index.
@@ -167,32 +173,49 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
      * z() const, w() const
      */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    operator()(Index index) const
    {
      eigen_assert(index >= 0 && index < size());
-      return derived().coeff(index);
+      return coeff(index);
    }

    /** equivalent to operator[](0).  */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
    x() const { return (*this)[0]; }

    /** equivalent to operator[](1).  */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
-    y() const { return (*this)[1]; }
+    y() const
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+      return (*this)[1];
+    }

    /** equivalent to operator[](2).  */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
-    z() const { return (*this)[2]; }
+    z() const
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+      return (*this)[2];
+    }

    /** equivalent to operator[](3).  */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE CoeffReturnType
-    w() const { return (*this)[3]; }
+    w() const
+    {
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+      return (*this)[3];
+    }

    /** \internal
      * \returns the packet of coefficients starting at the given row and column. It is your responsibility
@@ -207,9 +230,9 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const
    {
-      eigen_internal_assert(row >= 0 && row < rows()
-                      && col >= 0 && col < cols());
-      return derived().template packet<LoadMode>(row,col);
+      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
+      eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols());
+      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(row,col);
    }


@@ -234,8 +257,11 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
    template<int LoadMode>
    EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
+      typedef typename internal::packet_traits<Scalar>::type DefaultPacketType;
      eigen_internal_assert(index >= 0 && index < size());
-      return derived().template packet<LoadMode>(index);
+      return internal::evaluator<Derived>(derived()).template packet<LoadMode,DefaultPacketType>(index);
    }

  protected:
@@ -278,7 +304,6 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;

    typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename internal::packet_traits<Scalar>::type PacketScalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -311,13 +336,15 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      *
      * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
      */
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
    {
      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      return derived().coeffRef(row, col);
+                         && col >= 0 && col < cols());
+      return internal::evaluator<Derived>(derived()).coeffRef(row,col);
    }

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    coeffRefByOuterInner(Index outer, Index inner)
    {
@@ -330,12 +357,13 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index)
      */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator()(Index row, Index col)
    {
      eigen_assert(row >= 0 && row < rows()
          && col >= 0 && col < cols());
-      return derived().coeffRef(row, col);
+      return coeffRef(row, col);
    }


@@ -354,11 +382,14 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
      */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    coeffRef(Index index)
    {
+      EIGEN_STATIC_ASSERT(internal::evaluator<Derived>::Flags & LinearAccessBit,
+                          THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS)
      eigen_internal_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return internal::evaluator<Derived>(derived()).coeffRef(index);
    }

    /** \returns a reference to the coefficient at given index.
@@ -368,15 +399,14 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
      */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator[](Index index)
    {
-      #ifndef EIGEN2_SUPPORT
      EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime,
                          THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD)
-      #endif
      eigen_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return coeffRef(index);
    }

    /** \returns a reference to the coefficient at given index.
@@ -388,167 +418,49 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
      * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
      */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    operator()(Index index)
    {
      eigen_assert(index >= 0 && index < size());
-      return derived().coeffRef(index);
+      return coeffRef(index);
    }

    /** equivalent to operator[](0).  */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
    x() { return (*this)[0]; }

    /** equivalent to operator[](1).  */

+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Scalar&
-    y() { return (*this)[1]; }
-
-    /** equivalent to operator[](2).  */
-
-    EIGEN_STRONG_INLINE Scalar&
-    z() { return (*this)[2]; }
-
-    /** equivalent to operator[](3).  */
-
-    EIGEN_STRONG_INLINE Scalar&
-    w() { return (*this)[3]; }
-
-    /** \internal
-      * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility
-      * to ensure that a packet really starts there. This method is only available on expressions having the
-      * PacketAccessBit.
-      *
-      * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select
-      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
-      * starting at an address which is a multiple of the packet size.
-      */
-
-    template<int StoreMode>
-    EIGEN_STRONG_INLINE void writePacket
-    (Index row, Index col, const typename internal::packet_traits<Scalar>::type& val)
+    y()
    {
-      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      derived().template writePacket<StoreMode>(row,col,val);
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS);
+      return (*this)[1];
    }

+    /** equivalent to operator[](2).  */

-    /** \internal */
-    template<int StoreMode>
-    EIGEN_STRONG_INLINE void writePacketByOuterInner
-    (Index outer, Index inner, const typename internal::packet_traits<Scalar>::type& val)
-    {
-      writePacket<StoreMode>(rowIndexByOuterInner(outer, inner),
-                            colIndexByOuterInner(outer, inner),
-                            val);
-    }
-
-    /** \internal
-      * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility
-      * to ensure that a packet really starts there. This method is only available on expressions having the
-      * PacketAccessBit and the LinearAccessBit.
-      *
-      * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select
-      * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets
-      * starting at an address which is a multiple of the packet size.
-      */
-    template<int StoreMode>
-    EIGEN_STRONG_INLINE void writePacket
-    (Index index, const typename internal::packet_traits<Scalar>::type& val)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      derived().template writePacket<StoreMode>(index,val);
-    }
-
-#ifndef EIGEN_PARSED_BY_DOXYGEN
-
-    /** \internal Copies the coefficient at position (row,col) of other into *this.
-      *
-      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
-      * with usual assignments.
-      *
-      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
-      */
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
-    {
-      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      derived().coeffRef(row, col) = other.derived().coeff(row, col);
-    }
-
-    /** \internal Copies the coefficient at the given index of other into *this.
-      *
-      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
-      * with usual assignments.
-      *
-      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
-      */
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      derived().coeffRef(index) = other.derived().coeff(index);
-    }
-
-
-    template<typename OtherDerived>
-    EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
-    {
-      const Index row = rowIndexByOuterInner(outer,inner);
-      const Index col = colIndexByOuterInner(outer,inner);
-      // derived() is important here: copyCoeff() may be reimplemented in Derived!
-      derived().copyCoeff(row, col, other);
-    }
-
-    /** \internal Copies the packet at position (row,col) of other into *this.
-      *
-      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
-      * with usual assignments.
-      *
-      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
-      */
-
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase<OtherDerived>& other)
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    z()
    {
-      eigen_internal_assert(row >= 0 && row < rows()
-                        && col >= 0 && col < cols());
-      derived().template writePacket<StoreMode>(row, col,
-        other.derived().template packet<LoadMode>(row, col));
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS);
+      return (*this)[2];
    }

-    /** \internal Copies the packet at the given index of other into *this.
-      *
-      * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code
-      * with usual assignments.
-      *
-      * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox.
-      */
-
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase<OtherDerived>& other)
-    {
-      eigen_internal_assert(index >= 0 && index < size());
-      derived().template writePacket<StoreMode>(index,
-        other.derived().template packet<LoadMode>(index));
-    }
+    /** equivalent to operator[](3).  */

-    /** \internal */
-    template<typename OtherDerived, int StoreMode, int LoadMode>
-    EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE Scalar&
+    w()
    {
-      const Index row = rowIndexByOuterInner(outer,inner);
-      const Index col = colIndexByOuterInner(outer,inner);
-      // derived() is important here: copyCoeff() may be reimplemented in Derived!
-      derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other);
+      EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS);
+      return (*this)[3];
    }
-#endif
-
 };

 /** \brief Base class providing direct read-only coefficient access to matrices and arrays.
@@ -560,7 +472,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
  * inherits DenseCoeffsBase<Derived, ReadOnlyAccessors> which defines functions to access entries read-only using
  * \c operator() .
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived>
 class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived, ReadOnlyAccessors>
@@ -568,7 +480,6 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
  public:

    typedef DenseCoeffsBase<Derived, ReadOnlyAccessors> Base;
-    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;

@@ -581,6 +492,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa outerStride(), rowStride(), colStride()
      */
+    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return derived().innerStride();
@@ -591,6 +503,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), rowStride(), colStride()
      */
+    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return derived().outerStride();
@@ -606,6 +519,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), outerStride(), colStride()
      */
+    EIGEN_DEVICE_FUNC
    inline Index rowStride() const
    {
      return Derived::IsRowMajor ? outerStride() : innerStride();
@@ -615,6 +529,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
      *
      * \sa innerStride(), outerStride(), rowStride()
      */
+    EIGEN_DEVICE_FUNC
    inline Index colStride() const
    {
      return Derived::IsRowMajor ? innerStride() : outerStride();
@@ -630,7 +545,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
  * inherits DenseCoeffsBase<Derived, WriteAccessors> which defines functions to access entries read/write using
  * \c operator().
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived>
 class DenseCoeffsBase<Derived, DirectWriteAccessors>
@@ -639,7 +554,6 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
  public:

    typedef DenseCoeffsBase<Derived, WriteAccessors> Base;
-    typedef typename internal::traits<Derived>::Index Index;
    typedef typename internal::traits<Derived>::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;

@@ -652,6 +566,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa outerStride(), rowStride(), colStride()
      */
+    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return derived().innerStride();
@@ -662,6 +577,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), rowStride(), colStride()
      */
+    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return derived().outerStride();
@@ -677,6 +593,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), outerStride(), colStride()
      */
+    EIGEN_DEVICE_FUNC
    inline Index rowStride() const
    {
      return Derived::IsRowMajor ? outerStride() : innerStride();
@@ -686,6 +603,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
      *
      * \sa innerStride(), outerStride(), rowStride()
      */
+    EIGEN_DEVICE_FUNC
    inline Index colStride() const
    {
      return Derived::IsRowMajor ? innerStride() : outerStride();
@@ -694,33 +612,42 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>

 namespace internal {

-template<typename Derived, bool JustReturnZero>
+template<int Alignment, typename Derived, bool JustReturnZero>
 struct first_aligned_impl
 {
-  static inline typename Derived::Index run(const Derived&)
+  static inline Index run(const Derived&)
  { return 0; }
 };

-template<typename Derived>
-struct first_aligned_impl<Derived, false>
+template<int Alignment, typename Derived>
+struct first_aligned_impl<Alignment, Derived, false>
 {
-  static inline typename Derived::Index run(const Derived& m)
+  static inline Index run(const Derived& m)
  {
-    return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
+    return internal::first_aligned<Alignment>(m.data(), m.size());
  }
 };

-/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
+/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization.
+  *
+  * \tparam Alignment requested alignment in Bytes.
  *
  * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
  * documentation.
  */
+template<int Alignment, typename Derived>
+static inline Index first_aligned(const DenseBase<Derived>& m)
+{
+  enum { ReturnZero = (int(evaluator<Derived>::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) };
+  return first_aligned_impl<Alignment, Derived, ReturnZero>::run(m.derived());
+}
+
 template<typename Derived>
-static inline typename Derived::Index first_aligned(const Derived& m)
+static inline Index first_default_aligned(const DenseBase<Derived>& m)
 {
-  return first_aligned_impl
-          <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
-          ::run(m);
+  typedef typename Derived::Scalar Scalar;
+  typedef typename packet_traits<Scalar>::type DefaultPacketType;
+  return internal::first_aligned<int(unpacket_traits<DefaultPacketType>::alignment),Derived>(m);
 }

 template<typename Derived, bool HasDirectAccess = has_direct_access<Derived>::ret>

--- a/external/eigen3/Eigen/src/Core/DenseStorage.h
+++ b/external/eigen3/Eigen/src/Core/DenseStorage.h
@@ -3,7 +3,7 @@
 //
 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
+// Copyright (C) 2010-2013 Hauke Heibel <hauke.heibel@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -13,9 +13,9 @@
 #define EIGEN_MATRIXSTORAGE_H

 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-  #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
+  #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN;
 #else
-  #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+  #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X)
 #endif

 namespace Eigen {
@@ -24,7 +24,9 @@ namespace internal {

 struct constructor_without_unaligned_array_assert {};

-template<typename T, int Size> void check_static_allocation_size()
+template<typename T, int Size>
+EIGEN_DEVICE_FUNC
+void check_static_allocation_size()
 {
  // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit
  #if EIGEN_STACK_ALLOCATION_LIMIT
@@ -38,18 +40,19 @@ template<typename T, int Size> void check_static_allocation_size()
  */
 template <typename T, int Size, int MatrixOrArrayOptions,
          int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
-                        : (((Size*sizeof(T))%16)==0) ? 16
-                        : 0 >
+                        : compute_default_alignment<T,Size>::value >
 struct plain_array
 {
  T array[Size];

-  plain_array() 
+  EIGEN_DEVICE_FUNC
+  plain_array()
  { 
    check_static_allocation_size<T,Size>();
  }

-  plain_array(constructor_without_unaligned_array_assert) 
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert)
  { 
    check_static_allocation_size<T,Size>();
  }
@@ -64,29 +67,88 @@ struct plain_array
  template<typename PtrType>
  EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; }
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-    eigen_assert((reinterpret_cast<size_t>(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \
+    eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \
              && "this assertion is explained here: " \
              "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
 #else
  #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
-    eigen_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
+    eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \
              && "this assertion is explained here: " \
              "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \
              " **** READ THIS WEB PAGE !!! ****");
 #endif

+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 8>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array() 
+  {
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert) 
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+};
+
 template <typename T, int Size, int MatrixOrArrayOptions>
 struct plain_array<T, Size, MatrixOrArrayOptions, 16>
 {
-  EIGEN_USER_ALIGN16 T array[Size];
+  EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];

+  EIGEN_DEVICE_FUNC
  plain_array() 
  { 
-    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf);
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
    check_static_allocation_size<T,Size>();
  }

+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert) 
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 32>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array() 
+  {
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert) 
+  { 
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 64>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array() 
+  { 
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
  plain_array(constructor_without_unaligned_array_assert) 
  { 
    check_static_allocation_size<T,Size>();
@@ -96,9 +158,9 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 16>
 template <typename T, int MatrixOrArrayOptions, int Alignment>
 struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
 {
-  EIGEN_USER_ALIGN16 T array[1];
-  plain_array() {}
-  plain_array(constructor_without_unaligned_array_assert) {}
+  T array[1];
+  EIGEN_DEVICE_FUNC plain_array() {}
+  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
 };

 } // end namespace internal
@@ -122,41 +184,54 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
 {
    internal::plain_array<T,Size,_Options> m_data;
  public:
-    DenseStorage() {}
-    DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC DenseStorage() {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
+    }
+    EIGEN_DEVICE_FUNC
+    explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()) {}
-    DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
+    EIGEN_DEVICE_FUNC 
+    DenseStorage(const DenseStorage& other) : m_data(other.m_data) {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size)
+    }
+    EIGEN_DEVICE_FUNC 
    DenseStorage& operator=(const DenseStorage& other)
-    {
+    { 
      if (this != &other) m_data = other.m_data;
-      return *this;
+      return *this; 
    }
-    DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
-    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
-    static DenseIndex rows(void) {return _Rows;}
-    static DenseIndex cols(void) {return _Cols;}
-    void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
-    void resize(DenseIndex,DenseIndex,DenseIndex) {}
-    const T *data() const { return m_data.array; }
-    T *data() { return m_data.array; }
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols);
+      EIGEN_UNUSED_VARIABLE(size);
+      EIGEN_UNUSED_VARIABLE(rows);
+      EIGEN_UNUSED_VARIABLE(cols);
+    }
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };

 // null matrix
 template<typename T, int _Rows, int _Cols, int _Options> class DenseStorage<T, 0, _Rows, _Cols, _Options>
 {
  public:
-    DenseStorage() {}
-    DenseStorage(internal::constructor_without_unaligned_array_assert) {}
-    DenseStorage(const DenseStorage&) {}
-    DenseStorage& operator=(const DenseStorage&) { return *this; }
-    DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
-    void swap(DenseStorage& ) {}
-    static DenseIndex rows(void) {return _Rows;}
-    static DenseIndex cols(void) {return _Cols;}
-    void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
-    void resize(DenseIndex,DenseIndex,DenseIndex) {}
-    const T *data() const { return 0; }
-    T *data() { return 0; }
+    EIGEN_DEVICE_FUNC DenseStorage() {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {}
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; }
+    EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {}
+    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {}
+    EIGEN_DEVICE_FUNC const T *data() const { return 0; }
+    EIGEN_DEVICE_FUNC T *data() { return 0; }
 };

 // more specializations for null matrices; these are necessary to resolve ambiguities
@@ -173,74 +248,74 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, 0, Dynamic,
 template<typename T, int Size, int _Options> class DenseStorage<T, Size, Dynamic, Dynamic, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    DenseIndex m_rows;
-    DenseIndex m_cols;
+    Index m_rows;
+    Index m_cols;
  public:
-    DenseStorage() : m_rows(0), m_cols(0) {}
-    DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {}
-    DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
-    DenseStorage& operator=(const DenseStorage& other)
-    {
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) 
+    { 
      if (this != &other)
      {
        m_data = other.m_data;
        m_rows = other.m_rows;
        m_cols = other.m_cols;
      }
-      return *this;
+      return *this; 
    }
-    DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {}
-    void swap(DenseStorage& other)
+    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
-    DenseIndex rows() const {return m_rows;}
-    DenseIndex cols() const {return m_cols;}
-    void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
-    void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; }
-    const T *data() const { return m_data.array; }
-    T *data() { return m_data.array; }
+    EIGEN_DEVICE_FUNC Index rows() const {return m_rows;}
+    EIGEN_DEVICE_FUNC Index cols() const {return m_cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
+    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };

 // dynamic-size matrix with fixed-size storage and fixed width
 template<typename T, int Size, int _Cols, int _Options> class DenseStorage<T, Size, Dynamic, _Cols, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    DenseIndex m_rows;
+    Index m_rows;
  public:
-    DenseStorage() : m_rows(0) {}
-    DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {}
-    DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
-    DenseStorage& operator=(const DenseStorage& other)
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) 
    {
      if (this != &other)
      {
        m_data = other.m_data;
        m_rows = other.m_rows;
      }
-      return *this;
+      return *this; 
    }
-    DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {}
-    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
-    DenseIndex rows(void) const {return m_rows;}
-    DenseIndex cols(void) const {return _Cols;}
-    void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
-    void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; }
-    const T *data() const { return m_data.array; }
-    T *data() { return m_data.array; }
+    EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; }
+    EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };

 // dynamic-size matrix with fixed-size storage and fixed height
 template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Size, _Rows, Dynamic, _Options>
 {
    internal::plain_array<T,Size,_Options> m_data;
-    DenseIndex m_cols;
+    Index m_cols;
  public:
-    DenseStorage() : m_cols(0) {}
-    DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
      : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {}
-    DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
-    DenseStorage& operator=(const DenseStorage& other)
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {}
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
    {
      if (this != &other)
      {
@@ -249,38 +324,62 @@ template<typename T, int Size, int _Rows, int _Options> class DenseStorage<T, Si
      }
      return *this;
    }
-    DenseStorage(DenseIndex, DenseIndex, DenseIndex nbCols) : m_cols(nbCols) {}
-    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
-    DenseIndex rows(void) const {return _Rows;}
-    DenseIndex cols(void) const {return m_cols;}
-    void conservativeResize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
-    void resize(DenseIndex, DenseIndex, DenseIndex nbCols) { m_cols = nbCols; }
-    const T *data() const { return m_data.array; }
-    T *data() { return m_data.array; }
+    EIGEN_DEVICE_FUNC DenseStorage(Index, Index, Index cols) : m_cols(cols) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    EIGEN_DEVICE_FUNC Index rows(void) const {return _Rows;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+    void conservativeResize(Index, Index, Index cols) { m_cols = cols; }
+    void resize(Index, Index, Index cols) { m_cols = cols; }
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };

 // purely dynamic matrix.
 template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynamic, _Options>
 {
    T *m_data;
-    DenseIndex m_rows;
-    DenseIndex m_cols;
+    Index m_rows;
+    Index m_cols;
  public:
-    DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
-    DenseStorage(internal::constructor_without_unaligned_array_assert)
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert)
       : m_data(0), m_rows(0), m_cols(0) {}
-    DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
-      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows), m_cols(nbCols)
-    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    DenseStorage(DenseStorage&& other)
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows), m_cols(cols)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*other.m_cols))
+      , m_rows(other.m_rows)
+      , m_cols(other.m_cols)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols)
+      internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        DenseStorage tmp(other);
+        this->swap(tmp);
+      }
+      return *this;
+    }
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
      : m_data(std::move(other.m_data))
      , m_rows(std::move(other.m_rows))
      , m_cols(std::move(other.m_cols))
    {
      other.m_data = nullptr;
+      other.m_rows = 0;
+      other.m_cols = 0;
    }
-    DenseStorage& operator=(DenseStorage&& other)
+    EIGEN_DEVICE_FUNC
+    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
    {
      using std::swap;
      swap(m_data, other.m_data);
@@ -289,18 +388,18 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
      return *this;
    }
 #endif
-    ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
-    void swap(DenseStorage& other)
+    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, m_rows*m_cols); }
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other)
    { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); }
-    DenseIndex rows(void) const {return m_rows;}
-    DenseIndex cols(void) const {return m_cols;}
-    void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+    void conservativeResize(Index size, Index rows, Index cols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*m_cols);
-      m_rows = nbRows;
-      m_cols = nbCols;
+      m_rows = rows;
+      m_cols = cols;
    }
-    void resize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols)
+    EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols)
    {
      if(size != m_rows*m_cols)
      {
@@ -309,36 +408,56 @@ template<typename T, int _Options> class DenseStorage<T, Dynamic, Dynamic, Dynam
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        else
          m_data = 0;
-        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
      }
-      m_rows = nbRows;
-      m_cols = nbCols;
+      m_rows = rows;
+      m_cols = cols;
    }
-    const T *data() const { return m_data; }
-    T *data() { return m_data; }
-  private:
-    DenseStorage(const DenseStorage&);
-    DenseStorage& operator=(const DenseStorage&);
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
 };

 // matrix with dynamic width and fixed height (so that matrix has dynamic size).
 template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Rows, Dynamic, _Options>
 {
    T *m_data;
-    DenseIndex m_cols;
+    Index m_cols;
  public:
-    DenseStorage() : m_data(0), m_cols(0) {}
-    DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
-    DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(nbCols)
-    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    DenseStorage(DenseStorage&& other)
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {}
+    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_cols(cols)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0);
+      EIGEN_UNUSED_VARIABLE(rows);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(_Rows*other.m_cols))
+      , m_cols(other.m_cols)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows)
+      internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        DenseStorage tmp(other);
+        this->swap(tmp);
+      }
+      return *this;
+    }    
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
      : m_data(std::move(other.m_data))
      , m_cols(std::move(other.m_cols))
    {
      other.m_data = nullptr;
+      other.m_cols = 0;
    }
-    DenseStorage& operator=(DenseStorage&& other)
+    EIGEN_DEVICE_FUNC
+    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
    {
      using std::swap;
      swap(m_data, other.m_data);
@@ -346,16 +465,16 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
      return *this;
    }
 #endif
-    ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
-    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
-    static DenseIndex rows(void) {return _Rows;}
-    DenseIndex cols(void) const {return m_cols;}
-    void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols)
+    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Rows*m_cols); }
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); }
+    EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, _Rows*m_cols);
-      m_cols = nbCols;
+      m_cols = cols;
    }
-    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex nbCols)
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols)
    {
      if(size != _Rows*m_cols)
      {
@@ -364,35 +483,55 @@ template<typename T, int _Rows, int _Options> class DenseStorage<T, Dynamic, _Ro
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        else
          m_data = 0;
-        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
      }
-      m_cols = nbCols;
+      m_cols = cols;
    }
-    const T *data() const { return m_data; }
-    T *data() { return m_data; }
-  private:
-    DenseStorage(const DenseStorage&);
-    DenseStorage& operator=(const DenseStorage&);
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
 };

 // matrix with dynamic height and fixed width (so that matrix has dynamic size).
 template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dynamic, _Cols, _Options>
 {
    T *m_data;
-    DenseIndex m_rows;
+    Index m_rows;
  public:
-    DenseStorage() : m_data(0), m_rows(0) {}
-    DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
-    DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(nbRows)
-    { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN }
-#ifdef EIGEN_HAVE_RVALUE_REFERENCES
-    DenseStorage(DenseStorage&& other)
+    EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {}
+    explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {}
+    EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size)), m_rows(rows)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
+      eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols);
+      EIGEN_UNUSED_VARIABLE(cols);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other)
+      : m_data(internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(other.m_rows*_Cols))
+      , m_rows(other.m_rows)
+    {
+      EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols)
+      internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data);
+    }
+    EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other)
+    {
+      if (this != &other)
+      {
+        DenseStorage tmp(other);
+        this->swap(tmp);
+      }
+      return *this;
+    }    
+#if EIGEN_HAS_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
+    DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT
      : m_data(std::move(other.m_data))
      , m_rows(std::move(other.m_rows))
    {
      other.m_data = nullptr;
+      other.m_rows = 0;
    }
-    DenseStorage& operator=(DenseStorage&& other)
+    EIGEN_DEVICE_FUNC
+    DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT
    {
      using std::swap;
      swap(m_data, other.m_data);
@@ -400,16 +539,16 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
      return *this;
    }
 #endif
-    ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
-    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
-    DenseIndex rows(void) const {return m_rows;}
-    static DenseIndex cols(void) {return _Cols;}
-    void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex)
+    EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto<T,(_Options&DontAlign)==0>(m_data, _Cols*m_rows); }
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); }
+    EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;}
+    EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;}
+    void conservativeResize(Index size, Index rows, Index)
    {
      m_data = internal::conditional_aligned_realloc_new_auto<T,(_Options&DontAlign)==0>(m_data, size, m_rows*_Cols);
-      m_rows = nbRows;
+      m_rows = rows;
    }
-    EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex nbRows, DenseIndex)
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index)
    {
      if(size != m_rows*_Cols)
      {
@@ -418,15 +557,12 @@ template<typename T, int _Cols, int _Options> class DenseStorage<T, Dynamic, Dyn
          m_data = internal::conditional_aligned_new_auto<T,(_Options&DontAlign)==0>(size);
        else
          m_data = 0;
-        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN
+        EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({})
      }
-      m_rows = nbRows;
+      m_rows = rows;
    }
-    const T *data() const { return m_data; }
-    T *data() { return m_data; }
-  private:
-    DenseStorage(const DenseStorage&);
-    DenseStorage& operator=(const DenseStorage&);
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data; }
 };

 } // end namespace Eigen

--- a/external/eigen3/Eigen/src/Core/Diagonal.h
+++ b/external/eigen3/Eigen/src/Core/Diagonal.h
@@ -21,7 +21,7 @@ namespace Eigen {
  * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal
  * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal.
  *              A positive value means a superdiagonal, a negative value means a subdiagonal.
-  *              You can also use Dynamic so the index can be set at runtime.
+  *              You can also use DynamicIndex so the index can be set at runtime.
  *
  * The matrix is not required to be square.
  *
@@ -37,7 +37,7 @@ template<typename MatrixType, int DiagIndex>
 struct traits<Diagonal<MatrixType,DiagIndex> >
 : traits<MatrixType>
 {
-  typedef typename nested<MatrixType>::type MatrixTypeNested;
+  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
  typedef typename remove_reference<MatrixTypeNested>::type _MatrixTypeNested;
  typedef typename MatrixType::StorageKind StorageKind;
  enum {
@@ -52,8 +52,7 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
                                                 MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
    MaxColsAtCompileTime = 1,
    MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
-    Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
-    CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
+    Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
    MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
    InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,
    OuterStrideAtCompileTime = 0
@@ -70,20 +69,28 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
    typedef typename internal::dense_xpr_base<Diagonal>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)

-    inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+    EIGEN_DEVICE_FUNC
+    explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}

    EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)

+    EIGEN_DEVICE_FUNC
    inline Index rows() const
-    { return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
+    {
+      return m_index.value()<0 ? numext::mini<Index>(m_matrix.cols(),m_matrix.rows()+m_index.value())
+                               : numext::mini<Index>(m_matrix.rows(),m_matrix.cols()-m_index.value());
+    }

+    EIGEN_DEVICE_FUNC
    inline Index cols() const { return 1; }

+    EIGEN_DEVICE_FUNC
    inline Index innerStride() const
    {
      return m_matrix.outerStride() + 1;
    }

+    EIGEN_DEVICE_FUNC
    inline Index outerStride() const
    {
      return 0;
@@ -95,62 +102,75 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
                       const Scalar
                     >::type ScalarWithConstIfNotLvalue;

-    inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
-    inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+    EIGEN_DEVICE_FUNC
+    inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
+    EIGEN_DEVICE_FUNC
+    inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }

+    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index row, Index)
    {
      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+      return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
    }

+    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index row, Index) const
    {
-      return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+      return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
    }

+    EIGEN_DEVICE_FUNC
    inline CoeffReturnType coeff(Index row, Index) const
    {
      return m_matrix.coeff(row+rowOffset(), row+colOffset());
    }

+    EIGEN_DEVICE_FUNC
    inline Scalar& coeffRef(Index idx)
    {
      EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+      return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
    }

+    EIGEN_DEVICE_FUNC
    inline const Scalar& coeffRef(Index idx) const
    {
-      return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+      return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
    }

+    EIGEN_DEVICE_FUNC
    inline CoeffReturnType coeff(Index idx) const
    {
      return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
    }

-    const typename internal::remove_all<typename MatrixType::Nested>::type& 
+    EIGEN_DEVICE_FUNC
+    inline const typename internal::remove_all<typename MatrixType::Nested>::type& 
    nestedExpression() const 
    {
      return m_matrix;
    }

-    int index() const
+    EIGEN_DEVICE_FUNC
+    inline Index index() const
    {
      return m_index.value();
    }

  protected:
-    typename MatrixType::Nested m_matrix;
+    typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
    const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;

  private:
    // some compilers may fail to optimize std::max etc in case of compile-time constants...
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
+    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
-    // triger a compile time error is someone try to call packet
+    // trigger a compile-time error if someone try to call packet
    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
    template<int LoadMode> typename MatrixType::PacketReturnType packet(Index,Index) const;
 };
@@ -167,7 +187,7 @@ template<typename Derived>
 inline typename MatrixBase<Derived>::DiagonalReturnType
 MatrixBase<Derived>::diagonal()
 {
-  return derived();
+  return DiagonalReturnType(derived());
 }

 /** This is the const version of diagonal(). */
@@ -216,20 +236,20 @@ MatrixBase<Derived>::diagonal(Index index) const
  *
  * \sa MatrixBase::diagonal(), class Diagonal */
 template<typename Derived>
-template<int Index>
-inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index>::Type
+template<int Index_>
+inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type
 MatrixBase<Derived>::diagonal()
 {
-  return derived();
+  return typename DiagonalIndexReturnType<Index_>::Type(derived());
 }

 /** This is the const version of diagonal<int>(). */
 template<typename Derived>
-template<int Index>
-inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index>::Type
+template<int Index_>
+inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type
 MatrixBase<Derived>::diagonal() const
 {
-  return derived();
+  return typename ConstDiagonalIndexReturnType<Index_>::Type(derived());
 }

 } // end namespace Eigen

--- a/external/eigen3/Eigen/src/Core/DiagonalMatrix.h
+++ b/external/eigen3/Eigen/src/Core/DiagonalMatrix.h
@@ -22,7 +22,7 @@ class DiagonalBase : public EigenBase<Derived>
    typedef typename DiagonalVectorType::Scalar Scalar;
    typedef typename DiagonalVectorType::RealScalar RealScalar;
    typedef typename internal::traits<Derived>::StorageKind StorageKind;
-    typedef typename internal::traits<Derived>::Index Index;
+    typedef typename internal::traits<Derived>::StorageIndex StorageIndex;

    enum {
      RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
@@ -30,79 +30,61 @@ class DiagonalBase : public EigenBase<Derived>
      MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
      MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
      IsVectorAtCompileTime = 0,
-      Flags = 0
+      Flags = NoPreferredStorageOrderBit
    };

    typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime> DenseMatrixType;
    typedef DenseMatrixType DenseType;
    typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;

+    EIGEN_DEVICE_FUNC
    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    EIGEN_DEVICE_FUNC
    inline Derived& derived() { return *static_cast<Derived*>(this); }

+    EIGEN_DEVICE_FUNC
    DenseMatrixType toDenseMatrix() const { return derived(); }
-    template<typename DenseDerived>
-    void evalTo(MatrixBase<DenseDerived> &other) const;
-    template<typename DenseDerived>
-    void addTo(MatrixBase<DenseDerived> &other) const
-    { other.diagonal() += diagonal(); }
-    template<typename DenseDerived>
-    void subTo(MatrixBase<DenseDerived> &other) const
-    { other.diagonal() -= diagonal(); }
-
+    
+    EIGEN_DEVICE_FUNC
    inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
+    EIGEN_DEVICE_FUNC
    inline DiagonalVectorType& diagonal() { return derived().diagonal(); }

+    EIGEN_DEVICE_FUNC
    inline Index rows() const { return diagonal().size(); }
+    EIGEN_DEVICE_FUNC
    inline Index cols() const { return diagonal().size(); }

-    /** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
-      */
    template<typename MatrixDerived>
-    const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
+    EIGEN_DEVICE_FUNC
+    const Product<Derived,MatrixDerived,LazyProduct>
    operator*(const MatrixBase<MatrixDerived> &matrix) const
    {
-      return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
+      return Product<Derived, MatrixDerived, LazyProduct>(derived(),matrix.derived());
    }

-    inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
+    typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> > InverseReturnType;
+    EIGEN_DEVICE_FUNC
+    inline const InverseReturnType
    inverse() const
    {
-      return diagonal().cwiseInverse();
+      return InverseReturnType(diagonal().cwiseInverse());
    }
    
-    inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
+    EIGEN_DEVICE_FUNC
+    inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
    operator*(const Scalar& scalar) const
    {
-      return diagonal() * scalar;
+      return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);
    }
-    friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
+    EIGEN_DEVICE_FUNC
+    friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >
    operator*(const Scalar& scalar, const DiagonalBase& other)
    {
-      return other.diagonal() * scalar;
-    }
-    
-    #ifdef EIGEN2_SUPPORT
-    template<typename OtherDerived>
-    bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
-    {
-      return diagonal().isApprox(other.diagonal(), precision);
+      return DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());
    }
-    template<typename OtherDerived>
-    bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
-    {
-      return toDenseMatrix().isApprox(other, precision);
-    }
-    #endif
 };

-template<typename Derived>
-template<typename DenseDerived>
-void DiagonalBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const
-{
-  other.setZero();
-  other.diagonal() = diagonal();
-}
 #endif

 /** \class DiagonalMatrix
@@ -124,10 +106,9 @@ struct traits<DiagonalMatrix<_Scalar,SizeAtCompileTime,MaxSizeAtCompileTime> >
 : traits<Matrix<_Scalar,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
 {
  typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType;
-  typedef Dense StorageKind;
-  typedef DenseIndex Index;
+  typedef DiagonalShape StorageKind;
  enum {
-    Flags = LvalueBit
+    Flags = LvalueBit | NoPreferredStorageOrderBit
  };
 };
 }
@@ -141,7 +122,7 @@ class DiagonalMatrix
    typedef const DiagonalMatrix& Nested;
    typedef _Scalar Scalar;
    typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
-    typedef typename internal::traits<DiagonalMatrix>::Index Index;
+    typedef typename internal::traits<DiagonalMatrix>::StorageIndex StorageIndex;
    #endif

  protected:
@@ -151,24 +132,31 @@ class DiagonalMatrix
  public:

    /** const version of diagonal(). */
+    EIGEN_DEVICE_FUNC
    inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
    /** \returns a reference to the stored vector of diagonal coefficients. */
+    EIGEN_DEVICE_FUNC
    inline DiagonalVectorType& diagonal() { return m_diagonal; }

    /** Default constructor without initialization */
+    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix() {}

    /** Constructs a diagonal matrix with given dimension  */
-    inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
+    EIGEN_DEVICE_FUNC
+    explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}

    /** 2D constructor. */
+    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}

    /** 3D constructor. */
+    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}

    /** Copy constructor. */
    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}

    #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -178,11 +166,13 @@ class DiagonalMatrix

    /** generic constructor from expression of the diagonal coefficients */
    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
    {}

    /** Copy operator. */
    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
    DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
    {
      m_diagonal = other.diagonal();
@@ -193,6 +183,7 @@ class DiagonalMatrix
    /** This is a special case of the templated operator=. Its purpose is to
      * prevent a default operator= from hiding the templated operator=.
      */
+    EIGEN_DEVICE_FUNC
    DiagonalMatrix& operator=(const DiagonalMatrix& other)
    {
      m_diagonal = other.diagonal();
@@ -201,14 +192,19 @@ class DiagonalMatrix
    #endif

    /** Resizes to given size. */
+    EIGEN_DEVICE_FUNC
    inline void resize(Index size) { m_diagonal.resize(size); }
    /** Sets all coefficients to zero. */
+    EIGEN_DEVICE_FUNC
    inline void setZero() { m_diagonal.setZero(); }
    /** Resizes and sets all coefficients to zero. */
+    EIGEN_DEVICE_FUNC
    inline void setZero(Index size) { m_diagonal.setZero(size); }
    /** Sets this matrix to be the identity matrix of the current size. */
+    EIGEN_DEVICE_FUNC
    inline void setIdentity() { m_diagonal.setOnes(); }
    /** Sets this matrix to be the identity matrix of the given size. */
+    EIGEN_DEVICE_FUNC
    inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
 };

@@ -232,14 +228,15 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> >
 {
  typedef _DiagonalVectorType DiagonalVectorType;
  typedef typename DiagonalVectorType::Scalar Scalar;
-  typedef typename DiagonalVectorType::Index Index;
-  typedef typename DiagonalVectorType::StorageKind StorageKind;
+  typedef typename DiagonalVectorType::StorageIndex StorageIndex;
+  typedef DiagonalShape StorageKind;
+  typedef typename traits<DiagonalVectorType>::XprKind XprKind;
  enum {
    RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
    ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
-    MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
-    MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
-    Flags =  traits<DiagonalVectorType>::Flags & LvalueBit
+    MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    Flags =  (traits<DiagonalVectorType>::Flags & LvalueBit) | NoPreferredStorageOrderBit
  };
 };
 }
@@ -255,9 +252,11 @@ class DiagonalWrapper
    #endif

    /** Constructor from expression of diagonal coefficients to wrap. */
-    inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
+    EIGEN_DEVICE_FUNC
+    explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}

    /** \returns a const reference to the wrapped expression of diagonal coefficients. */
+    EIGEN_DEVICE_FUNC
    const DiagonalVectorType& diagonal() const { return m_diagonal; }

  protected:
@@ -277,7 +276,7 @@ template<typename Derived>
 inline const DiagonalWrapper<const Derived>
 MatrixBase<Derived>::asDiagonal() const
 {
-  return derived();
+  return DiagonalWrapper<const Derived>(derived());
 }

 /** \returns true if *this is approximately equal to a diagonal matrix,
@@ -291,12 +290,11 @@ MatrixBase<Derived>::asDiagonal() const
 template<typename Derived>
 bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
 {
-  using std::abs;
  if(cols() != rows()) return false;
  RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
  for(Index j = 0; j < cols(); ++j)
  {
-    RealScalar absOnDiagonal = abs(coeff(j,j));
+    RealScalar absOnDiagonal = numext::abs(coeff(j,j));
    if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
  }
  for(Index j = 0; j < cols(); ++j)
@@ -308,6 +306,38 @@ bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
  return true;
 }

+namespace internal {
+
+template<> struct storage_kind_to_shape<DiagonalShape> { typedef DiagonalShape Shape; };
+
+struct Diagonal2Dense {};
+
+template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2Dense Kind; };
+
+// Diagonal matrix to Dense assignment
+template< typename DstXprType, typename SrcXprType, typename Functor>
+struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense>
+{
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
+  {
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+    
+    dst.setZero();
+    dst.diagonal() = src.diagonal();
+  }
+  
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
+  { dst.diagonal() += src.diagonal(); }
+  
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
+  { dst.diagonal() -= src.diagonal(); }
+};
+
+} // namespace internal
+
 } // end namespace Eigen

 #endif // EIGEN_DIAGONALMATRIX_H
--- a/external/eigen3/Eigen/src/Core/DiagonalProduct.h
+++ b/external/eigen3/Eigen/src/Core/DiagonalProduct.h
@@ -13,117 +13,14 @@

 namespace Eigen { 

-namespace internal {
-template<typename MatrixType, typename DiagonalType, int ProductOrder>
-struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
- : traits<MatrixType>
-{
-  typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
-
-    _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
-    _ScalarAccessOnDiag =  !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
-                          ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
-    _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
-    // FIXME currently we need same types, but in the future the next rule should be the one
-    //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
-    _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
-    _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
-
-    Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
-    Cost0 = EIGEN_ADD_COST(NumTraits<Scalar>::MulCost, MatrixType::CoeffReadCost),
-    CoeffReadCost = EIGEN_ADD_COST(Cost0,DiagonalType::DiagonalVectorType::CoeffReadCost)
-  };
-};
-}
-
-template<typename MatrixType, typename DiagonalType, int ProductOrder>
-class DiagonalProduct : internal::no_assignment_operator,
-                        public MatrixBase<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
-{
-  public:
-
-    typedef MatrixBase<DiagonalProduct> Base;
-    EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct)
-
-    inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal)
-      : m_matrix(matrix), m_diagonal(diagonal)
-    {
-      eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols()));
-    }
-
-    EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); }
-    EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); }
-
-    EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
-    {
-      return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col);
-    }
-    
-    EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
-    {
-      enum {
-        StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
-      };
-      return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const
-    {
-      enum {
-        StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor
-      };
-      const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col;
-      return packet_impl<LoadMode>(row,col,indexInDiagonalVector,typename internal::conditional<
-        ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft)
-       ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type());
-    }
-    
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const
-    {
-      enum {
-        StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor
-      };
-      return packet<LoadMode>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
-    }
-
-  protected:
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const
-    {
-      return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
-                     internal::pset1<PacketScalar>(m_diagonal.diagonal().coeff(id)));
-    }
-
-    template<int LoadMode>
-    EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const
-    {
-      enum {
-        InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
-        DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
-      };
-      return internal::pmul(m_matrix.template packet<LoadMode>(row, col),
-                     m_diagonal.diagonal().template packet<DiagonalVectorPacketLoadMode>(id));
-    }
-
-    typename MatrixType::Nested m_matrix;
-    typename DiagonalType::Nested m_diagonal;
-};
-
 /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
  */
 template<typename Derived>
 template<typename DiagonalDerived>
-inline const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
+inline const Product<Derived, DiagonalDerived, LazyProduct>
 MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const
 {
-  return DiagonalProduct<Derived, DiagonalDerived, OnTheRight>(derived(), a_diagonal.derived());
+  return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived());
 }

 } // end namespace Eigen

--- a/external/eigen3/Eigen/src/Core/Dot.h
+++ b/external/eigen3/Eigen/src/Core/Dot.h
@@ -28,26 +28,31 @@ template<typename T, typename U,
 >
 struct dot_nocheck
 {
-  typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
+  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
+  typedef typename conj_prod::result_type ResScalar;
+  EIGEN_DEVICE_FUNC
  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
-    return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
+    return a.template binaryExpr<conj_prod>(b).sum();
  }
 };

 template<typename T, typename U>
 struct dot_nocheck<T, U, true>
 {
-  typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
+  typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
+  typedef typename conj_prod::result_type ResScalar;
+  EIGEN_DEVICE_FUNC
  static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
  {
-    return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
+    return a.transpose().template binaryExpr<conj_prod>(b).sum();
  }
 };

 } // end namespace internal

-/** \returns the dot product of *this with other.
+/** \fn MatrixBase::dot
+  * \returns the dot product of *this with other.
  *
  * \only_for_vectors
  *
@@ -59,55 +64,30 @@ struct dot_nocheck<T, U, true>
  */
 template<typename Derived>
 template<typename OtherDerived>
-typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
+EIGEN_DEVICE_FUNC
+typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
+#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG))
  typedef internal::scalar_conj_product_op<Scalar,typename OtherDerived::Scalar> func;
  EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar);
-
+#endif
+  
  eigen_assert(size() == other.size());

  return internal::dot_nocheck<Derived,OtherDerived>::run(*this, other);
 }

-#ifdef EIGEN2_SUPPORT
-/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable
-  * (conjugating the second variable). Of course this only makes a difference in the complex case.
-  *
-  * This method is only available in EIGEN2_SUPPORT mode.
-  *
-  * \only_for_vectors
-  *
-  * \sa dot()
-  */
-template<typename Derived>
-template<typename OtherDerived>
-typename internal::traits<Derived>::Scalar
-MatrixBase<Derived>::eigen2_dot(const MatrixBase<OtherDerived>& other) const
-{
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
-  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
-  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived)
-  EIGEN_STATIC_ASSERT((internal::is_same<Scalar, typename OtherDerived::Scalar>::value),
-    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
-  eigen_assert(size() == other.size());
-
-  return internal::dot_nocheck<OtherDerived,Derived>::run(other,*this);
-}
-#endif
-
-
 //---------- implementation of L2 norm and related functions ----------

 /** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm.
  * In both cases, it consists in the sum of the square of all the matrix entries.
  * For vectors, this is also equals to the dot product of \c *this with itself.
  *
-  * \sa dot(), norm()
+  * \sa dot(), norm(), lpNorm()
  */
 template<typename Derived>
 EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const
@@ -119,16 +99,18 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
  * In both cases, it consists in the square root of the sum of the square of all the matrix entries.
  * For vectors, this is also equals to the square root of the dot product of \c *this with itself.
  *
-  * \sa dot(), squaredNorm()
+  * \sa lpNorm(), dot(), squaredNorm()
  */
 template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
-  using std::sqrt;
-  return sqrt(squaredNorm());
+  return numext::sqrt(squaredNorm());
 }

-/** \returns an expression of the quotient of *this by its own norm.
+/** \returns an expression of the quotient of \c *this by its own norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0),
+  *          then this function returns a copy of the input.
  *
  * \only_for_vectors
  *
@@ -138,22 +120,77 @@ template<typename Derived>
 inline const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
-  typedef typename internal::nested<Derived>::type Nested;
-  typedef typename internal::remove_reference<Nested>::type _Nested;
+  typedef typename internal::nested_eval<Derived,2>::type _Nested;
  _Nested n(derived());
-  return n / n.norm();
+  RealScalar z = n.squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  if(z>RealScalar(0))
+    return n / numext::sqrt(z);
+  else
+    return n;
 }

 /** Normalizes the vector, i.e. divides it by its own norm.
  *
  * \only_for_vectors
  *
+  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+  *
  * \sa norm(), normalized()
  */
 template<typename Derived>
 inline void MatrixBase<Derived>::normalize()
 {
-  *this /= norm();
+  RealScalar z = squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  if(z>RealScalar(0))
+    derived() /= numext::sqrt(z);
+}
+
+/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
+  *
+  * \only_for_vectors
+  *
+  * This method is analogue to the normalized() method, but it reduces the risk of
+  * underflow and overflow when computing the norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0),
+  *          then this function returns a copy of the input.
+  *
+  * \sa stableNorm(), stableNormalize(), normalized()
+  */
+template<typename Derived>
+inline const typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::stableNormalized() const
+{
+  typedef typename internal::nested_eval<Derived,3>::type _Nested;
+  _Nested n(derived());
+  RealScalar w = n.cwiseAbs().maxCoeff();
+  RealScalar z = (n/w).squaredNorm();
+  if(z>RealScalar(0))
+    return n / (numext::sqrt(z)*w);
+  else
+    return n;
+}
+
+/** Normalizes the vector while avoid underflow and overflow
+  *
+  * \only_for_vectors
+  *
+  * This method is analogue to the normalize() method, but it reduces the risk of
+  * underflow and overflow when computing the norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+  *
+  * \sa stableNorm(), stableNormalized(), normalize()
+  */
+template<typename Derived>
+inline void MatrixBase<Derived>::stableNormalize()
+{
+  RealScalar w = cwiseAbs().maxCoeff();
+  RealScalar z = (derived()/w).squaredNorm();
+  if(z>RealScalar(0))
+    derived() /= numext::sqrt(z)*w;
 }

 //---------- implementation of other norms ----------
@@ -164,9 +201,10 @@ template<typename Derived, int p>
 struct lpNorm_selector
 {
  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
  static inline RealScalar run(const MatrixBase<Derived>& m)
  {
-    using std::pow;
+    EIGEN_USING_STD_MATH(pow)
    return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p);
  }
 };
@@ -174,6 +212,7 @@ struct lpNorm_selector
 template<typename Derived>
 struct lpNorm_selector<Derived, 1>
 {
+  EIGEN_DEVICE_FUNC
  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.cwiseAbs().sum();
@@ -183,6 +222,7 @@ struct lpNorm_selector<Derived, 1>
 template<typename Derived>
 struct lpNorm_selector<Derived, 2>
 {
+  EIGEN_DEVICE_FUNC
  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
  {
    return m.norm();
@@ -192,23 +232,35 @@ struct lpNorm_selector<Derived, 2>
 template<typename Derived>
 struct lpNorm_selector<Derived, Infinity>
 {
-  static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
+  typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
+  static inline RealScalar run(const MatrixBase<Derived>& m)
  {
+    if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0))
+      return RealScalar(0);
    return m.cwiseAbs().maxCoeff();
  }
 };

 } // end namespace internal

-/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
-  *          of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
-  *          norm, that is the maximum of the absolute values of the coefficients of *this.
+/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values
+  *          of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$
+  *          norm, that is the maximum of the absolute values of the coefficients of \c *this.
+  *
+  * In all cases, if \c *this is empty, then the value 0 is returned.
+  *
+  * \note For matrices, this function does not compute the <a href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
  *
  * \sa norm()
  */
 template<typename Derived>
 template<int p>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+#else
+MatrixBase<Derived>::RealScalar
+#endif
 MatrixBase<Derived>::lpNorm() const
 {
  return internal::lpNorm_selector<Derived, p>::run(*this);
@@ -227,8 +279,8 @@ template<typename OtherDerived>
 bool MatrixBase<Derived>::isOrthogonal
 (const MatrixBase<OtherDerived>& other, const RealScalar& prec) const
 {
-  typename internal::nested<Derived,2>::type nested(derived());
-  typename internal::nested<OtherDerived,2>::type otherNested(other.derived());
+  typename internal::nested_eval<Derived,2>::type nested(derived());
+  typename internal::nested_eval<OtherDerived,2>::type otherNested(other.derived());
  return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm();
 }

@@ -246,13 +298,13 @@ bool MatrixBase<Derived>::isOrthogonal
 template<typename Derived>
 bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const
 {
-  typename Derived::Nested nested(derived());
+  typename internal::nested_eval<Derived,1>::type self(derived());
  for(Index i = 0; i < cols(); ++i)
  {
-    if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
+    if(!internal::isApprox(self.col(i).squaredNorm(), static_cast<RealScalar>(1), prec))
      return false;
    for(Index j = 0; j < i; ++j)
-      if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast<Scalar>(1), prec))
+      if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast<Scalar>(1), prec))
        return false;
  }
  return true;

--- a/external/eigen3/Eigen/src/Core/EigenBase.h
+++ b/external/eigen3/Eigen/src/Core/EigenBase.h
@@ -13,7 +13,10 @@

 namespace Eigen {

-/** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
+/** \class EigenBase
+  * \ingroup Core_Module
+  * 
+  * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
  *
  * In other words, an EigenBase object is an object that can be copied into a MatrixBase.
  *
@@ -21,39 +24,57 @@ namespace Eigen {
  *
  * Notice that this class is trivial, it is only used to disambiguate overloaded functions.
  *
-  * \sa \ref TopicClassHierarchy
+  * \sa \blank \ref TopicClassHierarchy
  */
 template<typename Derived> struct EigenBase
 {
 //   typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
-
+  
+  /** \brief The interface type of indices
+    * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
+    * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead.
+    * \sa StorageIndex, \ref TopicPreprocessorDirectives.
+    */
+  typedef Eigen::Index Index;
+
+  // FIXME is it needed?
  typedef typename internal::traits<Derived>::StorageKind StorageKind;
-  typedef typename internal::traits<Derived>::Index Index;

  /** \returns a reference to the derived object */
+  EIGEN_DEVICE_FUNC
  Derived& derived() { return *static_cast<Derived*>(this); }
  /** \returns a const reference to the derived object */
+  EIGEN_DEVICE_FUNC
  const Derived& derived() const { return *static_cast<const Derived*>(this); }

+  EIGEN_DEVICE_FUNC
  inline Derived& const_cast_derived() const
  { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
+  EIGEN_DEVICE_FUNC
  inline const Derived& const_derived() const
  { return *static_cast<const Derived*>(this); }

  /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
+  EIGEN_DEVICE_FUNC
  inline Index rows() const { return derived().rows(); }
  /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
+  EIGEN_DEVICE_FUNC
  inline Index cols() const { return derived().cols(); }
  /** \returns the number of coefficients, which is rows()*cols().
    * \sa rows(), cols(), SizeAtCompileTime. */
+  EIGEN_DEVICE_FUNC
  inline Index size() const { return rows() * cols(); }

  /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
-  template<typename Dest> inline void evalTo(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void evalTo(Dest& dst) const
  { derived().evalTo(dst); }

  /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
-  template<typename Dest> inline void addTo(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void addTo(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@@ -63,7 +84,9 @@ template<typename Derived> struct EigenBase
  }

  /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
-  template<typename Dest> inline void subTo(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void subTo(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@@ -73,7 +96,8 @@ template<typename Derived> struct EigenBase
  }

  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
-  template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@@ -81,7 +105,8 @@ template<typename Derived> struct EigenBase
  }

  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
-  template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
  {
    // This is the default implementation,
    // derived class can reimplement it in a more optimized way.
@@ -104,25 +129,28 @@ template<typename Derived> struct EigenBase
  */
 template<typename Derived>
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().evalTo(derived());
+  call_assignment(derived(), other.derived());
  return derived();
 }

 template<typename Derived>
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().addTo(derived());
+  call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
  return derived();
 }

 template<typename Derived>
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
 {
-  other.derived().subTo(derived());
+  call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
  return derived();
 }


--- a/external/eigen3/Eigen/src/Core/Flagged.h
+++ b/external/eigen3/Eigen/src/Core/Flagged.h
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_FLAGGED_H
-#define EIGEN_FLAGGED_H
-
-namespace Eigen { 
-
-/** \class Flagged
-  * \ingroup Core_Module
-  *
-  * \brief Expression with modified flags
-  *
-  * \param ExpressionType the type of the object of which we are modifying the flags
-  * \param Added the flags added to the expression
-  * \param Removed the flags removed from the expression (has priority over Added).
-  *
-  * This class represents an expression whose flags have been modified.
-  * It is the return type of MatrixBase::flagged()
-  * and most of the time this is the only way it is used.
-  *
-  * \sa MatrixBase::flagged()
-  */
-
-namespace internal {
-template<typename ExpressionType, unsigned int Added, unsigned int Removed>
-struct traits<Flagged<ExpressionType, Added, Removed> > : traits<ExpressionType>
-{
-  enum { Flags = (ExpressionType::Flags | Added) & ~Removed };
-};
-}
-
-template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
-  : public MatrixBase<Flagged<ExpressionType, Added, Removed> >
-{
-  public:
-
-    typedef MatrixBase<Flagged> Base;
-    
-    EIGEN_DENSE_PUBLIC_INTERFACE(Flagged)
-    typedef typename internal::conditional<internal::must_nest_by_value<ExpressionType>::ret,
-        ExpressionType, const ExpressionType&>::type ExpressionTypeNested;
-    typedef typename ExpressionType::InnerIterator InnerIterator;
-
-    inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {}
-
-    inline Index rows() const { return m_matrix.rows(); }
-    inline Index cols() const { return m_matrix.cols(); }
-    inline Index outerStride() const { return m_matrix.outerStride(); }
-    inline Index innerStride() const { return m_matrix.innerStride(); }
-
-    inline CoeffReturnType coeff(Index row, Index col) const
-    {
-      return m_matrix.coeff(row, col);
-    }
-
-    inline CoeffReturnType coeff(Index index) const
-    {
-      return m_matrix.coeff(index);
-    }
-    
-    inline const Scalar& coeffRef(Index row, Index col) const
-    {
-      return m_matrix.const_cast_derived().coeffRef(row, col);
-    }
-
-    inline const Scalar& coeffRef(Index index) const
-    {
-      return m_matrix.const_cast_derived().coeffRef(index);
-    }
-
-    inline Scalar& coeffRef(Index row, Index col)
-    {
-      return m_matrix.const_cast_derived().coeffRef(row, col);
-    }
-
-    inline Scalar& coeffRef(Index index)
-    {
-      return m_matrix.const_cast_derived().coeffRef(index);
-    }
-
-    template<int LoadMode>
-    inline const PacketScalar packet(Index row, Index col) const
-    {
-      return m_matrix.template packet<LoadMode>(row, col);
-    }
-
-    template<int LoadMode>
-    inline void writePacket(Index row, Index col, const PacketScalar& x)
-    {
-      m_matrix.const_cast_derived().template writePacket<LoadMode>(row, col, x);
-    }
-
-    template<int LoadMode>
-    inline const PacketScalar packet(Index index) const
-    {
-      return m_matrix.template packet<LoadMode>(index);
-    }
-
-    template<int LoadMode>
-    inline void writePacket(Index index, const PacketScalar& x)
-    {
-      m_matrix.const_cast_derived().template writePacket<LoadMode>(index, x);
-    }
-
-    const ExpressionType& _expression() const { return m_matrix; }
-
-    template<typename OtherDerived>
-    typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
-
-    template<typename OtherDerived>
-    void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
-
-  protected:
-    ExpressionTypeNested m_matrix;
-};
-
-/** \returns an expression of *this with added and removed flags
-  *
-  * This is mostly for internal use.
-  *
-  * \sa class Flagged
-  */
-template<typename Derived>
-template<unsigned int Added,unsigned int Removed>
-inline const Flagged<Derived, Added, Removed>
-DenseBase<Derived>::flagged() const
-{
-  return derived();
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_FLAGGED_H
--- a/external/eigen3/Eigen/src/Core/ForceAlignedAccess.h
+++ b/external/eigen3/Eigen/src/Core/ForceAlignedAccess.h
@@ -39,29 +39,29 @@ template<typename ExpressionType> class ForceAlignedAccess
    typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
    EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)

-    inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
+    EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}

-    inline Index rows() const { return m_expression.rows(); }
-    inline Index cols() const { return m_expression.cols(); }
-    inline Index outerStride() const { return m_expression.outerStride(); }
-    inline Index innerStride() const { return m_expression.innerStride(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); }

-    inline const CoeffReturnType coeff(Index row, Index col) const
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const
    {
      return m_expression.coeff(row, col);
    }

-    inline Scalar& coeffRef(Index row, Index col)
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col)
    {
      return m_expression.const_cast_derived().coeffRef(row, col);
    }

-    inline const CoeffReturnType coeff(Index index) const
+    EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const
    {
      return m_expression.coeff(index);
    }

-    inline Scalar& coeffRef(Index index)
+    EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index)
    {
      return m_expression.const_cast_derived().coeffRef(index);
    }
@@ -90,7 +90,7 @@ template<typename ExpressionType> class ForceAlignedAccess
      m_expression.const_cast_derived().template writePacket<Aligned>(index, x);
    }

-    operator const ExpressionType&() const { return m_expression; }
+    EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }

  protected:
    const ExpressionType& m_expression;
@@ -127,7 +127,7 @@ template<bool Enable>
 inline typename internal::add_const_on_value_type<typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type>::type
 MatrixBase<Derived>::forceAlignedAccessIf() const
 {
-  return derived();
+  return derived();  // FIXME This should not work but apparently is never used
 }

 /** \returns an expression of *this with forced aligned access if \a Enable is true.
@@ -138,7 +138,7 @@ template<bool Enable>
 inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type
 MatrixBase<Derived>::forceAlignedAccessIf()
 {
-  return derived();
+  return derived();  // FIXME This should not work but apparently is never used
 }

 } // end namespace Eigen

--- a/external/eigen3/Eigen/src/Core/Functors.h
+++ b/external/eigen3/Eigen/src/Core/Functors.h
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_FUNCTORS_H
-#define EIGEN_FUNCTORS_H
-
-namespace Eigen {
-
-namespace internal {
-
-// associative functors:
-
-/** \internal
-  * \brief Template functor to compute the sum of two scalars
-  *
-  * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum()
-  */
-template<typename Scalar> struct scalar_sum_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
-  { return internal::padd(a,b); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
-  { return internal::predux(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_sum_op<Scalar> > {
-  enum {
-    Cost = NumTraits<Scalar>::AddCost,
-    PacketAccess = packet_traits<Scalar>::HasAdd
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the product of two scalars
-  *
-  * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
-  */
-template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
-  enum {
-    // TODO vectorize mixed product
-    Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
-  };
-  typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
-  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
-  { return internal::pmul(a,b); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
-  { return internal::predux_mul(a); }
-};
-template<typename LhsScalar,typename RhsScalar>
-struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
-  enum {
-    Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
-    PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the conjugate product of two scalars
-  *
-  * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y)
-  */
-template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
-
-  enum {
-    Conj = NumTraits<LhsScalar>::IsComplex
-  };
-  
-  typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
-  
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
-  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
-  { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
-  
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
-  { return conj_helper<Packet,Packet,Conj,false>().pmul(a,b); }
-};
-template<typename LhsScalar,typename RhsScalar>
-struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
-  enum {
-    Cost = NumTraits<LhsScalar>::MulCost,
-    PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMul
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the min of two scalars
-  *
-  * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
-  */
-template<typename Scalar> struct scalar_min_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
-  { return internal::pmin(a,b); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
-  { return internal::predux_min(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_min_op<Scalar> > {
-  enum {
-    Cost = NumTraits<Scalar>::AddCost,
-    PacketAccess = packet_traits<Scalar>::HasMin
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the max of two scalars
-  *
-  * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
-  */
-template<typename Scalar> struct scalar_max_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
-  { return internal::pmax(a,b); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
-  { return internal::predux_max(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_max_op<Scalar> > {
-  enum {
-    Cost = NumTraits<Scalar>::AddCost,
-    PacketAccess = packet_traits<Scalar>::HasMax
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the hypot of two scalars
-  *
-  * \sa MatrixBase::stableNorm(), class Redux
-  */
-template<typename Scalar> struct scalar_hypot_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
-//   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
-  {
-    using std::max;
-    using std::min;
-    using std::sqrt;
-    Scalar p = (max)(_x, _y);
-    Scalar q = (min)(_x, _y);
-    Scalar qp = q/p;
-    return p * sqrt(Scalar(1) + qp*qp);
-  }
-};
-template<typename Scalar>
-struct functor_traits<scalar_hypot_op<Scalar> > {
-  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess=0 };
-};
-
-/** \internal
-  * \brief Template functor to compute the pow of two scalars
-  */
-template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
-  inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
-};
-template<typename Scalar, typename OtherScalar>
-struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
-  enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
-};
-
-// other binary functors:
-
-/** \internal
-  * \brief Template functor to compute the difference of two scalars
-  *
-  * \sa class CwiseBinaryOp, MatrixBase::operator-
-  */
-template<typename Scalar> struct scalar_difference_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
-  { return internal::psub(a,b); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_difference_op<Scalar> > {
-  enum {
-    Cost = NumTraits<Scalar>::AddCost,
-    PacketAccess = packet_traits<Scalar>::HasSub
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the quotient of two scalars
-  *
-  * \sa class CwiseBinaryOp, Cwise::operator/()
-  */
-template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
-  enum {
-    // TODO vectorize mixed product
-    Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv
-  };
-  typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
-  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
-  { return internal::pdiv(a,b); }
-};
-template<typename LhsScalar,typename RhsScalar>
-struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
-  enum {
-    Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost), // rough estimate!
-    PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable
-  };
-};
-
-
-
-/** \internal
-  * \brief Template functor to compute the and of two booleans
-  *
-  * \sa class CwiseBinaryOp, ArrayBase::operator&&
-  */
-struct scalar_boolean_and_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
-  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
-};
-template<> struct functor_traits<scalar_boolean_and_op> {
-  enum {
-    Cost = NumTraits<bool>::AddCost,
-    PacketAccess = false
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the or of two booleans
-  *
-  * \sa class CwiseBinaryOp, ArrayBase::operator||
-  */
-struct scalar_boolean_or_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
-  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
-};
-template<> struct functor_traits<scalar_boolean_or_op> {
-  enum {
-    Cost = NumTraits<bool>::AddCost,
-    PacketAccess = false
-  };
-};
-
-/** \internal
-  * \brief Template functors for comparison of two scalars
-  * \todo Implement packet-comparisons
-  */
-template<typename Scalar, ComparisonName cmp> struct scalar_cmp_op;
-
-template<typename Scalar, ComparisonName cmp>
-struct functor_traits<scalar_cmp_op<Scalar, cmp> > {
-  enum {
-    Cost = NumTraits<Scalar>::AddCost,
-    PacketAccess = false
-  };
-};
-
-template<ComparisonName Cmp, typename Scalar>
-struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> {
-  typedef bool type;
-};
-
-
-template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
-  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;}
-};
-template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
-  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;}
-};
-template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
-  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;}
-};
-template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
-  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);}
-};
-template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
-  EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;}
-};
-
-// unary functors:
-
-/** \internal
-  * \brief Template functor to compute the opposite of a scalar
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::operator-
-  */
-template<typename Scalar> struct scalar_opposite_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
-  { return internal::pnegate(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_opposite_op<Scalar> >
-{ enum {
-    Cost = NumTraits<Scalar>::AddCost,
-    PacketAccess = packet_traits<Scalar>::HasNegate };
-};
-
-/** \internal
-  * \brief Template functor to compute the absolute value of a scalar
-  *
-  * \sa class CwiseUnaryOp, Cwise::abs
-  */
-template<typename Scalar> struct scalar_abs_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
-  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
-  { return internal::pabs(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_abs_op<Scalar> >
-{
-  enum {
-    Cost = NumTraits<Scalar>::AddCost,
-    PacketAccess = packet_traits<Scalar>::HasAbs
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the squared absolute value of a scalar
-  *
-  * \sa class CwiseUnaryOp, Cwise::abs2
-  */
-template<typename Scalar> struct scalar_abs2_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
-  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
-  { return internal::pmul(a,a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_abs2_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasAbs2 }; };
-
-/** \internal
-  * \brief Template functor to compute the conjugate of a complex value
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::conjugate()
-  */
-template<typename Scalar> struct scalar_conjugate_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
-  template<typename Packet>
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_conjugate_op<Scalar> >
-{
-  enum {
-    Cost = NumTraits<Scalar>::IsComplex ? NumTraits<Scalar>::AddCost : 0,
-    PacketAccess = packet_traits<Scalar>::HasConj
-  };
-};
-
-/** \internal
-  * \brief Template functor to cast a scalar to another type
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::cast()
-  */
-template<typename Scalar, typename NewType>
-struct scalar_cast_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
-  typedef NewType result_type;
-  EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
-};
-template<typename Scalar, typename NewType>
-struct functor_traits<scalar_cast_op<Scalar,NewType> >
-{ enum { Cost = is_same<Scalar, NewType>::value ? 0 : NumTraits<NewType>::AddCost, PacketAccess = false }; };
-
-/** \internal
-  * \brief Template functor to extract the real part of a complex
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::real()
-  */
-template<typename Scalar>
-struct scalar_real_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
-  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_real_op<Scalar> >
-{ enum { Cost = 0, PacketAccess = false }; };
-
-/** \internal
-  * \brief Template functor to extract the imaginary part of a complex
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::imag()
-  */
-template<typename Scalar>
-struct scalar_imag_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
-  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_imag_op<Scalar> >
-{ enum { Cost = 0, PacketAccess = false }; };
-
-/** \internal
-  * \brief Template functor to extract the real part of a complex as a reference
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::real()
-  */
-template<typename Scalar>
-struct scalar_real_ref_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
-  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_real_ref_op<Scalar> >
-{ enum { Cost = 0, PacketAccess = false }; };
-
-/** \internal
-  * \brief Template functor to extract the imaginary part of a complex as a reference
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::imag()
-  */
-template<typename Scalar>
-struct scalar_imag_ref_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
-  typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_imag_ref_op<Scalar> >
-{ enum { Cost = 0, PacketAccess = false }; };
-
-/** \internal
-  *
-  * \brief Template functor to compute the exponential of a scalar
-  *
-  * \sa class CwiseUnaryOp, Cwise::exp()
-  */
-template<typename Scalar> struct scalar_exp_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_exp_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasExp }; };
-
-/** \internal
-  *
-  * \brief Template functor to compute the logarithm of a scalar
-  *
-  * \sa class CwiseUnaryOp, Cwise::log()
-  */
-template<typename Scalar> struct scalar_log_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_log_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasLog }; };
-
-/** \internal
-  * \brief Template functor to multiply a scalar by a fixed other one
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
-  */
-/* NOTE why doing the pset1() in packetOp *is* an optimization ?
- * indeed it seems better to declare m_other as a Packet and do the pset1() once
- * in the constructor. However, in practice:
- *  - GCC does not like m_other as a Packet and generate a load every time it needs it
- *  - on the other hand GCC is able to moves the pset1() outside the loop :)
- *  - simpler code ;)
- * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y)
- */
-template<typename Scalar>
-struct scalar_multiple_op {
-  typedef typename packet_traits<Scalar>::type Packet;
-  // FIXME default copy constructors seems bugged with std::complex<>
-  EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
-  EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
-  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
-  { return internal::pmul(a, pset1<Packet>(m_other)); }
-  typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
-};
-template<typename Scalar>
-struct functor_traits<scalar_multiple_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
-
-template<typename Scalar1, typename Scalar2>
-struct scalar_multiple2_op {
-  typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
-  EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
-  EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
-  EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
-  typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
-};
-template<typename Scalar1,typename Scalar2>
-struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
-{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
-
-/** \internal
-  * \brief Template functor to divide a scalar by a fixed other one
-  *
-  * This functor is used to implement the quotient of a matrix by
-  * a scalar where the scalar type is not necessarily a floating point type.
-  *
-  * \sa class CwiseUnaryOp, MatrixBase::operator/
-  */
-template<typename Scalar>
-struct scalar_quotient1_op {
-  typedef typename packet_traits<Scalar>::type Packet;
-  // FIXME default copy constructors seems bugged with std::complex<>
-  EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
-  EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
-  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
-  EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
-  { return internal::pdiv(a, pset1<Packet>(m_other)); }
-  typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
-};
-template<typename Scalar>
-struct functor_traits<scalar_quotient1_op<Scalar> >
-{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
-
-// nullary functors
-
-template<typename Scalar>
-struct scalar_constant_op {
-  typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
-  EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
-  const Scalar m_other;
-};
-template<typename Scalar>
-struct functor_traits<scalar_constant_op<Scalar> >
-// FIXME replace this packet test by a safe one
-{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
-
-template<typename Scalar> struct scalar_identity_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_identity_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
-
-template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
-
-// linear access for packet ops:
-// 1) initialization
-//   base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
-// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
-//   base += [size*step, ..., size*step]
-//
-// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
-//       in order to avoid the padd() in operator() ?
-template <typename Scalar>
-struct linspaced_op_impl<Scalar,false>
-{
-  typedef typename packet_traits<Scalar>::type Packet;
-
-  linspaced_op_impl(const Scalar& low, const Scalar& step) :
-  m_low(low), m_step(step),
-  m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
-  m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
-
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const 
-  { 
-    m_base = padd(m_base, pset1<Packet>(m_step));
-    return m_low+Scalar(i)*m_step; 
-  }
-
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
-
-  const Scalar m_low;
-  const Scalar m_step;
-  const Packet m_packetStep;
-  mutable Packet m_base;
-};
-
-// random access for packet ops:
-// 1) each step
-//   [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
-template <typename Scalar>
-struct linspaced_op_impl<Scalar,true>
-{
-  typedef typename packet_traits<Scalar>::type Packet;
-
-  linspaced_op_impl(const Scalar& low, const Scalar& step) :
-  m_low(low), m_step(step),
-  m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
-
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
-
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
-  { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
-
-  const Scalar m_low;
-  const Scalar m_step;
-  const Packet m_lowPacket;
-  const Packet m_stepPacket;
-  const Packet m_interPacket;
-};
-
-// ----- Linspace functor ----------------------------------------------------------------
-
-// Forward declaration (we default to random access which does not really give
-// us a speed gain when using packet access but it allows to use the functor in
-// nested expressions).
-template <typename Scalar, bool RandomAccess = true> struct linspaced_op;
-template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,RandomAccess> >
-{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
-template <typename Scalar, bool RandomAccess> struct linspaced_op
-{
-  typedef typename packet_traits<Scalar>::type Packet;
-  linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
-
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
-
-  // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
-  // there row==0 and col is used for the actual iteration.
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const 
-  {
-    eigen_assert(col==0 || row==0);
-    return impl(col + row);
-  }
-
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
-
-  // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
-  // there row==0 and col is used for the actual iteration.
-  template<typename Index>
-  EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const
-  {
-    eigen_assert(col==0 || row==0);
-    return impl.packetOp(col + row);
-  }
-
-  // This proxy object handles the actual required temporaries, the different
-  // implementations (random vs. sequential access) as well as the
-  // correct piping to size 2/4 packet operations.
-  const linspaced_op_impl<Scalar,RandomAccess> impl;
-};
-
-// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
-// to indicate whether a functor allows linear access, just always answering 'yes' except for
-// scalar_identity_op.
-// FIXME move this to functor_traits adding a functor_default
-template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
-template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };
-
-// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication
-// where the mixing of different types is handled by scalar_product_traits
-// In particular, real * complex<real> is allowed.
-// FIXME move this to functor_traits adding a functor_default
-template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
-
-
-/** \internal
-  * \brief Template functor to add a scalar to a fixed other one
-  * \sa class CwiseUnaryOp, Array::operator+
-  */
-/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */
-template<typename Scalar>
-struct scalar_add_op {
-  typedef typename packet_traits<Scalar>::type Packet;
-  // FIXME default copy constructors seems bugged with std::complex<>
-  inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
-  inline scalar_add_op(const Scalar& other) : m_other(other) { }
-  inline Scalar operator() (const Scalar& a) const { return a + m_other; }
-  inline const Packet packetOp(const Packet& a) const
-  { return internal::padd(a, pset1<Packet>(m_other)); }
-  const Scalar m_other;
-};
-template<typename Scalar>
-struct functor_traits<scalar_add_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
-
-/** \internal
-  * \brief Template functor to compute the square root of a scalar
-  * \sa class CwiseUnaryOp, Cwise::sqrt()
-  */
-template<typename Scalar> struct scalar_sqrt_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_sqrt_op<Scalar> >
-{ enum {
-    Cost = 5 * NumTraits<Scalar>::MulCost,
-    PacketAccess = packet_traits<Scalar>::HasSqrt
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the cosine of a scalar
-  * \sa class CwiseUnaryOp, ArrayBase::cos()
-  */
-template<typename Scalar> struct scalar_cos_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
-  inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_cos_op<Scalar> >
-{
-  enum {
-    Cost = 5 * NumTraits<Scalar>::MulCost,
-    PacketAccess = packet_traits<Scalar>::HasCos
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the sine of a scalar
-  * \sa class CwiseUnaryOp, ArrayBase::sin()
-  */
-template<typename Scalar> struct scalar_sin_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_sin_op<Scalar> >
-{
-  enum {
-    Cost = 5 * NumTraits<Scalar>::MulCost,
-    PacketAccess = packet_traits<Scalar>::HasSin
-  };
-};
-
-
-/** \internal
-  * \brief Template functor to compute the tan of a scalar
-  * \sa class CwiseUnaryOp, ArrayBase::tan()
-  */
-template<typename Scalar> struct scalar_tan_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_tan_op<Scalar> >
-{
-  enum {
-    Cost = 5 * NumTraits<Scalar>::MulCost,
-    PacketAccess = packet_traits<Scalar>::HasTan
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the arc cosine of a scalar
-  * \sa class CwiseUnaryOp, ArrayBase::acos()
-  */
-template<typename Scalar> struct scalar_acos_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_acos_op<Scalar> >
-{
-  enum {
-    Cost = 5 * NumTraits<Scalar>::MulCost,
-    PacketAccess = packet_traits<Scalar>::HasACos
-  };
-};
-
-/** \internal
-  * \brief Template functor to compute the arc sine of a scalar
-  * \sa class CwiseUnaryOp, ArrayBase::asin()
-  */
-template<typename Scalar> struct scalar_asin_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
-  inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_asin_op<Scalar> >
-{
-  enum {
-    Cost = 5 * NumTraits<Scalar>::MulCost,
-    PacketAccess = packet_traits<Scalar>::HasASin
-  };
-};
-
-/** \internal
-  * \brief Template functor to raise a scalar to a power
-  * \sa class CwiseUnaryOp, Cwise::pow
-  */
-template<typename Scalar>
-struct scalar_pow_op {
-  // FIXME default copy constructors seems bugged with std::complex<>
-  inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
-  inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
-  inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
-  const Scalar m_exponent;
-};
-template<typename Scalar>
-struct functor_traits<scalar_pow_op<Scalar> >
-{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
-
-/** \internal
-  * \brief Template functor to compute the quotient between a scalar and array entries.
-  * \sa class CwiseUnaryOp, Cwise::inverse()
-  */
-template<typename Scalar>
-struct scalar_inverse_mult_op {
-  scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
-  inline Scalar operator() (const Scalar& a) const { return m_other / a; }
-  template<typename Packet>
-  inline const Packet packetOp(const Packet& a) const
-  { return internal::pdiv(pset1<Packet>(m_other),a); }
-  Scalar m_other;
-};
-
-/** \internal
-  * \brief Template functor to compute the inverse of a scalar
-  * \sa class CwiseUnaryOp, Cwise::inverse()
-  */
-template<typename Scalar>
-struct scalar_inverse_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
-  inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
-  template<typename Packet>
-  inline const Packet packetOp(const Packet& a) const
-  { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_inverse_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
-
-/** \internal
-  * \brief Template functor to compute the square of a scalar
-  * \sa class CwiseUnaryOp, Cwise::square()
-  */
-template<typename Scalar>
-struct scalar_square_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
-  inline Scalar operator() (const Scalar& a) const { return a*a; }
-  template<typename Packet>
-  inline const Packet packetOp(const Packet& a) const
-  { return internal::pmul(a,a); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_square_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
-
-/** \internal
-  * \brief Template functor to compute the cube of a scalar
-  * \sa class CwiseUnaryOp, Cwise::cube()
-  */
-template<typename Scalar>
-struct scalar_cube_op {
-  EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
-  inline Scalar operator() (const Scalar& a) const { return a*a*a; }
-  template<typename Packet>
-  inline const Packet packetOp(const Packet& a) const
-  { return internal::pmul(a,pmul(a,a)); }
-};
-template<typename Scalar>
-struct functor_traits<scalar_cube_op<Scalar> >
-{ enum { Cost = 2*NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
-
-// default functor traits for STL functors:
-
-template<typename T>
-struct functor_traits<std::multiplies<T> >
-{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::divides<T> >
-{ enum { Cost = NumTraits<T>::MulCost, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::plus<T> >
-{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::minus<T> >
-{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::negate<T> >
-{ enum { Cost = NumTraits<T>::AddCost, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::logical_or<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::logical_and<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::logical_not<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::greater<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::less<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::greater_equal<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::less_equal<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::equal_to<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::not_equal_to<T> >
-{ enum { Cost = 1, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::binder2nd<T> >
-{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::binder1st<T> >
-{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::unary_negate<T> >
-{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
-
-template<typename T>
-struct functor_traits<std::binary_negate<T> >
-{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
-
-#ifdef EIGEN_STDEXT_SUPPORT
-
-template<typename T0,typename T1>
-struct functor_traits<std::project1st<T0,T1> >
-{ enum { Cost = 0, PacketAccess = false }; };
-
-template<typename T0,typename T1>
-struct functor_traits<std::project2nd<T0,T1> >
-{ enum { Cost = 0, PacketAccess = false }; };
-
-template<typename T0,typename T1>
-struct functor_traits<std::select2nd<std::pair<T0,T1> > >
-{ enum { Cost = 0, PacketAccess = false }; };
-
-template<typename T0,typename T1>
-struct functor_traits<std::select1st<std::pair<T0,T1> > >
-{ enum { Cost = 0, PacketAccess = false }; };
-
-template<typename T0,typename T1>
-struct functor_traits<std::unary_compose<T0,T1> >
-{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost, PacketAccess = false }; };
-
-template<typename T0,typename T1,typename T2>
-struct functor_traits<std::binary_compose<T0,T1,T2> >
-{ enum { Cost = functor_traits<T0>::Cost + functor_traits<T1>::Cost + functor_traits<T2>::Cost, PacketAccess = false }; };
-
-#endif // EIGEN_STDEXT_SUPPORT
-
-// allow to add new functors and specializations of functor_traits from outside Eigen.
-// this macro is really needed because functor_traits must be specialized after it is declared but before it is used...
-#ifdef EIGEN_FUNCTORS_PLUGIN
-#include EIGEN_FUNCTORS_PLUGIN
-#endif
-
-} // end namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_FUNCTORS_H
--- a/external/eigen3/Eigen/src/Core/Fuzzy.h
+++ b/external/eigen3/Eigen/src/Core/Fuzzy.h
@@ -19,18 +19,19 @@ namespace internal
 template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isApprox_selector
 {
+  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
  {
-    using std::min;
-    typename internal::nested<Derived,2>::type nested(x);
-    typename internal::nested<OtherDerived,2>::type otherNested(y);
-    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
+    typename internal::nested_eval<Derived,2>::type nested(x);
+    typename internal::nested_eval<OtherDerived,2>::type otherNested(y);
+    return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
  }
 };

 template<typename Derived, typename OtherDerived>
 struct isApprox_selector<Derived, OtherDerived, true>
 {
+  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
  {
    return x.matrix() == y.matrix();
@@ -40,6 +41,7 @@ struct isApprox_selector<Derived, OtherDerived, true>
 template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isMuchSmallerThan_object_selector
 {
+  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
  {
    return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
@@ -49,6 +51,7 @@ struct isMuchSmallerThan_object_selector
 template<typename Derived, typename OtherDerived>
 struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
 {
+  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
  {
    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
@@ -58,6 +61,7 @@ struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
 template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isMuchSmallerThan_scalar_selector
 {
+  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
  {
    return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
@@ -67,6 +71,7 @@ struct isMuchSmallerThan_scalar_selector
 template<typename Derived>
 struct isMuchSmallerThan_scalar_selector<Derived, true>
 {
+  EIGEN_DEVICE_FUNC
  static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
  {
    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();

--- a/external/eigen3/Eigen/src/Core/GeneralProduct.h
+++ b/external/eigen3/Eigen/src/Core/GeneralProduct.h
@@ -11,29 +11,7 @@
 #ifndef EIGEN_GENERAL_PRODUCT_H
 #define EIGEN_GENERAL_PRODUCT_H

-namespace Eigen { 
-
-/** \class GeneralProduct
-  * \ingroup Core_Module
-  *
-  * \brief Expression of the product of two general matrices or vectors
-  *
-  * \param LhsNested the type used to store the left-hand side
-  * \param RhsNested the type used to store the right-hand side
-  * \param ProductMode the type of the product
-  *
-  * This class represents an expression of the product of two general matrices.
-  * We call a general matrix, a dense matrix with full storage. For instance,
-  * This excludes triangular, selfadjoint, and sparse matrices.
-  * It is the return type of the operator* between general matrices. Its template
-  * arguments are determined automatically by ProductReturnType. Therefore,
-  * GeneralProduct should never be used direclty. To determine the result type of a
-  * function which involves a matrix product, use ProductReturnType::Type.
-  *
-  * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
-  */
-template<typename Lhs, typename Rhs, int ProductType = internal::product_type<Lhs,Rhs>::value>
-class GeneralProduct;
+namespace Eigen {

 enum {
  Large = 2,
@@ -47,7 +25,8 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
 template<int Size, int MaxSize> struct product_size_category
 {
  enum { is_large = MaxSize == Dynamic ||
-                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD,
+                    Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+                    (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
         value = is_large  ? Large
               : Size == 1 ? 1
                           : Small
@@ -59,15 +38,14 @@ template<typename Lhs, typename Rhs> struct product_type
  typedef typename remove_all<Lhs>::type _Lhs;
  typedef typename remove_all<Rhs>::type _Rhs;
  enum {
-    MaxRows  = _Lhs::MaxRowsAtCompileTime,
-    Rows  = _Lhs::RowsAtCompileTime,
-    MaxCols  = _Rhs::MaxColsAtCompileTime,
-    Cols  = _Rhs::ColsAtCompileTime,
-    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime,
-                                           _Rhs::MaxRowsAtCompileTime),
-    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime,
-                                        _Rhs::RowsAtCompileTime),
-    LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+    MaxRows = traits<_Lhs>::MaxRowsAtCompileTime,
+    Rows    = traits<_Lhs>::RowsAtCompileTime,
+    MaxCols = traits<_Rhs>::MaxColsAtCompileTime,
+    Cols    = traits<_Rhs>::ColsAtCompileTime,
+    MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime,
+                                           traits<_Rhs>::MaxRowsAtCompileTime),
+    Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime,
+                                        traits<_Rhs>::RowsAtCompileTime)
  };

  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
@@ -82,7 +60,8 @@ private:

 public:
  enum {
-    value = selector::ret
+    value = selector::ret,
+    ret = selector::ret
  };
 #ifdef EIGEN_DEBUG_PRODUCT
  static void debug()
@@ -98,12 +77,13 @@ public:
 #endif
 };

-
 /* The following allows to select the kind of product at compile time
 * based on the three dimensions of the product.
 * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
 // FIXME I'm not sure the current mapping is the ideal one.
 template<int M, int N>  struct product_type_selector<M,N,1>              { enum { ret = OuterProduct }; };
+template<int M>         struct product_type_selector<M, 1, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
+template<int N>         struct product_type_selector<1, N, 1>            { enum { ret = LazyCoeffBasedProductMode }; };
 template<int Depth>     struct product_type_selector<1,    1,    Depth>  { enum { ret = InnerProduct }; };
 template<>              struct product_type_selector<1,    1,    1>      { enum { ret = InnerProduct }; };
 template<>              struct product_type_selector<Small,1,    Small>  { enum { ret = CoeffBasedProductMode }; };
@@ -122,60 +102,12 @@ template<>              struct product_type_selector<Small,Small,Large>  { enum
 template<>              struct product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
 template<>              struct product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };
 template<>              struct product_type_selector<Large,Large,Large>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = GemmProduct }; };
-template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = GemmProduct }; };
+template<>              struct product_type_selector<Large,Small,Small>  { enum { ret = CoeffBasedProductMode }; };
+template<>              struct product_type_selector<Small,Large,Small>  { enum { ret = CoeffBasedProductMode }; };
 template<>              struct product_type_selector<Large,Large,Small>  { enum { ret = GemmProduct }; };

 } // end namespace internal

-/** \class ProductReturnType
-  * \ingroup Core_Module
-  *
-  * \brief Helper class to get the correct and optimized returned type of operator*
-  *
-  * \param Lhs the type of the left-hand side
-  * \param Rhs the type of the right-hand side
-  * \param ProductMode the type of the product (determined automatically by internal::product_mode)
-  *
-  * This class defines the typename Type representing the optimized product expression
-  * between two matrix expressions. In practice, using ProductReturnType<Lhs,Rhs>::Type
-  * is the recommended way to define the result type of a function returning an expression
-  * which involve a matrix product. The class Product should never be
-  * used directly.
-  *
-  * \sa class Product, MatrixBase::operator*(const MatrixBase<OtherDerived>&)
-  */
-template<typename Lhs, typename Rhs, int ProductType>
-struct ProductReturnType
-{
-  // TODO use the nested type to reduce instanciations ????
-//   typedef typename internal::nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
-//   typedef typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
-
-  typedef GeneralProduct<Lhs/*Nested*/, Rhs/*Nested*/, ProductType> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,CoeffBasedProductMode>
-{
-  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
-  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, EvalBeforeAssigningBit | EvalBeforeNestingBit> Type;
-};
-
-template<typename Lhs, typename Rhs>
-struct ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{
-  typedef typename internal::nested<Lhs, Rhs::ColsAtCompileTime, typename internal::plain_matrix_type<Lhs>::type >::type LhsNested;
-  typedef typename internal::nested<Rhs, Lhs::RowsAtCompileTime, typename internal::plain_matrix_type<Rhs>::type >::type RhsNested;
-  typedef CoeffBasedProduct<LhsNested, RhsNested, NestByRefBit> Type;
-};
-
-// this is a workaround for sun CC
-template<typename Lhs, typename Rhs>
-struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedProductMode>
-{};
-
 /***********************************************************************
 *  Implementation of Inner Vector Vector Product
 ***********************************************************************/
@@ -187,119 +119,10 @@ struct LazyProductReturnType : public ProductReturnType<Lhs,Rhs,LazyCoeffBasedPr
 // product ends up to a row-vector times col-vector product... To tackle this use
 // case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);

-namespace internal {
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,InnerProduct> >
- : traits<Matrix<typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, InnerProduct>
-  : internal::no_assignment_operator,
-    public Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1>
-{
-    typedef Matrix<typename internal::scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType,1,1> Base;
-  public:
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs)
-    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-
-      Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
-    }
-
-    /** Convertion to scalar */
-    operator const typename Base::Scalar() const {
-      return Base::coeff(0,0);
-    }
-};
-
 /***********************************************************************
 *  Implementation of Outer Vector Vector Product
 ***********************************************************************/

-namespace internal {
-
-// Column major
-template<typename ProductType, typename Dest, typename Func>
-EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
-{
-  typedef typename Dest::Index Index;
-  // FIXME make sure lhs is sequentially stored
-  // FIXME not very good if rhs is real and lhs complex while alpha is real too
-  const Index cols = dest.cols();
-  for (Index j=0; j<cols; ++j)
-    func(dest.col(j), prod.rhs().coeff(0,j) * prod.lhs());
-}
-
-// Row major
-template<typename ProductType, typename Dest, typename Func>
-EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
-  typedef typename Dest::Index Index;
-  // FIXME make sure rhs is sequentially stored
-  // FIXME not very good if lhs is real and rhs complex while alpha is real too
-  const Index rows = dest.rows();
-  for (Index i=0; i<rows; ++i)
-    func(dest.row(i), prod.lhs().coeff(i,0) * prod.rhs());
-}
-
-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs> >
-{};
-
-}
-
-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, OuterProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
-{
-    template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
-    
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
-    GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs)
-    {
-      EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
-        YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-    }
-    
-    struct set  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived()  = src; } };
-    struct add  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
-    struct sub  { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
-    struct adds {
-      Scalar m_scale;
-      adds(const Scalar& s) : m_scale(s) {}
-      template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
-        dst.const_cast_derived() += m_scale * src;
-      }
-    };
-    
-    template<typename Dest>
-    inline void evalTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, set(), is_row_major<Dest>());
-    }
-    
-    template<typename Dest>
-    inline void addTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, add(), is_row_major<Dest>());
-    }
-
-    template<typename Dest>
-    inline void subTo(Dest& dest) const {
-      internal::outer_product_selector_run(*this, dest, sub(), is_row_major<Dest>());
-    }
-
-    template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
-    {
-      internal::outer_product_selector_run(*this, dest, adds(alpha), is_row_major<Dest>());
-    }
-};
-
 /***********************************************************************
 *  Implementation of General Matrix Vector Product
 ***********************************************************************/
@@ -313,60 +136,13 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
 */
 namespace internal {

-template<typename Lhs, typename Rhs>
-struct traits<GeneralProduct<Lhs,Rhs,GemvProduct> >
- : traits<ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs> >
-{};
-
 template<int Side, int StorageOrder, bool BlasCompatible>
-struct gemv_selector;
+struct gemv_dense_selector;

 } // end namespace internal

-template<typename Lhs, typename Rhs>
-class GeneralProduct<Lhs, Rhs, GemvProduct>
-  : public ProductBase<GeneralProduct<Lhs,Rhs,GemvProduct>, Lhs, Rhs>
-{
-  public:
-    EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)
-
-    typedef typename Lhs::Scalar LhsScalar;
-    typedef typename Rhs::Scalar RhsScalar;
-
-    GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs)
-    {
-//       EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::Scalar, typename Rhs::Scalar>::value),
-//         YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-    }
-
-    enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
-    typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;
-
-    template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
-    {
-      eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
-      internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
-                       bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)>::run(*this, dst, alpha);
-    }
-};
-
 namespace internal {

-// The vector is on the left => transposition
-template<int StorageOrder, bool BlasCompatible>
-struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
-{
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
-  {
-    Transpose<Dest> destT(dest);
-    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
-    gemv_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
-      ::run(GeneralProduct<Transpose<const typename ProductType::_RhsNested>,Transpose<const typename ProductType::_LhsNested>, GemvProduct>
-        (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha);
-  }
-};
-
 template<typename Scalar,int Size,int MaxSize,bool Cond> struct gemv_static_vector_if;

 template<typename Scalar,int Size,int MaxSize>
@@ -384,46 +160,61 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
 template<typename Scalar,int Size,int MaxSize>
 struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
 {
-  #if EIGEN_ALIGN_STATICALLY
-  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
-  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
-  #else
-  // Some architectures cannot align on the stack,
-  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
  enum {
    ForceAlignment  = internal::packet_traits<Scalar>::Vectorizable,
    PacketSize      = internal::packet_traits<Scalar>::size
  };
-  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
+  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
+  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0,EIGEN_PLAIN_ENUM_MIN(AlignedMax,PacketSize)> m_data;
+  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
+  #else
+  // Some architectures cannot align on the stack,
+  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
+  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?EIGEN_MAX_ALIGN_BYTES:0),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() {
    return ForceAlignment
-            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(15))) + 16)
+            ? reinterpret_cast<Scalar*>((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
            : m_data.array;
  }
  #endif
 };

-template<> struct gemv_selector<OnTheRight,ColMajor,true>
+// The vector is on the left => transposition
+template<int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector<OnTheLeft,StorageOrder,BlasCompatible>
+{
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
+  {
+    Transpose<Dest> destT(dest);
+    enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
+    gemv_dense_selector<OnTheRight,OtherStorageOrder,BlasCompatible>
+      ::run(rhs.transpose(), lhs.transpose(), destT, alpha);
+  }
+};
+
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,true>
 {
-  template<typename ProductType, typename Dest>
-  static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::LhsScalar   LhsScalar;
-    typedef typename ProductType::RhsScalar   RhsScalar;
-    typedef typename ProductType::Scalar      ResScalar;
-    typedef typename ProductType::RealScalar  RealScalar;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-    typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
-
-    ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs());
-    ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs());
-
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+    typedef typename Lhs::Scalar   LhsScalar;
+    typedef typename Rhs::Scalar   RhsScalar;
+    typedef typename Dest::Scalar  ResScalar;
+    typedef typename Dest::RealScalar  RealScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+  
+    typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
+
+    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
+    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+                                  * RhsBlasTraits::extractScalarFactor(rhs);

    // make sure Dest is a compile-time vector type (bug 1166)
    typedef typename conditional<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr>::type ActualDest;
@@ -433,80 +224,97 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
      // on, the other hand it is good for the cache to pack the vector anyways...
      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1),
      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
-      MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal
+      MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal
    };

-    gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
-
-    bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
-    bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
-    
+    typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
    RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);

-    ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
-                                                  evalToDest ? dest.data() : static_dest.data());
-    
-    if(!evalToDest)
+    if(!MightCannotUseDest)
    {
-      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      int size = dest.size();
-      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      #endif
-      if(!alphaIsCompatible)
+      // shortcut if we are sure to be able to use dest directly,
+      // this ease the compiler to generate cleaner and more optimzized code for most common cases
+      general_matrix_vector_product
+          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+          actualLhs.rows(), actualLhs.cols(),
+          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+          dest.data(), 1,
+          compatibleAlpha);
+    }
+    else
+    {
+      gemv_static_vector_if<ResScalar,ActualDest::SizeAtCompileTime,ActualDest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
+
+      const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
+      const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+
+      ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
+                                                    evalToDest ? dest.data() : static_dest.data());
+
+      if(!evalToDest)
      {
-        MappedDest(actualDestPtr, dest.size()).setZero();
-        compatibleAlpha = RhsScalar(1);
+        #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+        Index size = dest.size();
+        EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+        #endif
+        if(!alphaIsCompatible)
+        {
+          MappedDest(actualDestPtr, dest.size()).setZero();
+          compatibleAlpha = RhsScalar(1);
+        }
+        else
+          MappedDest(actualDestPtr, dest.size()) = dest;
      }
-      else
-        MappedDest(actualDestPtr, dest.size()) = dest;
-    }

-    general_matrix_vector_product
-      <Index,LhsScalar,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
-        actualLhs.rows(), actualLhs.cols(),
-        actualLhs.data(), actualLhs.outerStride(),
-        actualRhs.data(), actualRhs.innerStride(),
-        actualDestPtr, 1,
-        compatibleAlpha);
+      general_matrix_vector_product
+          <Index,LhsScalar,LhsMapper,ColMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
+          actualLhs.rows(), actualLhs.cols(),
+          LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+          RhsMapper(actualRhs.data(), actualRhs.innerStride()),
+          actualDestPtr, 1,
+          compatibleAlpha);

-    if (!evalToDest)
-    {
-      if(!alphaIsCompatible)
-        dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
-      else
-        dest = MappedDest(actualDestPtr, dest.size());
+      if (!evalToDest)
+      {
+        if(!alphaIsCompatible)
+          dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
+        else
+          dest = MappedDest(actualDestPtr, dest.size());
+      }
    }
  }
 };

-template<> struct gemv_selector<OnTheRight,RowMajor,true>
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,true>
 {
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    typedef typename ProductType::LhsScalar LhsScalar;
-    typedef typename ProductType::RhsScalar RhsScalar;
-    typedef typename ProductType::Scalar    ResScalar;
-    typedef typename ProductType::Index Index;
-    typedef typename ProductType::ActualLhsType ActualLhsType;
-    typedef typename ProductType::ActualRhsType ActualRhsType;
-    typedef typename ProductType::_ActualRhsType _ActualRhsType;
-    typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
-    typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
-
-    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
-    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
-
-    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
-                                  * RhsBlasTraits::extractScalarFactor(prod.rhs());
+    typedef typename Lhs::Scalar   LhsScalar;
+    typedef typename Rhs::Scalar   RhsScalar;
+    typedef typename Dest::Scalar  ResScalar;
+    
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
+
+    typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
+    typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
+
+    ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
+                                  * RhsBlasTraits::extractScalarFactor(rhs);

    enum {
      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on, the other hand it is good for the cache to pack the vector anyways...
-      DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
+      DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
    };

-    gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
+    gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;

    ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
@@ -514,45 +322,48 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
    if(!DirectlyUseRhs)
    {
      #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
-      int size = actualRhs.size();
+      Index size = actualRhs.size();
      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
      #endif
-      Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
    }

+    typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
    general_matrix_vector_product
-      <Index,LhsScalar,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsBlasTraits::NeedToConjugate>::run(
+        <Index,LhsScalar,LhsMapper,RowMajor,LhsBlasTraits::NeedToConjugate,RhsScalar,RhsMapper,RhsBlasTraits::NeedToConjugate>::run(
        actualLhs.rows(), actualLhs.cols(),
-        actualLhs.data(), actualLhs.outerStride(),
-        actualRhsPtr, 1,
+        LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+        RhsMapper(actualRhsPtr, 1),
        dest.data(), dest.col(0).innerStride(), //NOTE  if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166)
        actualAlpha);
  }
 };

-template<> struct gemv_selector<OnTheRight,ColMajor,false>
+template<> struct gemv_dense_selector<OnTheRight,ColMajor,false>
 {
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    typedef typename Dest::Index Index;
-    // TODO makes sure dest is sequentially stored in memory, otherwise use a temp
-    const Index size = prod.rhs().rows();
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp
+    typename nested_eval<Rhs,1>::type actual_rhs(rhs);
+    const Index size = rhs.rows();
    for(Index k=0; k<size; ++k)
-      dest += (alpha*prod.rhs().coeff(k)) * prod.lhs().col(k);
+      dest += (alpha*actual_rhs.coeff(k)) * lhs.col(k);
  }
 };

-template<> struct gemv_selector<OnTheRight,RowMajor,false>
+template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
 {
-  template<typename ProductType, typename Dest>
-  static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
+  template<typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
  {
-    typedef typename Dest::Index Index;
-    // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp
-    const Index rows = prod.rows();
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs,1>::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+    typename nested_eval<Rhs,Lhs::RowsAtCompileTime>::type actual_rhs(rhs);
+    const Index rows = dest.rows();
    for(Index i=0; i<rows; ++i)
-      dest.coeffRef(i) += alpha * (prod.lhs().row(i).cwiseProduct(prod.rhs().transpose())).sum();
+      dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum();
  }
 };

@@ -568,9 +379,11 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false>
  *
  * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
  */
+#ifndef __CUDACC__
+
 template<typename Derived>
 template<typename OtherDerived>
-inline const typename ProductReturnType<Derived, OtherDerived>::Type
+inline const Product<Derived, OtherDerived>
 MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 {
  // A note regarding the function declaration: In MSVC, this function will sometimes
@@ -595,9 +408,12 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 #ifdef EIGEN_DEBUG_PRODUCT
  internal::product_type<Derived,OtherDerived>::debug();
 #endif
-  return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+
+  return Product<Derived, OtherDerived>(derived(), other.derived());
 }

+#endif // __CUDACC__
+
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
  * The returned product will behave like any other expressions: the coefficients of the product will be
@@ -611,7 +427,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
  */
 template<typename Derived>
 template<typename OtherDerived>
-const typename LazyProductReturnType<Derived,OtherDerived>::Type
+const Product<Derived,OtherDerived,LazyProduct>
 MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
 {
  enum {
@@ -630,7 +446,7 @@ MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const
    INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
  EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT)

-  return typename LazyProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
+  return Product<Derived,OtherDerived,LazyProduct>(derived(), other.derived());
 }

 } // end namespace Eigen

--- a/external/eigen3/Eigen/src/Core/GenericPacketMath.h
+++ b/external/eigen3/Eigen/src/Core/GenericPacketMath.h
@@ -42,21 +42,28 @@ namespace internal {
 struct default_packet_traits
 {
  enum {
+    HasHalfPacket = 0,
+
    HasAdd    = 1,
    HasSub    = 1,
    HasMul    = 1,
    HasNegate = 1,
    HasAbs    = 1,
+    HasArg    = 0,
    HasAbs2   = 1,
    HasMin    = 1,
    HasMax    = 1,
    HasConj   = 1,
    HasSetLinear = 1,
+    HasBlend  = 0,

    HasDiv    = 0,
    HasSqrt   = 0,
+    HasRsqrt  = 0,
    HasExp    = 0,
    HasLog    = 0,
+    HasLog1p  = 0,
+    HasLog10  = 0,
    HasPow    = 0,

    HasSin    = 0,
@@ -64,17 +71,37 @@ struct default_packet_traits
    HasTan    = 0,
    HasASin   = 0,
    HasACos   = 0,
-    HasATan   = 0
+    HasATan   = 0,
+    HasSinh   = 0,
+    HasCosh   = 0,
+    HasTanh   = 0,
+    HasLGamma = 0,
+    HasDiGamma = 0,
+    HasZeta = 0,
+    HasPolygamma = 0,
+    HasErf = 0,
+    HasErfc = 0,
+    HasIGamma = 0,
+    HasIGammac = 0,
+    HasBetaInc = 0,
+
+    HasRound  = 0,
+    HasFloor  = 0,
+    HasCeil   = 0,
+
+    HasSign   = 0
  };
 };

 template<typename T> struct packet_traits : default_packet_traits
 {
  typedef T type;
+  typedef T half;
  enum {
    Vectorizable = 0,
    size = 1,
-    AlignedOnScalar = 0
+    AlignedOnScalar = 0,
+    HasHalfPacket = 0
  };
  enum {
    HasAdd    = 0,
@@ -90,135 +117,239 @@ template<typename T> struct packet_traits : default_packet_traits
  };
 };

+template<typename T> struct packet_traits<const T> : packet_traits<T> { };
+
+template <typename Src, typename Tgt> struct type_casting_traits {
+  enum {
+    VectorizedCast = 0,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+
+
+/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a) {
+  return static_cast<TgtPacket>(a);
+}
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/) {
+  return static_cast<TgtPacket>(a);
+}
+
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket
+pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) {
+  return static_cast<TgtPacket>(a);
+}
+
 /** \internal \returns a + b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 padd(const Packet& a,
        const Packet& b) { return a+b; }

 /** \internal \returns a - b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 psub(const Packet& a,
        const Packet& b) { return a-b; }

 /** \internal \returns -a (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pnegate(const Packet& a) { return -a; }

 /** \internal \returns conj(a) (coeff-wise) */
-template<typename Packet> inline Packet
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pconj(const Packet& a) { return numext::conj(a); }

 /** \internal \returns a * b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmul(const Packet& a,
        const Packet& b) { return a*b; }

 /** \internal \returns a / b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pdiv(const Packet& a,
        const Packet& b) { return a/b; }

 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmin(const Packet& a,
-        const Packet& b) { using std::min; return (min)(a, b); }
+        const Packet& b) { return numext::mini(a, b); }

 /** \internal \returns the max of \a a and \a b  (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmax(const Packet& a,
-        const Packet& b) { using std::max; return (max)(a, b); }
+        const Packet& b) { return numext::maxi(a, b); }

 /** \internal \returns the absolute value of \a a */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pabs(const Packet& a) { using std::abs; return abs(a); }

+/** \internal \returns the phase angle of \a a */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+parg(const Packet& a) { using numext::arg; return arg(a); }
+
 /** \internal \returns the bitwise and of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pand(const Packet& a, const Packet& b) { return a & b; }

 /** \internal \returns the bitwise or of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 por(const Packet& a, const Packet& b) { return a | b; }

 /** \internal \returns the bitwise xor of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pxor(const Packet& a, const Packet& b) { return a ^ b; }

 /** \internal \returns the bitwise andnot of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pandnot(const Packet& a, const Packet& b) { return a & (!b); }

 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }

 /** \internal \returns a packet version of \a *from, (un-aligned load) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }

+/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
+
+/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pload1(const typename unpacket_traits<Packet>::type  *a) { return pset1<Packet>(*a); }
+
 /** \internal \returns a packet with elements of \a *from duplicated.
-  * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and
-  * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
+  * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
+  * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
  * Currently, this function is only used for scalar * complex products.
- */
-template<typename Packet> inline Packet
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }

-/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
-template<typename Packet> inline Packet
-pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
+/** \internal \returns a packet with elements of \a *from quadrupled.
+  * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
+  * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
+  * Currently, this function is only used in matrix products.
+  * For packet-size smaller or equal to 4, this function is equivalent to pload1 
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+ploadquad(const typename unpacket_traits<Packet>::type* from)
+{ return pload1<Packet>(from); }
+
+/** \internal equivalent to
+  * \code
+  * a0 = pload1(a+0);
+  * a1 = pload1(a+1);
+  * a2 = pload1(a+2);
+  * a3 = pload1(a+3);
+  * \endcode
+  * \sa pset1, pload1, ploaddup, pbroadcast2
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
+                        Packet& a0, Packet& a1, Packet& a2, Packet& a3)
+{
+  a0 = pload1<Packet>(a+0);
+  a1 = pload1<Packet>(a+1);
+  a2 = pload1<Packet>(a+2);
+  a3 = pload1<Packet>(a+3);
+}
+
+/** \internal equivalent to
+  * \code
+  * a0 = pload1(a+0);
+  * a1 = pload1(a+1);
+  * \endcode
+  * \sa pset1, pload1, ploaddup, pbroadcast4
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC
+inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
+                        Packet& a0, Packet& a1)
+{
+  a0 = pload1<Packet>(a+0);
+  a1 = pload1<Packet>(a+1);
+}

 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
-template<typename Scalar> inline typename packet_traits<Scalar>::type
-plset(const Scalar& a) { return a; }
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
+plset(const typename unpacket_traits<Packet>::type& a) { return a; }

 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
-template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
 { (*to) = from; }

 /** \internal copy the packet \a from to \a *to, (un-aligned store) */
-template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
-{ (*to) = from; }
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
+{  (*to) = from; }
+
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
+ { return ploadu<Packet>(from); }
+
+ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
+ { pstore(to, from); }

 /** \internal tries to do cache prefetching of \a addr */
-template<typename Scalar> inline void prefetch(const Scalar* addr)
+template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
 {
-#if !defined(_MSC_VER)
-__builtin_prefetch(addr);
+#ifdef __CUDA_ARCH__
+#if defined(__LP64__)
+  // 64-bit pointer operand constraint for inlined asm
+  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
+#else
+  // 32-bit pointer operand constraint for inlined asm
+  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
+#endif
+#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
+  __builtin_prefetch(addr);
 #endif
 }

 /** \internal \returns the first element of a packet */
-template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
 { return a; }

 /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 preduxp(const Packet* vecs) { return vecs[0]; }

 /** \internal \returns the sum of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
+{ return a; }
+
+/** \internal \returns the sum of the elements of \a a by block of 4 elements.
+  * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
+  * For packet-size smaller or equal to 4, this boils down to a noop.
+  */
+template<typename Packet> EIGEN_DEVICE_FUNC inline
+typename conditional<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>::type
+predux_downto4(const Packet& a)
 { return a; }

 /** \internal \returns the product of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
 { return a; }

 /** \internal \returns the min of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
 { return a; }

 /** \internal \returns the max of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
 { return a; }

 /** \internal \returns the reversed elements of \a a*/
-template<typename Packet> inline Packet preverse(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
 { return a; }

-
 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
-template<typename Packet> inline Packet pcplxflip(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
 {
  // FIXME: uncomment the following in case we drop the internal imag and real functions.
 //   using std::imag;
@@ -250,6 +381,22 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); }
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pacos(const Packet& a) { using std::acos; return acos(a); }

+/** \internal \returns the arc tangent of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet patan(const Packet& a) { using std::atan; return atan(a); }
+
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet psinh(const Packet& a) { using std::sinh; return sinh(a); }
+
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); }
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); }
+
 /** \internal \returns the exp of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet pexp(const Packet& a) { using std::exp; return exp(a); }
@@ -258,10 +405,36 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); }
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet plog(const Packet& a) { using std::log; return log(a); }

+/** \internal \returns the log1p of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog1p(const Packet& a) { return numext::log1p(a); }
+
+/** \internal \returns the log10 of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plog10(const Packet& a) { using std::log10; return log10(a); }
+
 /** \internal \returns the square-root of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }

+/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet prsqrt(const Packet& a) {
+  return pdiv(pset1<Packet>(1), psqrt(a));
+}
+
+/** \internal \returns the rounded value of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pround(const Packet& a) { using numext::round; return round(a); }
+
+/** \internal \returns the floor of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
+
+/** \internal \returns the ceil of \a a (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
+
 /***************************************************************************
 * The following functions might not have to be overwritten for vectorized types
 ***************************************************************************/
@@ -275,34 +448,45 @@ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename u
 }

 /** \internal \returns a * b + c (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmadd(const Packet&  a,
         const Packet&  b,
         const Packet&  c)
 { return padd(pmul(a, b),c); }

 /** \internal \returns a packet version of \a *from.
-  * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */
-template<typename Packet, int LoadMode>
-inline Packet ploadt(const typename unpacket_traits<Packet>::type* from)
+  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+template<typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
 {
-  if(LoadMode == Aligned)
+  if(Alignment >= unpacket_traits<Packet>::alignment)
    return pload<Packet>(from);
  else
    return ploadu<Packet>(from);
 }

 /** \internal copy the packet \a from to \a *to.
-  * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */
-template<typename Scalar, typename Packet, int LoadMode>
-inline void pstoret(Scalar* to, const Packet& from)
+  * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+template<typename Scalar, typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
 {
-  if(LoadMode == Aligned)
+  if(Alignment >= unpacket_traits<Packet>::alignment)
    pstore(to, from);
  else
    pstoreu(to, from);
 }

+/** \internal \returns a packet version of \a *from.
+  * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
+  * hardware if available to speedup the loading of data that won't be modified
+  * by the current computation.
+  */
+template<typename Packet, int LoadMode>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
+{
+  return ploadt<Packet, LoadMode>(from);
+}
+
 /** \internal default implementation of palign() allowing partial specialization */
 template<int Offset,typename PacketType>
 struct palign_impl
@@ -336,15 +520,74 @@ inline void palign(PacketType& first, const PacketType& second)
 * Fast complex products (GCC generates a function call which is very slow)
 ***************************************************************************/

+// Eigen+CUDA does not support complexes.
+#ifndef __CUDACC__
+
 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }

 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }

+#endif
+
+
+/***************************************************************************
+ * PacketBlock, that is a collection of N packets where the number of words
+ * in the packet is a multiple of N.
+***************************************************************************/
+template <typename Packet,int N=unpacket_traits<Packet>::size> struct PacketBlock {
+  Packet packet[N];
+};
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline void
+ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
+  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
+}
+
+/***************************************************************************
+ * Selector, i.e. vector of N boolean values used to select (i.e. blend)
+ * words from 2 packets.
+***************************************************************************/
+template <size_t N> struct Selector {
+  bool select[N];
+};
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
+  return ifPacket.select[0] ? thenPacket : elsePacket;
+}
+
+/** \internal \returns \a a with the first coefficient replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertfirst(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+  // Default implementation based on pblend.
+  // It must be specialized for higher performance.
+  Selector<unpacket_traits<Packet>::size> mask;
+  mask.select[0] = true;
+  // This for loop should be optimized away by the compiler.
+  for(Index i=1; i<unpacket_traits<Packet>::size; ++i)
+    mask.select[i] = false;
+  return pblend(mask, pset1<Packet>(b), a);
+}
+
+/** \internal \returns \a a with the last coefficient replaced by the scalar b */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pinsertlast(const Packet& a, typename unpacket_traits<Packet>::type b)
+{
+  // Default implementation based on pblend.
+  // It must be specialized for higher performance.
+  Selector<unpacket_traits<Packet>::size> mask;
+  // This for loop should be optimized away by the compiler.
+  for(Index i=0; i<unpacket_traits<Packet>::size-1; ++i)
+    mask.select[i] = false;
+  mask.select[unpacket_traits<Packet>::size-1] = true;
+  return pblend(mask, pset1<Packet>(b), a);
+}
+
 } // end namespace internal

 } // end namespace Eigen

 #endif // EIGEN_GENERIC_PACKET_MATH_H
-
--- a/external/eigen3/Eigen/src/Core/GlobalFunctions.h
+++ b/external/eigen3/Eigen/src/Core/GlobalFunctions.h
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2010-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -11,13 +11,30 @@
 #ifndef EIGEN_GLOBAL_FUNCTIONS_H
 #define EIGEN_GLOBAL_FUNCTIONS_H

-#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+
+#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
+  /** \returns an expression of the coefficient-wise DOC_OP of \a x
+
+    DOC_DETAILS
+
+    \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_##NAME">Math functions</a>, class CwiseUnaryOp
+    */ \
+  template<typename Derived> \
+  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
+  NAME(const Eigen::ArrayBase<Derived>& x);
+
+#else
+
+#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \
  template<typename Derived> \
  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> \
-  NAME(const Eigen::ArrayBase<Derived>& x) { \
-    return x.derived(); \
+  (NAME)(const Eigen::ArrayBase<Derived>& x) { \
+    return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
  }

+#endif // EIGEN_PARSED_BY_DOXYGEN
+
 #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \
  \
  template<typename Derived> \
@@ -30,55 +47,133 @@
  { \
    static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) \
    { \
-      return x.derived(); \
+      return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived()); \
    } \
  };

-
 namespace Eigen
 {
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op)
-  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\sa ArrayBase::acos)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
  
+  /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
+    *
+    * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
+    *
+    * \sa ArrayBase::pow()
+    *
+    * \relates ArrayBase
+    */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+  template<typename Derived,typename ScalarExponent>
+  inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >
+  pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
+#else
+  template<typename Derived,typename ScalarExponent>
+  inline typename internal::enable_if<   !(internal::is_same<typename Derived::Scalar,ScalarExponent>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent),
+          const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type
+  pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
+    return x.derived().pow(exponent);
+  }
+
  template<typename Derived>
-  inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
+  inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow)
  pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
    return x.derived().pow(exponent);
  }
+#endif

-  template<typename Derived>
-  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>
-  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<Derived>& exponents) 
+  /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
+    *
+    * This function computes the coefficient-wise power.
+    *
+    * Example: \include Cwise_array_power_array.cpp
+    * Output: \verbinclude Cwise_array_power_array.out
+    * 
+    * \sa ArrayBase::pow()
+    *
+    * \relates ArrayBase
+    */
+  template<typename Derived,typename ExponentDerived>
+  inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
+  pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) 
  {
-    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const Derived, const Derived>(
+    return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
      x.derived(),
      exponents.derived()
    );
  }
  
-  /**
-  * \brief Component-wise division of a scalar by array elements.
-  **/
-  template <typename Derived>
-  inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
-    operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
+  /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
+    *
+    * This function computes the coefficient-wise power between a scalar and an array of exponents.
+    *
+    * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
+    *
+    * Example: \include Cwise_scalar_power_array.cpp
+    * Output: \verbinclude Cwise_scalar_power_array.out
+    * 
+    * \sa ArrayBase::pow()
+    *
+    * \relates ArrayBase
+    */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+  template<typename Scalar,typename Derived>
+  inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar,Derived::Scalar>,Constant<Scalar>,Derived>
+  pow(const Scalar& x,const Eigen::ArrayBase<Derived>& x);
+#else
+  template<typename Scalar, typename Derived>
+  inline typename internal::enable_if<   !(internal::is_same<typename Derived::Scalar,Scalar>::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar),
+          const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type
+  pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
  {
-    return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
-      a.derived(),
-      Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>(s)  
-    );
+    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)(
+            typename internal::plain_constant_type<Derived,Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
  }

+  template<typename Derived>
+  inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)
+  pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
+  {
+    return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)(
+      typename internal::plain_constant_type<Derived,typename Derived::Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
+  }
+#endif
+
+
  namespace internal
  {
    EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op)

--- a/external/eigen3/Eigen/src/Core/IO.h
+++ b/external/eigen3/Eigen/src/Core/IO.h
@@ -49,7 +49,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
  */
 struct IOFormat
 {
-  /** Default contructor, see class IOFormat for the meaning of the parameters */
+  /** Default constructor, see class IOFormat for the meaning of the parameters */
  IOFormat(int _precision = StreamPrecision, int _flags = 0,
    const std::string& _coeffSeparator = " ",
    const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="",
@@ -57,6 +57,10 @@ struct IOFormat
  : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator),
    rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags)
  {
+    // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
+    // don't add rowSpacer if columns are not to be aligned
+    if((flags & DontAlignCols))
+      return;
    int i = int(matSuffix.length())-1;
    while (i>=0 && matSuffix[i]!='\n')
    {
@@ -76,7 +80,7 @@ struct IOFormat
  *
  * \brief Pseudo expression providing matrix output with given format
  *
-  * \param ExpressionType the type of the object on which IO stream operations are performed
+  * \tparam ExpressionType the type of the object on which IO stream operations are performed
  *
  * This class represents an expression with stream operators controlled by a given IOFormat.
  * It is the return type of DenseBase::format()
@@ -101,52 +105,24 @@ class WithFormat
    }

  protected:
-    const typename ExpressionType::Nested m_matrix;
+    typename ExpressionType::Nested m_matrix;
    IOFormat m_format;
 };

-/** \returns a WithFormat proxy object allowing to print a matrix the with given
-  * format \a fmt.
-  *
-  * See class IOFormat for some examples.
-  *
-  * \sa class IOFormat, class WithFormat
-  */
-template<typename Derived>
-inline const WithFormat<Derived>
-DenseBase<Derived>::format(const IOFormat& fmt) const
-{
-  return WithFormat<Derived>(derived(), fmt);
-}
-
 namespace internal {

-template<typename Scalar, bool IsInteger>
-struct significant_decimals_default_impl
-{
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-  static inline int run()
-  {
-    using std::ceil;
-    using std::log;
-    return cast<RealScalar,int>(ceil(-log(NumTraits<RealScalar>::epsilon())/log(RealScalar(10))));
-  }
-};
-
+// NOTE: This helper is kept for backward compatibility with previous code specializing
+//       this internal::significant_decimals_impl structure. In the future we should directly
+//       call digits10() which has been introduced in July 2016 in 3.3.
 template<typename Scalar>
-struct significant_decimals_default_impl<Scalar, true>
+struct significant_decimals_impl
 {
  static inline int run()
  {
-    return 0;
+    return NumTraits<Scalar>::digits10();
  }
 };

-template<typename Scalar>
-struct significant_decimals_impl
-  : significant_decimals_default_impl<Scalar, NumTraits<Scalar>::IsInteger>
-{};
-
 /** \internal
  * print the matrix \a _m to the output stream \a s using the output format \a fmt */
 template<typename Derived>
@@ -160,7 +136,6 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat&
  
  typename Derived::Nested m = _m;
  typedef typename Derived::Scalar Scalar;
-  typedef typename Derived::Index Index;

  Index width = 0;


--- a/external/eigen3/Eigen/src/Core/Inverse.h
+++ b/external/eigen3/Eigen/src/Core/Inverse.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_INVERSE_H
+#define EIGEN_INVERSE_H
+
+namespace Eigen { 
+
+template<typename XprType,typename StorageKind> class InverseImpl;
+
+namespace internal {
+
+template<typename XprType>
+struct traits<Inverse<XprType> >
+  : traits<typename XprType::PlainObject>
+{
+  typedef typename XprType::PlainObject PlainObject;
+  typedef traits<PlainObject> BaseTraits;
+  enum {
+    Flags = BaseTraits::Flags & RowMajorBit
+  };
+};
+
+} // end namespace internal
+
+/** \class Inverse
+  *
+  * \brief Expression of the inverse of another expression
+  *
+  * \tparam XprType the type of the expression we are taking the inverse
+  *
+  * This class represents an abstract expression of A.inverse()
+  * and most of the time this is the only way it is used.
+  *
+  */
+template<typename XprType>
+class Inverse : public InverseImpl<XprType,typename internal::traits<XprType>::StorageKind>
+{
+public:
+  typedef typename XprType::StorageIndex StorageIndex;
+  typedef typename XprType::PlainObject                       PlainObject;
+  typedef typename XprType::Scalar                            Scalar;
+  typedef typename internal::ref_selector<XprType>::type      XprTypeNested;
+  typedef typename internal::remove_all<XprTypeNested>::type  XprTypeNestedCleaned;
+  typedef typename internal::ref_selector<Inverse>::type Nested;
+  typedef typename internal::remove_all<XprType>::type NestedExpression;
+  
+  explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr)
+    : m_xpr(xpr)
+  {}
+
+  EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); }
+
+  EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
+
+protected:
+  XprTypeNested m_xpr;
+};
+
+// Generic API dispatcher
+template<typename XprType, typename StorageKind>
+class InverseImpl
+  : public internal::generic_xpr_base<Inverse<XprType> >::type
+{
+public:
+  typedef typename internal::generic_xpr_base<Inverse<XprType> >::type Base;
+  typedef typename XprType::Scalar Scalar;
+private:
+
+  Scalar coeff(Index row, Index col) const;
+  Scalar coeff(Index i) const;
+};
+
+namespace internal {
+
+/** \internal
+  * \brief Default evaluator for Inverse expression.
+  * 
+  * This default evaluator for Inverse expression simply evaluate the inverse into a temporary
+  * by a call to internal::call_assignment_no_alias.
+  * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for
+  * there own nested expression.
+  *
+  * \sa class Inverse
+  */
+template<typename ArgType>
+struct unary_evaluator<Inverse<ArgType> >
+  : public evaluator<typename Inverse<ArgType>::PlainObject>
+{
+  typedef Inverse<ArgType> InverseType;
+  typedef typename InverseType::PlainObject PlainObject;
+  typedef evaluator<PlainObject> Base;
+  
+  enum { Flags = Base::Flags | EvalBeforeNestingBit };
+
+  unary_evaluator(const InverseType& inv_xpr)
+    : m_result(inv_xpr.rows(), inv_xpr.cols())
+  {
+    ::new (static_cast<Base*>(this)) Base(m_result);
+    internal::call_assignment_no_alias(m_result, inv_xpr);
+  }
+  
+protected:
+  PlainObject m_result;
+};
+  
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_INVERSE_H