From ad2df8fdd30445f47e0c29735a26fcd2dacb90aa Mon Sep 17 00:00:00 2001 From: Luca Fulchir Date: Thu, 29 Mar 2018 21:51:56 +0200 Subject: [PATCH] Update to Eigen 3.3.4 Signed-off-by: Luca Fulchir --- external/eigen3/.hg_archival.txt | 6 +- external/eigen3/.hgeol | 3 + external/eigen3/.hgignore | 2 + external/eigen3/.hgtags | 22 +- external/eigen3/CMakeLists.txt | 218 +- external/eigen3/COPYING.MINPACK | 104 +- external/eigen3/CTestConfig.cmake | 4 +- external/eigen3/Eigen/Array | 11 - external/eigen3/Eigen/CMakeLists.txt | 2 +- external/eigen3/Eigen/Cholesky | 17 +- external/eigen3/Eigen/CholmodSupport | 11 +- external/eigen3/Eigen/Core | 290 +- external/eigen3/Eigen/Eigen | 2 +- external/eigen3/Eigen/Eigen2Support | 95 - external/eigen3/Eigen/Eigenvalues | 15 +- external/eigen3/Eigen/Geometry | 61 +- external/eigen3/Eigen/Householder | 7 + external/eigen3/Eigen/IterativeLinearSolvers | 24 +- external/eigen3/Eigen/Jacobi | 7 + external/eigen3/Eigen/LU | 21 +- external/eigen3/Eigen/LeastSquares | 32 - external/eigen3/Eigen/MetisSupport | 7 + external/eigen3/Eigen/OrderingMethods | 7 + external/eigen3/Eigen/PaStiXSupport | 12 +- external/eigen3/Eigen/PardisoSupport | 9 +- external/eigen3/Eigen/QR | 26 +- external/eigen3/Eigen/QtAlignedMalloc | 10 +- external/eigen3/Eigen/SPQRSupport | 9 +- external/eigen3/Eigen/SVD | 26 +- external/eigen3/Eigen/Sparse | 15 +- external/eigen3/Eigen/SparseCholesky | 2 - external/eigen3/Eigen/SparseCore | 33 +- external/eigen3/Eigen/SparseLU | 3 - external/eigen3/Eigen/SparseQR | 10 +- external/eigen3/Eigen/StdDeque | 2 +- external/eigen3/Eigen/StdList | 2 +- external/eigen3/Eigen/StdVector | 2 +- external/eigen3/Eigen/SuperLUSupport | 13 +- external/eigen3/Eigen/UmfPackSupport | 10 +- external/eigen3/Eigen/src/CMakeLists.txt | 7 - .../eigen3/Eigen/src/Cholesky/CMakeLists.txt | 6 - external/eigen3/Eigen/src/Cholesky/LDLT.h | 268 +- external/eigen3/Eigen/src/Cholesky/LLT.h | 168 +- .../src/Cholesky/{LLT_MKL.h => LLT_LAPACKE.h} | 45 +- .../Eigen/src/CholmodSupport/CMakeLists.txt | 6 - .../Eigen/src/CholmodSupport/CholmodSupport.h | 284 +- external/eigen3/Eigen/src/Core/Array.h | 154 +- external/eigen3/Eigen/src/Core/ArrayBase.h | 78 +- external/eigen3/Eigen/src/Core/ArrayWrapper.h | 151 +- external/eigen3/Eigen/src/Core/Assign.h | 540 +- .../eigen3/Eigen/src/Core/AssignEvaluator.h | 935 + external/eigen3/Eigen/src/Core/Assign_MKL.h | 256 +- external/eigen3/Eigen/src/Core/BandMatrix.h | 61 +- external/eigen3/Eigen/src/Core/Block.h | 249 +- external/eigen3/Eigen/src/Core/BooleanRedux.h | 42 +- external/eigen3/Eigen/src/Core/CMakeLists.txt | 10 - .../eigen3/Eigen/src/Core/CommaInitializer.h | 15 +- .../Eigen/src/Core/ConditionEstimator.h | 175 + .../eigen3/Eigen/src/Core/CoreEvaluators.h | 1671 ++ .../eigen3/Eigen/src/Core/CoreIterators.h | 140 +- .../eigen3/Eigen/src/Core/CwiseBinaryOp.h | 166 +- .../eigen3/Eigen/src/Core/CwiseNullaryOp.h | 322 +- .../eigen3/Eigen/src/Core/CwiseTernaryOp.h | 197 + external/eigen3/Eigen/src/Core/CwiseUnaryOp.h | 111 +- .../eigen3/Eigen/src/Core/CwiseUnaryView.h | 81 +- external/eigen3/Eigen/src/Core/DenseBase.h | 388 +- .../eigen3/Eigen/src/Core/DenseCoeffsBase.h | 279 +- external/eigen3/Eigen/src/Core/DenseStorage.h | 442 +- external/eigen3/Eigen/src/Core/Diagonal.h | 66 +- .../eigen3/Eigen/src/Core/DiagonalMatrix.h | 140 +- .../eigen3/Eigen/src/Core/DiagonalProduct.h | 107 +- external/eigen3/Eigen/src/Core/Dot.h | 160 +- external/eigen3/Eigen/src/Core/EigenBase.h | 52 +- external/eigen3/Eigen/src/Core/Flagged.h | 140 - .../Eigen/src/Core/ForceAlignedAccess.h | 24 +- external/eigen3/Eigen/src/Core/Functors.h | 1029 - external/eigen3/Eigen/src/Core/Fuzzy.h | 13 +- .../eigen3/Eigen/src/Core/GeneralProduct.h | 501 +- .../eigen3/Eigen/src/Core/GenericPacketMath.h | 347 +- .../eigen3/Eigen/src/Core/GlobalFunctions.h | 161 +- external/eigen3/Eigen/src/Core/IO.h | 49 +- external/eigen3/Eigen/src/Core/Inverse.h | 118 + external/eigen3/Eigen/src/Core/Map.h | 110 +- external/eigen3/Eigen/src/Core/MapBase.h | 98 +- .../eigen3/Eigen/src/Core/MathFunctions.h | 829 +- .../eigen3/Eigen/src/Core/MathFunctionsImpl.h | 78 + external/eigen3/Eigen/src/Core/Matrix.h | 241 +- external/eigen3/Eigen/src/Core/MatrixBase.h | 317 +- external/eigen3/Eigen/src/Core/NestByValue.h | 35 +- external/eigen3/Eigen/src/Core/NoAlias.h | 62 +- external/eigen3/Eigen/src/Core/NumTraits.h | 138 +- .../eigen3/Eigen/src/Core/PermutationMatrix.h | 365 +- .../eigen3/Eigen/src/Core/PlainObjectBase.h | 431 +- external/eigen3/Eigen/src/Core/Product.h | 186 + external/eigen3/Eigen/src/Core/ProductBase.h | 290 - .../eigen3/Eigen/src/Core/ProductEvaluators.h | 1105 ++ external/eigen3/Eigen/src/Core/Random.h | 58 +- external/eigen3/Eigen/src/Core/Redux.h | 210 +- external/eigen3/Eigen/src/Core/Ref.h | 199 +- external/eigen3/Eigen/src/Core/Replicate.h | 95 +- .../eigen3/Eigen/src/Core/ReturnByValue.h | 50 +- external/eigen3/Eigen/src/Core/Reverse.h | 223 +- external/eigen3/Eigen/src/Core/Select.h | 22 +- .../eigen3/Eigen/src/Core/SelfAdjointView.h | 292 +- .../eigen3/Eigen/src/Core/SelfCwiseBinaryOp.h | 172 +- external/eigen3/Eigen/src/Core/Solve.h | 188 + .../eigen3/Eigen/src/Core/SolveTriangular.h | 77 +- external/eigen3/Eigen/src/Core/SolverBase.h | 130 + external/eigen3/Eigen/src/Core/StableNorm.h | 54 +- external/eigen3/Eigen/src/Core/Stride.h | 25 +- external/eigen3/Eigen/src/Core/Swap.h | 149 +- external/eigen3/Eigen/src/Core/Transpose.h | 185 +- .../eigen3/Eigen/src/Core/Transpositions.h | 246 +- .../eigen3/Eigen/src/Core/TriangularMatrix.h | 1102 +- external/eigen3/Eigen/src/Core/VectorBlock.h | 27 +- external/eigen3/Eigen/src/Core/VectorwiseOp.h | 339 +- external/eigen3/Eigen/src/Core/Visitor.h | 77 +- .../eigen3/Eigen/src/Core/arch/AVX/Complex.h | 483 + .../Eigen/src/Core/arch/AVX/MathFunctions.h | 439 + .../Eigen/src/Core/arch/AVX/PacketMath.h | 633 + .../Eigen/src/Core/arch/AVX/TypeCasting.h | 51 + .../src/Core/arch/AVX512/MathFunctions.h | 396 + .../Eigen/src/Core/arch/AVX512/PacketMath.h | 1316 ++ .../src/Core/arch/AltiVec/CMakeLists.txt | 6 - .../Eigen/src/Core/arch/AltiVec/Complex.h | 354 +- .../src/Core/arch/AltiVec/MathFunctions.h | 322 + .../Eigen/src/Core/arch/AltiVec/PacketMath.h | 796 +- .../eigen3/Eigen/src/Core/arch/CMakeLists.txt | 4 - .../eigen3/Eigen/src/Core/arch/CUDA/Complex.h | 103 + .../eigen3/Eigen/src/Core/arch/CUDA/Half.h | 635 + .../Eigen/src/Core/arch/CUDA/MathFunctions.h | 91 + .../Eigen/src/Core/arch/CUDA/PacketMath.h | 333 + .../Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 1123 ++ .../Eigen/src/Core/arch/CUDA/TypeCasting.h | 212 + .../src/Core/arch/Default/CMakeLists.txt | 6 - .../Eigen/src/Core/arch/NEON/CMakeLists.txt | 6 - .../eigen3/Eigen/src/Core/arch/NEON/Complex.h | 251 +- .../Eigen/src/Core/arch/NEON/MathFunctions.h | 91 + .../Eigen/src/Core/arch/NEON/PacketMath.h | 413 +- .../Eigen/src/Core/arch/SSE/CMakeLists.txt | 6 - .../eigen3/Eigen/src/Core/arch/SSE/Complex.h | 99 +- .../Eigen/src/Core/arch/SSE/MathFunctions.h | 113 +- .../Eigen/src/Core/arch/SSE/PacketMath.h | 416 +- .../Eigen/src/Core/arch/SSE/TypeCasting.h | 77 + .../Eigen/src/Core/arch/ZVector/Complex.h | 394 + .../src/Core/arch/ZVector/MathFunctions.h | 137 + .../Eigen/src/Core/arch/ZVector/PacketMath.h | 945 + .../src/Core/functors/AssignmentFunctors.h | 168 + .../Eigen/src/Core/functors/BinaryFunctors.h | 482 + .../Eigen/src/Core/functors/NullaryFunctors.h | 188 + .../Eigen/src/Core/functors/StlFunctors.h | 132 + .../Eigen/src/Core/functors/TernaryFunctors.h | 25 + .../Eigen/src/Core/functors/UnaryFunctors.h | 792 + .../Eigen/src/Core/products/CMakeLists.txt | 6 - .../src/Core/products/CoeffBasedProduct.h | 476 - .../Core/products/GeneralBlockPanelKernel.h | 2244 ++- .../src/Core/products/GeneralMatrixMatrix.h | 371 +- .../products/GeneralMatrixMatrixTriangular.h | 145 +- ...h => GeneralMatrixMatrixTriangular_BLAS.h} | 67 +- ...atrix_MKL.h => GeneralMatrixMatrix_BLAS.h} | 47 +- .../src/Core/products/GeneralMatrixVector.h | 305 +- ...ector_MKL.h => GeneralMatrixVector_BLAS.h} | 68 +- .../Eigen/src/Core/products/Parallelizer.h | 67 +- .../Core/products/SelfadjointMatrixMatrix.h | 335 +- ...x_MKL.h => SelfadjointMatrixMatrix_BLAS.h} | 136 +- .../Core/products/SelfadjointMatrixVector.h | 127 +- ...r_MKL.h => SelfadjointMatrixVector_BLAS.h} | 49 +- .../src/Core/products/SelfadjointProduct.h | 26 +- .../Core/products/SelfadjointRank2Update.h | 8 +- .../Core/products/TriangularMatrixMatrix.h | 138 +- ...ix_MKL.h => TriangularMatrixMatrix_BLAS.h} | 111 +- .../Core/products/TriangularMatrixVector.h | 182 +- ...or_MKL.h => TriangularMatrixVector_BLAS.h} | 96 +- .../Core/products/TriangularSolverMatrix.h | 73 +- ...ix_MKL.h => TriangularSolverMatrix_BLAS.h} | 56 +- .../Core/products/TriangularSolverVector.h | 24 +- .../eigen3/Eigen/src/Core/util/BlasUtil.h | 215 +- .../eigen3/Eigen/src/Core/util/CMakeLists.txt | 6 - .../eigen3/Eigen/src/Core/util/Constants.h | 160 +- .../src/Core/util/DisableStupidWarnings.h | 31 +- .../Eigen/src/Core/util/ForwardDeclarations.h | 134 +- .../eigen3/Eigen/src/Core/util/MKL_support.h | 50 +- external/eigen3/Eigen/src/Core/util/Macros.h | 544 +- external/eigen3/Eigen/src/Core/util/Memory.h | 493 +- external/eigen3/Eigen/src/Core/util/Meta.h | 351 +- .../src/Core/util/ReenableStupidWarnings.h | 10 + .../eigen3/Eigen/src/Core/util/StaticAssert.h | 40 +- .../eigen3/Eigen/src/Core/util/XprHelper.h | 582 +- .../eigen3/Eigen/src/Eigen2Support/Block.h | 126 - .../Eigen/src/Eigen2Support/CMakeLists.txt | 8 - .../eigen3/Eigen/src/Eigen2Support/Cwise.h | 192 - .../Eigen/src/Eigen2Support/CwiseOperators.h | 298 - .../src/Eigen2Support/Geometry/AlignedBox.h | 159 - .../Eigen/src/Eigen2Support/Geometry/All.h | 115 - .../src/Eigen2Support/Geometry/AngleAxis.h | 214 - .../src/Eigen2Support/Geometry/CMakeLists.txt | 6 - .../src/Eigen2Support/Geometry/Hyperplane.h | 254 - .../Eigen2Support/Geometry/ParametrizedLine.h | 141 - .../src/Eigen2Support/Geometry/Quaternion.h | 495 - .../src/Eigen2Support/Geometry/Rotation2D.h | 145 - .../src/Eigen2Support/Geometry/RotationBase.h | 123 - .../src/Eigen2Support/Geometry/Scaling.h | 167 - .../src/Eigen2Support/Geometry/Transform.h | 786 - .../src/Eigen2Support/Geometry/Translation.h | 184 - external/eigen3/Eigen/src/Eigen2Support/LU.h | 120 - .../eigen3/Eigen/src/Eigen2Support/Lazy.h | 71 - .../Eigen/src/Eigen2Support/LeastSquares.h | 169 - .../eigen3/Eigen/src/Eigen2Support/Macros.h | 20 - .../Eigen/src/Eigen2Support/MathFunctions.h | 57 - .../eigen3/Eigen/src/Eigen2Support/Memory.h | 45 - .../eigen3/Eigen/src/Eigen2Support/Meta.h | 75 - .../eigen3/Eigen/src/Eigen2Support/Minor.h | 117 - external/eigen3/Eigen/src/Eigen2Support/QR.h | 67 - external/eigen3/Eigen/src/Eigen2Support/SVD.h | 637 - .../src/Eigen2Support/TriangularSolver.h | 42 - .../Eigen/src/Eigen2Support/VectorBlock.h | 94 - .../Eigen/src/Eigenvalues/CMakeLists.txt | 6 - .../src/Eigenvalues/ComplexEigenSolver.h | 25 +- .../Eigen/src/Eigenvalues/ComplexSchur.h | 19 +- ...plexSchur_MKL.h => ComplexSchur_LAPACKE.h} | 40 +- .../Eigen/src/Eigenvalues/EigenSolver.h | 113 +- .../src/Eigenvalues/GeneralizedEigenSolver.h | 217 +- .../GeneralizedSelfAdjointEigenSolver.h | 3 +- .../src/Eigenvalues/HessenbergDecomposition.h | 15 +- .../eigen3/Eigen/src/Eigenvalues/RealQZ.h | 46 +- .../eigen3/Eigen/src/Eigenvalues/RealSchur.h | 41 +- .../{RealSchur_MKL.h => RealSchur_LAPACKE.h} | 38 +- .../src/Eigenvalues/SelfAdjointEigenSolver.h | 273 +- ...MKL.h => SelfAdjointEigenSolver_LAPACKE.h} | 42 +- .../src/Eigenvalues/Tridiagonalization.h | 35 +- .../eigen3/Eigen/src/Geometry/AlignedBox.h | 96 +- .../eigen3/Eigen/src/Geometry/AngleAxis.h | 85 +- .../eigen3/Eigen/src/Geometry/CMakeLists.txt | 8 - .../eigen3/Eigen/src/Geometry/EulerAngles.h | 22 +- .../eigen3/Eigen/src/Geometry/Homogeneous.h | 290 +- .../eigen3/Eigen/src/Geometry/Hyperplane.h | 64 +- .../eigen3/Eigen/src/Geometry/OrthoMethods.h | 58 +- .../Eigen/src/Geometry/ParametrizedLine.h | 60 +- .../eigen3/Eigen/src/Geometry/Quaternion.h | 227 +- .../eigen3/Eigen/src/Geometry/Rotation2D.h | 85 +- .../eigen3/Eigen/src/Geometry/RotationBase.h | 48 +- external/eigen3/Eigen/src/Geometry/Scaling.h | 38 +- .../eigen3/Eigen/src/Geometry/Transform.h | 254 +- .../eigen3/Eigen/src/Geometry/Translation.h | 54 +- external/eigen3/Eigen/src/Geometry/Umeyama.h | 19 +- .../Eigen/src/Geometry/arch/CMakeLists.txt | 6 - .../Eigen/src/Geometry/arch/Geometry_SSE.h | 86 +- .../Eigen/src/Householder/BlockHouseholder.h | 77 +- .../Eigen/src/Householder/CMakeLists.txt | 6 - .../Eigen/src/Householder/Householder.h | 4 +- .../src/Householder/HouseholderSequence.h | 56 +- .../BasicPreconditioners.h | 129 +- .../src/IterativeLinearSolvers/BiCGSTAB.h | 97 +- .../src/IterativeLinearSolvers/CMakeLists.txt | 6 - .../ConjugateGradient.h | 137 +- .../IncompleteCholesky.h | 400 + .../IterativeLinearSolvers/IncompleteLUT.h | 138 +- .../IterativeSolverBase.h | 340 +- .../LeastSquareConjugateGradient.h | 216 + .../IterativeLinearSolvers/SolveWithGuess.h | 115 + .../eigen3/Eigen/src/Jacobi/CMakeLists.txt | 6 - external/eigen3/Eigen/src/Jacobi/Jacobi.h | 78 +- external/eigen3/Eigen/src/LU/CMakeLists.txt | 8 - external/eigen3/Eigen/src/LU/Determinant.h | 2 +- external/eigen3/Eigen/src/LU/FullPivLU.h | 314 +- .../Eigen/src/LU/{Inverse.h => InverseImpl.h} | 79 +- external/eigen3/Eigen/src/LU/PartialPivLU.h | 252 +- ...tialPivLU_MKL.h => PartialPivLU_LAPACKE.h} | 26 +- .../eigen3/Eigen/src/LU/arch/CMakeLists.txt | 6 - .../eigen3/Eigen/src/LU/arch/Inverse_SSE.h | 23 +- .../Eigen/src/MetisSupport/CMakeLists.txt | 6 - .../Eigen/src/MetisSupport/MetisSupport.h | 18 +- .../eigen3/Eigen/src/OrderingMethods/Amd.h | 83 +- .../Eigen/src/OrderingMethods/CMakeLists.txt | 6 - .../Eigen/src/OrderingMethods/Eigen_Colamd.h | 412 +- .../Eigen/src/OrderingMethods/Ordering.h | 51 +- .../Eigen/src/PaStiXSupport/CMakeLists.txt | 6 - .../Eigen/src/PaStiXSupport/PaStiXSupport.h | 151 +- .../Eigen/src/PardisoSupport/CMakeLists.txt | 6 - .../Eigen/src/PardisoSupport/PardisoSupport.h | 230 +- external/eigen3/Eigen/src/QR/CMakeLists.txt | 6 - .../eigen3/Eigen/src/QR/ColPivHouseholderQR.h | 278 +- ...QR_MKL.h => ColPivHouseholderQR_LAPACKE.h} | 47 +- .../src/QR/CompleteOrthogonalDecomposition.h | 562 + .../Eigen/src/QR/FullPivHouseholderQR.h | 189 +- external/eigen3/Eigen/src/QR/HouseholderQR.h | 116 +- ...holderQR_MKL.h => HouseholderQR_LAPACKE.h} | 29 +- .../Eigen/src/SPQRSupport/CMakeLists.txt | 6 - .../src/SPQRSupport/SuiteSparseQRSupport.h | 123 +- external/eigen3/Eigen/src/SVD/BDCSVD.h | 1231 ++ external/eigen3/Eigen/src/SVD/CMakeLists.txt | 6 - external/eigen3/Eigen/src/SVD/JacobiSVD.h | 332 +- .../{JacobiSVD_MKL.h => JacobiSVD_LAPACKE.h} | 48 +- .../{unsupported => }/Eigen/src/SVD/SVDBase.h | 199 +- .../Eigen/src/SVD/UpperBidiagonalization.h | 328 +- .../Eigen/src/SparseCholesky/CMakeLists.txt | 6 - .../src/SparseCholesky/SimplicialCholesky.h | 272 +- .../SparseCholesky/SimplicialCholesky_impl.h | 34 +- .../eigen3/Eigen/src/SparseCore/AmbiVector.h | 98 +- .../Eigen/src/SparseCore/CMakeLists.txt | 6 - .../Eigen/src/SparseCore/CompressedStorage.h | 138 +- .../ConservativeSparseSparseProduct.h | 216 +- .../Eigen/src/SparseCore/MappedSparseMatrix.h | 164 +- .../Eigen/src/SparseCore/SparseAssign.h | 216 + .../eigen3/Eigen/src/SparseCore/SparseBlock.h | 1226 +- .../Eigen/src/SparseCore/SparseColEtree.h | 44 +- .../src/SparseCore/SparseCompressedBase.h | 341 + .../src/SparseCore/SparseCwiseBinaryOp.h | 666 +- .../Eigen/src/SparseCore/SparseCwiseUnaryOp.h | 155 +- .../Eigen/src/SparseCore/SparseDenseProduct.h | 417 +- .../src/SparseCore/SparseDiagonalProduct.h | 228 +- .../eigen3/Eigen/src/SparseCore/SparseDot.h | 17 +- .../eigen3/Eigen/src/SparseCore/SparseFuzzy.h | 29 +- .../eigen3/Eigen/src/SparseCore/SparseMap.h | 305 + .../Eigen/src/SparseCore/SparseMatrix.h | 637 +- .../Eigen/src/SparseCore/SparseMatrixBase.h | 269 +- .../Eigen/src/SparseCore/SparsePermutation.h | 170 +- .../Eigen/src/SparseCore/SparseProduct.h | 291 +- .../eigen3/Eigen/src/SparseCore/SparseRedux.h | 5 +- .../eigen3/Eigen/src/SparseCore/SparseRef.h | 397 + .../src/SparseCore/SparseSelfAdjointView.h | 559 +- .../Eigen/src/SparseCore/SparseSolverBase.h | 124 + .../SparseSparseProductWithPruning.h | 84 +- .../Eigen/src/SparseCore/SparseTranspose.h | 99 +- .../src/SparseCore/SparseTriangularView.h | 252 +- .../eigen3/Eigen/src/SparseCore/SparseUtil.h | 106 +- .../Eigen/src/SparseCore/SparseVector.h | 208 +- .../eigen3/Eigen/src/SparseCore/SparseView.h | 230 +- .../Eigen/src/SparseCore/TriangularSolver.h | 117 +- .../eigen3/Eigen/src/SparseLU/CMakeLists.txt | 6 - external/eigen3/Eigen/src/SparseLU/SparseLU.h | 221 +- .../eigen3/Eigen/src/SparseLU/SparseLUImpl.h | 10 +- .../Eigen/src/SparseLU/SparseLU_Memory.h | 15 +- .../Eigen/src/SparseLU/SparseLU_Structs.h | 3 +- .../src/SparseLU/SparseLU_SupernodalMatrix.h | 69 +- .../Eigen/src/SparseLU/SparseLU_Utils.h | 10 +- .../Eigen/src/SparseLU/SparseLU_column_bmod.h | 7 +- .../Eigen/src/SparseLU/SparseLU_column_dfs.h | 38 +- .../src/SparseLU/SparseLU_copy_to_ucol.h | 7 +- .../Eigen/src/SparseLU/SparseLU_gemm_kernel.h | 93 +- .../src/SparseLU/SparseLU_heap_relax_snode.h | 21 +- .../Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 52 +- .../Eigen/src/SparseLU/SparseLU_panel_bmod.h | 6 +- .../Eigen/src/SparseLU/SparseLU_panel_dfs.h | 44 +- .../Eigen/src/SparseLU/SparseLU_pivotL.h | 12 +- .../Eigen/src/SparseLU/SparseLU_pruneL.h | 7 +- .../Eigen/src/SparseLU/SparseLU_relax_snode.h | 12 +- .../eigen3/Eigen/src/SparseQR/CMakeLists.txt | 6 - external/eigen3/Eigen/src/SparseQR/SparseQR.h | 187 +- .../Eigen/src/StlSupport/CMakeLists.txt | 6 - .../eigen3/Eigen/src/StlSupport/StdDeque.h | 2 +- .../eigen3/Eigen/src/StlSupport/StdList.h | 4 +- .../eigen3/Eigen/src/StlSupport/StdVector.h | 5 + .../eigen3/Eigen/src/StlSupport/details.h | 16 +- .../Eigen/src/SuperLUSupport/CMakeLists.txt | 6 - .../Eigen/src/SuperLUSupport/SuperLUSupport.h | 209 +- .../Eigen/src/UmfPackSupport/CMakeLists.txt | 6 - .../Eigen/src/UmfPackSupport/UmfPackSupport.h | 313 +- external/eigen3/Eigen/src/misc/CMakeLists.txt | 6 - external/eigen3/Eigen/src/misc/Image.h | 2 - external/eigen3/Eigen/src/misc/Kernel.h | 4 +- external/eigen3/Eigen/src/misc/RealSvd2x2.h | 55 + external/eigen3/Eigen/src/misc/Solve.h | 76 - external/eigen3/Eigen/src/misc/SparseSolve.h | 128 - external/eigen3/Eigen/src/misc/blas.h | 418 +- external/eigen3/Eigen/src/misc/lapack.h | 152 + external/eigen3/Eigen/src/misc/lapacke.h | 16291 ++++++++++++++++ .../eigen3/Eigen/src/misc/lapacke_mangling.h | 17 + .../Eigen/src/plugins/ArrayCwiseBinaryOps.h | 203 +- .../Eigen/src/plugins/ArrayCwiseUnaryOps.h | 467 +- .../eigen3/Eigen/src/plugins/BlockMethods.h | 1269 +- .../eigen3/Eigen/src/plugins/CMakeLists.txt | 6 - .../Eigen/src/plugins/CommonCwiseBinaryOps.h | 75 +- .../Eigen/src/plugins/CommonCwiseUnaryOps.h | 205 +- .../Eigen/src/plugins/MatrixCwiseBinaryOps.h | 29 +- .../Eigen/src/plugins/MatrixCwiseUnaryOps.h | 115 +- external/eigen3/README.md | 3 + external/eigen3/bench/BenchTimer.h | 10 +- .../eigen3/bench/analyze-blocking-sizes.cpp | 876 + external/eigen3/bench/benchCholesky.cpp | 16 +- external/eigen3/bench/bench_gemm.cpp | 117 +- external/eigen3/bench/bench_norm.cpp | 117 +- .../eigen3/bench/benchmark-blocking-sizes.cpp | 677 + external/eigen3/bench/btl/CMakeLists.txt | 31 +- .../eigen3/bench/btl/actions/action_axpby.hh | 2 +- .../eigen3/bench/btl/actions/action_axpy.hh | 2 +- .../eigen3/bench/btl/cmake/FindACML.cmake | 2 + .../eigen3/bench/btl/cmake/FindATLAS.cmake | 26 +- .../eigen3/bench/btl/cmake/FindBLAZE.cmake | 31 + .../eigen3/bench/btl/cmake/FindCBLAS.cmake | 1 + .../eigen3/bench/btl/cmake/FindGOTO.cmake | 15 - .../eigen3/bench/btl/cmake/FindGOTO2.cmake | 25 - .../eigen3/bench/btl/cmake/FindOPENBLAS.cmake | 17 + .../eigen3/bench/btl/data/action_settings.txt | 34 +- .../bench/btl/data/perlib_plot_settings.txt | 4 +- .../eigen3/bench/btl/generic_bench/bench.hh | 4 +- .../btl/generic_bench/bench_parameter.hh | 4 +- .../eigen3/bench/btl/generic_bench/btl.hh | 4 +- .../btl/generic_bench/init/init_function.hh | 8 +- .../btl/generic_bench/init/init_matrix.hh | 10 +- .../btl/generic_bench/init/init_vector.hh | 2 +- .../timers/portable_perf_analyzer.hh | 2 +- .../generic_bench/timers/portable_timer.hh | 46 +- .../btl/generic_bench/utils/size_lin_log.hh | 2 +- .../eigen3/bench/btl/libs/BLAS/CMakeLists.txt | 29 +- .../btl/libs/BLAS/blas_interface_impl.hh | 6 +- .../bench/btl/libs/BLAS/c_interface_base.h | 6 +- external/eigen3/bench/btl/libs/BLAS/main.cpp | 10 +- .../bench/btl/libs/STL/STL_interface.hh | 4 +- .../bench/btl/libs/blaze/CMakeLists.txt | 13 + .../bench/btl/libs/blaze/blaze_interface.hh | 140 + external/eigen3/bench/btl/libs/blaze/main.cpp | 40 + .../bench/btl/libs/eigen2/eigen2_interface.hh | 2 +- .../bench/btl/libs/eigen3/eigen3_interface.hh | 58 +- .../eigen3/bench/btl/libs/eigen3/main_adv.cpp | 14 +- .../bench/btl/libs/tensors/CMakeLists.txt | 44 + .../bench/btl/libs/tensors/main_linear.cpp | 23 + .../bench/btl/libs/tensors/main_matmat.cpp | 21 + .../bench/btl/libs/tensors/main_vecmat.cpp | 21 + .../btl/libs/tensors/tensor_interface.hh | 105 + external/eigen3/bench/dense_solvers.cpp | 186 + external/eigen3/bench/eig33.cpp | 57 +- .../bench/perf_monitoring/gemm/changesets.txt | 61 + .../bench/perf_monitoring/gemm/gemm.cpp | 67 + .../perf_monitoring/gemm/gemm_settings.txt | 15 + .../bench/perf_monitoring/gemm/lazy_gemm.cpp | 98 + .../gemm/lazy_gemm_settings.txt | 15 + .../bench/perf_monitoring/gemm/make_plot.sh | 38 + .../eigen3/bench/perf_monitoring/gemm/run.sh | 156 + external/eigen3/bench/spbench/CMakeLists.txt | 29 +- external/eigen3/bench/spbench/spbenchstyle.h | 3 +- external/eigen3/bench/tensors/README | 21 + external/eigen3/bench/tensors/benchmark.h | 49 + .../eigen3/bench/tensors/benchmark_main.cc | 237 + .../tensors/contraction_benchmarks_cpu.cc | 39 + .../eigen3/bench/tensors/tensor_benchmarks.h | 478 + .../bench/tensors/tensor_benchmarks_cpu.cc | 168 + .../tensors/tensor_benchmarks_fp16_gpu.cu | 77 + .../bench/tensors/tensor_benchmarks_gpu.cu | 75 + .../bench/tensors/tensor_benchmarks_sycl.cc | 37 + external/eigen3/blas/CMakeLists.txt | 27 +- .../blas/PackedTriangularMatrixVector.h | 4 +- external/eigen3/blas/chbmv.f | 310 - external/eigen3/blas/chpmv.f | 272 - external/eigen3/blas/common.h | 44 +- external/eigen3/blas/ctbmv.f | 366 - external/eigen3/blas/double.cpp | 11 +- external/eigen3/blas/drotm.f | 147 - external/eigen3/blas/drotmg.f | 206 - external/eigen3/blas/dsbmv.f | 304 - external/eigen3/blas/dspmv.f | 265 - external/eigen3/blas/dtbmv.f | 335 - external/eigen3/blas/f2c/chbmv.c | 487 + external/eigen3/blas/f2c/chpmv.c | 438 + external/eigen3/blas/f2c/complexdots.c | 84 + external/eigen3/blas/f2c/ctbmv.c | 647 + external/eigen3/blas/f2c/d_cnjg.c | 6 + external/eigen3/blas/f2c/datatypes.h | 24 + external/eigen3/blas/f2c/drotm.c | 215 + external/eigen3/blas/f2c/drotmg.c | 293 + external/eigen3/blas/f2c/dsbmv.c | 366 + external/eigen3/blas/f2c/dspmv.c | 316 + external/eigen3/blas/f2c/dtbmv.c | 428 + external/eigen3/blas/f2c/lsame.c | 117 + external/eigen3/blas/f2c/r_cnjg.c | 6 + external/eigen3/blas/f2c/srotm.c | 216 + external/eigen3/blas/f2c/srotmg.c | 295 + external/eigen3/blas/f2c/ssbmv.c | 368 + external/eigen3/blas/f2c/sspmv.c | 316 + external/eigen3/blas/f2c/stbmv.c | 428 + external/eigen3/blas/f2c/zhbmv.c | 488 + external/eigen3/blas/f2c/zhpmv.c | 438 + external/eigen3/blas/f2c/ztbmv.c | 647 + .../eigen3/blas/{ => fortran}/complexdots.f | 0 external/eigen3/blas/level1_cplx_impl.h | 54 +- external/eigen3/blas/level1_impl.h | 49 +- external/eigen3/blas/level1_real_impl.h | 22 +- external/eigen3/blas/level2_cplx_impl.h | 118 +- external/eigen3/blas/level2_impl.h | 429 +- external/eigen3/blas/level2_real_impl.h | 174 +- external/eigen3/blas/level3_impl.h | 468 +- external/eigen3/blas/lsame.f | 85 - external/eigen3/blas/single.cpp | 2 +- external/eigen3/blas/srotm.f | 148 - external/eigen3/blas/srotmg.f | 208 - external/eigen3/blas/ssbmv.f | 306 - external/eigen3/blas/sspmv.f | 265 - external/eigen3/blas/stbmv.f | 335 - external/eigen3/blas/testing/cblat1.f | 83 +- external/eigen3/blas/testing/cblat2.f | 188 +- external/eigen3/blas/testing/cblat3.f | 185 +- external/eigen3/blas/testing/dblat2.f | 186 +- external/eigen3/blas/testing/dblat3.f | 168 +- external/eigen3/blas/testing/sblat2.f | 186 +- external/eigen3/blas/testing/sblat3.f | 168 +- external/eigen3/blas/testing/zblat1.f | 83 +- external/eigen3/blas/testing/zblat2.f | 188 +- external/eigen3/blas/testing/zblat3.f | 189 +- external/eigen3/blas/xerbla.cpp | 4 +- external/eigen3/blas/zhbmv.f | 310 - external/eigen3/blas/zhpmv.f | 272 - external/eigen3/blas/ztbmv.f | 366 - external/eigen3/cmake/Eigen3Config.cmake.in | 21 + .../eigen3/cmake/Eigen3ConfigLegacy.cmake.in | 30 + .../eigen3/cmake/EigenConfigureTesting.cmake | 22 +- .../cmake/EigenDetermineVSServicePack.cmake | 18 +- external/eigen3/cmake/EigenTesting.cmake | 344 +- external/eigen3/cmake/EigenUninstall.cmake | 40 + external/eigen3/cmake/FindAdolc.cmake | 2 +- external/eigen3/cmake/FindBLAS.cmake | 1499 +- external/eigen3/cmake/FindBLASEXT.cmake | 380 + external/eigen3/cmake/FindComputeCpp.cmake | 245 + external/eigen3/cmake/FindEigen3.cmake | 30 +- external/eigen3/cmake/FindHWLOC.cmake | 331 + external/eigen3/cmake/FindMetis.cmake | 297 +- external/eigen3/cmake/FindPTSCOTCH.cmake | 423 + external/eigen3/cmake/FindPastix.cmake | 713 +- external/eigen3/cmake/FindScotch.cmake | 379 +- external/eigen3/cmake/FindSuperLU.cmake | 79 +- external/eigen3/cmake/UseEigen3.cmake | 6 + external/eigen3/cmake/language_support.cmake | 3 +- external/eigen3/debug/gdb/printers.py | 12 +- external/eigen3/debug/msvc/eigen.natvis | 470 +- .../eigen3/debug/msvc/eigen_autoexp_part.dat | 590 +- .../eigen3/demos/opengl/quaternion_demo.cpp | 2 +- external/eigen3/demos/opengl/trackball.cpp | 2 +- external/eigen3/doc/A05_PortingFrom2To3.dox | 15 +- .../eigen3/doc/A10_Eigen2SupportModes.dox | 95 - external/eigen3/doc/AsciiQuickReference.txt | 116 +- external/eigen3/doc/B01_Experimental.dox | 6 +- external/eigen3/doc/CMakeLists.txt | 12 + .../doc/CoeffwiseMathFunctionsTable.dox | 525 + external/eigen3/doc/CustomizingEigen.dox | 188 - .../doc/CustomizingEigen_CustomScalar.dox | 120 + .../doc/CustomizingEigen_InheritingMatrix.dox | 34 + .../doc/CustomizingEigen_NullaryExpr.dox | 86 + .../eigen3/doc/CustomizingEigen_Plugins.dox | 69 + .../doc/DenseDecompositionBenchmark.dox | 42 + external/eigen3/doc/Doxyfile.in | 52 +- external/eigen3/doc/FixedSizeVectorizable.dox | 4 +- external/eigen3/doc/InplaceDecomposition.dox | 115 + external/eigen3/doc/LeastSquares.dox | 70 + external/eigen3/doc/Manual.dox | 31 +- .../eigen3/doc/MatrixfreeSolverExample.dox | 9 +- external/eigen3/doc/NewExpressionType.dox | 143 + external/eigen3/doc/Overview.dox | 4 +- .../eigen3/doc/PreprocessorDirectives.dox | 80 +- external/eigen3/doc/QuickReference.dox | 138 +- external/eigen3/doc/SparseLinearSystems.dox | 89 +- external/eigen3/doc/SparseQuickReference.dox | 16 +- external/eigen3/doc/StlContainers.dox | 4 +- .../eigen3/doc/StructHavingEigenMembers.dox | 24 +- external/eigen3/doc/TemplateKeyword.dox | 17 +- external/eigen3/doc/TopicAliasing.dox | 30 +- external/eigen3/doc/TopicCMakeGuide.dox | 52 + external/eigen3/doc/TopicLazyEvaluation.dox | 4 +- .../doc/TopicLinearAlgebraDecompositions.dox | 8 +- external/eigen3/doc/TopicMultithreading.dox | 16 +- external/eigen3/doc/TutorialArrayClass.dox | 2 +- external/eigen3/doc/TutorialGeometry.dox | 9 +- external/eigen3/doc/TutorialLinearAlgebra.dox | 37 +- ...TutorialReductionsVisitorsBroadcasting.dox | 29 +- .../eigen3/doc/TutorialReshapeSlicing.dox | 65 + external/eigen3/doc/TutorialSparse.dox | 16 +- external/eigen3/doc/UnalignedArrayAssert.dox | 23 +- .../eigen3/doc/UsingBlasLapackBackends.dox | 133 + external/eigen3/doc/UsingIntelMKL.dox | 96 +- external/eigen3/doc/UsingNVCC.dox | 32 + external/eigen3/doc/eigendoxy.css | 41 +- external/eigen3/doc/examples/CMakeLists.txt | 5 + .../examples/CustomizingEigen_Inheritance.cpp | 30 + external/eigen3/doc/examples/Cwise_erf.cpp | 9 + external/eigen3/doc/examples/Cwise_erfc.cpp | 9 + external/eigen3/doc/examples/Cwise_lgamma.cpp | 9 + .../doc/examples/MatrixBase_cwise_const.cpp | 18 - .../eigen3/doc/examples/TutorialInplaceLU.cpp | 61 + .../TutorialLinAlgInverseDeterminant.cpp | 2 +- ...rsBroadcasting_reductions_operatornorm.cpp | 18 + .../eigen3/doc/examples/make_circulant.cpp | 11 + .../doc/examples/make_circulant.cpp.entry | 5 + .../doc/examples/make_circulant.cpp.evaluator | 32 + .../examples/make_circulant.cpp.expression | 20 + .../doc/examples/make_circulant.cpp.main | 8 + .../doc/examples/make_circulant.cpp.preamble | 4 + .../doc/examples/make_circulant.cpp.traits | 19 + .../eigen3/doc/examples/make_circulant2.cpp | 52 + .../eigen3/doc/examples/matrixfree_cg.cpp | 210 +- .../eigen3/doc/examples/nullary_indexing.cpp | 66 + external/eigen3/doc/ftv2node.png | Bin 0 -> 86 bytes external/eigen3/doc/ftv2pnode.png | Bin 0 -> 229 bytes .../eigen3/doc/snippets/BiCGSTAB_simple.cpp | 11 + .../doc/snippets/BiCGSTAB_step_by_step.cpp | 14 + external/eigen3/doc/snippets/CMakeLists.txt | 2 - external/eigen3/doc/snippets/Cwise_arg.cpp | 3 + .../doc/snippets/Cwise_array_power_array.cpp | 4 + external/eigen3/doc/snippets/Cwise_atan.cpp | 2 + .../eigen3/doc/snippets/Cwise_boolean_not.cpp | 5 + .../eigen3/doc/snippets/Cwise_boolean_xor.cpp | 2 + external/eigen3/doc/snippets/Cwise_ceil.cpp | 3 + external/eigen3/doc/snippets/Cwise_cosh.cpp | 2 + external/eigen3/doc/snippets/Cwise_floor.cpp | 3 + .../eigen3/doc/snippets/Cwise_isFinite.cpp | 5 + external/eigen3/doc/snippets/Cwise_isInf.cpp | 5 + external/eigen3/doc/snippets/Cwise_isNaN.cpp | 5 + external/eigen3/doc/snippets/Cwise_log10.cpp | 2 + external/eigen3/doc/snippets/Cwise_round.cpp | 3 + .../doc/snippets/Cwise_scalar_power_array.cpp | 2 + external/eigen3/doc/snippets/Cwise_sign.cpp | 2 + external/eigen3/doc/snippets/Cwise_sinh.cpp | 2 + external/eigen3/doc/snippets/Cwise_tanh.cpp | 2 + .../doc/snippets/DenseBase_LinSpacedInt.cpp | 8 + .../snippets/DirectionWise_hnormalized.cpp | 7 + .../doc/snippets/EigenSolver_eigenvectors.cpp | 4 +- .../snippets/LeastSquaresNormalEquations.cpp | 4 + .../eigen3/doc/snippets/LeastSquaresQR.cpp | 4 + .../doc/snippets/MatrixBase_cwiseSign.cpp | 4 + .../doc/snippets/MatrixBase_hnormalized.cpp | 6 + .../doc/snippets/MatrixBase_homogeneous.cpp | 6 + .../eigen3/doc/snippets/MatrixBase_marked.cpp | 14 - .../eigen3/doc/snippets/MatrixBase_part.cpp | 13 - .../snippets/MatrixBase_selfadjointView.cpp | 6 + ...ract.cpp => MatrixBase_triangularView.cpp} | 12 +- .../doc/snippets/PartialRedux_count.cpp | 4 +- .../doc/snippets/SparseMatrix_coeffs.cpp | 9 + .../doc/snippets/TopicAliasing_mult4.cpp | 5 + .../doc/snippets/TopicAliasing_mult5.cpp | 5 + .../eigen3/doc/snippets/Triangular_solve.cpp | 11 + .../Tutorial_AdvancedInitialization_Join.cpp | 2 +- .../doc/snippets/Tutorial_ReshapeMat2Mat.cpp | 6 + .../doc/snippets/Tutorial_ReshapeMat2Vec.cpp | 11 + .../doc/snippets/Tutorial_SlicingCol.cpp | 11 + .../doc/snippets/Tutorial_SlicingVec.cpp | 4 + .../doc/snippets/VectorwiseOp_homogeneous.cpp | 7 + .../doc/snippets/compile_snippet.cpp.in | 10 +- .../doc/special_examples/CMakeLists.txt | 14 + .../Tutorial_sparse_example.cpp | 2 + .../Tutorial_sparse_example_details.cpp | 2 +- .../doc/special_examples/random_cpp11.cpp | 14 + external/eigen3/failtest/CMakeLists.txt | 21 + external/eigen3/failtest/bdcsvd_int.cpp | 14 + ...seunaryview_nonconst_ctor_on_const_xpr.cpp | 15 + ...unaryview_on_const_type_actually_const.cpp | 16 + ...adjointview_nonconst_ctor_on_const_xpr.cpp | 15 + ...jointview_on_const_type_actually_const.cpp | 16 + external/eigen3/failtest/sparse_ref_1.cpp | 18 + external/eigen3/failtest/sparse_ref_2.cpp | 15 + external/eigen3/failtest/sparse_ref_3.cpp | 15 + external/eigen3/failtest/sparse_ref_4.cpp | 15 + external/eigen3/failtest/sparse_ref_5.cpp | 16 + .../failtest/sparse_storage_mismatch.cpp | 16 + external/eigen3/failtest/swap_1.cpp | 14 + external/eigen3/failtest/swap_2.cpp | 14 + external/eigen3/failtest/ternary_1.cpp | 13 + external/eigen3/failtest/ternary_2.cpp | 13 + ...angularview_nonconst_ctor_on_const_xpr.cpp | 15 + ...gularview_on_const_type_actually_const.cpp | 16 + external/eigen3/lapack/complex_double.cpp | 3 +- external/eigen3/lapack/complex_single.cpp | 3 +- external/eigen3/lapack/double.cpp | 3 +- external/eigen3/lapack/eigenvalues.cpp | 27 +- external/eigen3/lapack/lapack_common.h | 8 +- external/eigen3/lapack/single.cpp | 3 +- external/eigen3/lapack/svd.cpp | 138 + external/eigen3/scripts/buildtests.in | 6 +- external/eigen3/scripts/check.in | 2 +- external/eigen3/scripts/eigen_gen_docs | 2 +- external/eigen3/test/CMakeLists.txt | 234 +- external/eigen3/test/adjoint.cpp | 42 +- external/eigen3/test/array.cpp | 243 +- external/eigen3/test/array_for_matrix.cpp | 56 +- external/eigen3/test/array_of_string.cpp | 32 + external/eigen3/test/array_replicate.cpp | 13 + external/eigen3/test/array_reverse.cpp | 32 +- external/eigen3/test/bandmatrix.cpp | 3 - external/eigen3/test/basicstuff.cpp | 86 +- external/eigen3/test/bdcsvd.cpp | 111 + external/eigen3/test/bicgstab.cpp | 16 +- external/eigen3/test/block.cpp | 31 +- external/eigen3/test/boostmultiprec.cpp | 201 + external/eigen3/test/bug1213.cpp | 13 + external/eigen3/test/bug1213.h | 8 + external/eigen3/test/bug1213_main.cpp | 18 + external/eigen3/test/cholesky.cpp | 157 +- external/eigen3/test/cholmod_support.cpp | 15 +- external/eigen3/test/commainitializer.cpp | 4 +- external/eigen3/test/conjugate_gradient.cpp | 18 +- external/eigen3/test/constructor.cpp | 84 + external/eigen3/test/ctorleak.cpp | 69 + external/eigen3/test/cuda_basic.cu | 173 + external/eigen3/test/cuda_common.h | 101 + external/eigen3/test/cwiseop.cpp | 187 - external/eigen3/test/dense_storage.cpp | 76 + external/eigen3/test/diagonal.cpp | 24 + external/eigen3/test/diagonalmatrices.cpp | 27 + external/eigen3/test/dynalloc.cpp | 66 +- external/eigen3/test/eigen2/CMakeLists.txt | 61 - .../eigen3/test/eigen2/eigen2_adjoint.cpp | 99 - .../eigen3/test/eigen2/eigen2_alignedbox.cpp | 60 - external/eigen3/test/eigen2/eigen2_array.cpp | 142 - .../eigen3/test/eigen2/eigen2_basicstuff.cpp | 105 - .../eigen3/test/eigen2/eigen2_bug_132.cpp | 26 - .../eigen3/test/eigen2/eigen2_cholesky.cpp | 113 - .../test/eigen2/eigen2_commainitializer.cpp | 46 - .../eigen3/test/eigen2/eigen2_cwiseop.cpp | 158 - .../eigen3/test/eigen2/eigen2_determinant.cpp | 61 - .../eigen3/test/eigen2/eigen2_dynalloc.cpp | 131 - .../eigen3/test/eigen2/eigen2_eigensolver.cpp | 146 - .../test/eigen2/eigen2_first_aligned.cpp | 49 - .../eigen3/test/eigen2/eigen2_geometry.cpp | 432 - .../eigen2_geometry_with_eigen2_prefix.cpp | 435 - .../eigen3/test/eigen2/eigen2_hyperplane.cpp | 126 - .../eigen3/test/eigen2/eigen2_inverse.cpp | 62 - .../test/eigen2/eigen2_linearstructure.cpp | 83 - external/eigen3/test/eigen2/eigen2_lu.cpp | 122 - external/eigen3/test/eigen2/eigen2_map.cpp | 114 - external/eigen3/test/eigen2/eigen2_meta.cpp | 60 - .../test/eigen2/eigen2_miscmatrices.cpp | 48 - .../eigen3/test/eigen2/eigen2_mixingtypes.cpp | 77 - .../test/eigen2/eigen2_newstdvector.cpp | 149 - .../eigen3/test/eigen2/eigen2_nomalloc.cpp | 53 - .../eigen3/test/eigen2/eigen2_packetmath.cpp | 132 - .../test/eigen2/eigen2_parametrizedline.cpp | 62 - .../test/eigen2/eigen2_prec_inverse_4x4.cpp | 84 - .../test/eigen2/eigen2_product_large.cpp | 45 - .../test/eigen2/eigen2_product_small.cpp | 22 - external/eigen3/test/eigen2/eigen2_qr.cpp | 69 - .../eigen3/test/eigen2/eigen2_qtvector.cpp | 158 - .../eigen3/test/eigen2/eigen2_regression.cpp | 136 - external/eigen3/test/eigen2/eigen2_sizeof.cpp | 31 - .../test/eigen2/eigen2_smallvectors.cpp | 42 - .../test/eigen2/eigen2_sparse_basic.cpp | 317 - .../test/eigen2/eigen2_sparse_product.cpp | 115 - .../test/eigen2/eigen2_sparse_solvers.cpp | 200 - .../test/eigen2/eigen2_sparse_vector.cpp | 84 - .../eigen3/test/eigen2/eigen2_stdvector.cpp | 148 - .../eigen3/test/eigen2/eigen2_submatrices.cpp | 142 - external/eigen3/test/eigen2/eigen2_sum.cpp | 71 - external/eigen3/test/eigen2/eigen2_svd.cpp | 87 - external/eigen3/test/eigen2/eigen2_swap.cpp | 83 - .../eigen3/test/eigen2/eigen2_triangular.cpp | 148 - .../test/eigen2/eigen2_unalignedassert.cpp | 116 - .../eigen3/test/eigen2/eigen2_visitor.cpp | 116 - external/eigen3/test/eigen2/gsl_helper.h | 175 - external/eigen3/test/eigen2/main.h | 399 - external/eigen3/test/eigen2/product.h | 129 - external/eigen3/test/eigen2/runtest.sh | 28 - external/eigen3/test/eigen2/sparse.h | 154 - external/eigen3/test/eigen2/testsuite.cmake | 197 - external/eigen3/test/eigen2support.cpp | 1 - external/eigen3/test/eigensolver_complex.cpp | 71 +- .../test/eigensolver_generalized_real.cpp | 54 +- external/eigen3/test/eigensolver_generic.cpp | 49 +- .../eigen3/test/eigensolver_selfadjoint.cpp | 180 +- external/eigen3/test/evaluator_common.h | 0 external/eigen3/test/evaluators.cpp | 499 + external/eigen3/test/fastmath.cpp | 99 + external/eigen3/test/first_aligned.cpp | 6 +- external/eigen3/test/geo_alignedbox.cpp | 11 +- external/eigen3/test/geo_eulerangles.cpp | 20 +- external/eigen3/test/geo_homogeneous.cpp | 24 +- external/eigen3/test/geo_hyperplane.cpp | 39 +- external/eigen3/test/geo_orthomethods.cpp | 14 +- external/eigen3/test/geo_parametrizedline.cpp | 10 +- external/eigen3/test/geo_quaternion.cpp | 51 +- external/eigen3/test/geo_transformations.cpp | 95 +- external/eigen3/test/half_float.cpp | 264 + external/eigen3/test/incomplete_cholesky.cpp | 65 + .../eigen3/test/inplace_decomposition.cpp | 110 + external/eigen3/test/integer_types.cpp | 8 + external/eigen3/test/inverse.cpp | 15 +- external/eigen3/test/is_same_dense.cpp | 33 + external/eigen3/test/jacobisvd.cpp | 386 +- external/eigen3/test/linearstructure.cpp | 67 +- external/eigen3/test/lscg.cpp | 37 + external/eigen3/test/lu.cpp | 83 +- external/eigen3/test/main.h | 295 +- external/eigen3/test/mapped_matrix.cpp | 86 +- external/eigen3/test/mapstaticmethods.cpp | 6 +- external/eigen3/test/mapstride.cpp | 43 +- external/eigen3/test/meta.cpp | 24 + external/eigen3/test/metis_support.cpp | 22 +- external/eigen3/test/mixingtypes.cpp | 202 +- external/eigen3/test/mpl2only.cpp | 2 + external/eigen3/test/nesting_ops.cpp | 86 +- external/eigen3/test/nomalloc.cpp | 59 +- external/eigen3/test/nullary.cpp | 225 +- external/eigen3/test/numext.cpp | 53 + external/eigen3/test/packetmath.cpp | 425 +- external/eigen3/test/pastix_support.cpp | 10 + external/eigen3/test/permutationmatrices.cpp | 62 +- external/eigen3/test/product.h | 58 +- external/eigen3/test/product_extra.cpp | 146 +- external/eigen3/test/product_large.cpp | 32 +- external/eigen3/test/product_mmtr.cpp | 39 +- external/eigen3/test/product_notemporary.cpp | 60 +- external/eigen3/test/product_selfadjoint.cpp | 9 +- external/eigen3/test/product_small.cpp | 247 +- external/eigen3/test/product_symm.cpp | 18 + external/eigen3/test/product_syrk.cpp | 5 +- external/eigen3/test/product_trmm.cpp | 22 +- external/eigen3/test/product_trmv.cpp | 6 +- external/eigen3/test/product_trsolve.cpp | 18 +- external/eigen3/test/qr.cpp | 9 +- external/eigen3/test/qr_colpivoting.cpp | 206 +- external/eigen3/test/qr_fullpivoting.cpp | 34 +- external/eigen3/test/rand.cpp | 2 +- external/eigen3/test/real_qz.cpp | 16 +- external/eigen3/test/redux.cpp | 13 +- external/eigen3/test/ref.cpp | 27 +- external/eigen3/test/runtest.sh | 20 - external/eigen3/test/rvalue_types.cpp | 14 +- external/eigen3/test/schur_complex.cpp | 4 +- external/eigen3/test/schur_real.cpp | 4 +- external/eigen3/test/selfadjoint.cpp | 15 +- external/eigen3/test/simplicial_cholesky.cpp | 28 +- external/eigen3/test/sizeof.cpp | 17 +- external/eigen3/test/sizeoverflow.cpp | 2 - external/eigen3/test/sparse.h | 23 +- external/eigen3/test/sparse_basic.cpp | 656 +- external/eigen3/test/sparse_block.cpp | 317 + external/eigen3/test/sparse_permutations.cpp | 95 +- external/eigen3/test/sparse_product.cpp | 207 +- external/eigen3/test/sparse_ref.cpp | 139 + external/eigen3/test/sparse_solver.h | 386 +- external/eigen3/test/sparse_vector.cpp | 75 +- external/eigen3/test/sparselu.cpp | 28 +- external/eigen3/test/sparseqr.cpp | 12 +- external/eigen3/test/spqr_support.cpp | 8 +- external/eigen3/test/stable_norm.cpp | 105 +- external/eigen3/test/stdvector.cpp | 6 +- external/eigen3/test/stdvector_overload.cpp | 6 +- external/eigen3/test/superlu_support.cpp | 1 + external/eigen3/test/svd_common.h | 483 + external/eigen3/test/svd_fill.h | 119 + external/eigen3/test/swap.cpp | 23 +- external/eigen3/test/testsuite.cmake | 229 - external/eigen3/test/triangular.cpp | 19 +- external/eigen3/test/umfpack_support.cpp | 1 + external/eigen3/test/unalignedassert.cpp | 93 +- external/eigen3/test/unalignedcount.cpp | 9 +- .../eigen3/test/upperbidiagonalization.cpp | 2 +- external/eigen3/test/vectorization_logic.cpp | 292 +- external/eigen3/test/vectorwiseop.cpp | 75 +- external/eigen3/test/zerosized.cpp | 20 +- .../eigen3/unsupported/Eigen/AdolcForward | 2 +- .../eigen3/unsupported/Eigen/AlignedVector3 | 38 +- .../eigen3/unsupported/Eigen/CMakeLists.txt | 31 +- .../unsupported/Eigen/CXX11/CMakeLists.txt | 8 + .../eigen3/unsupported/Eigen/CXX11/Tensor | 152 + .../unsupported/Eigen/CXX11/TensorSymmetry | 42 + .../eigen3/unsupported/Eigen/CXX11/ThreadPool | 65 + .../Eigen/CXX11/src/Tensor/README.md | 1757 ++ .../Eigen/CXX11/src/Tensor/Tensor.h | 527 + .../Eigen/CXX11/src/Tensor/TensorArgMax.h | 299 + .../Eigen/CXX11/src/Tensor/TensorAssign.h | 181 + .../Eigen/CXX11/src/Tensor/TensorBase.h | 1010 + .../CXX11/src/Tensor/TensorBroadcasting.h | 392 + .../Eigen/CXX11/src/Tensor/TensorChipping.h | 384 + .../CXX11/src/Tensor/TensorConcatenation.h | 361 + .../CXX11/src/Tensor/TensorContraction.h | 628 + .../src/Tensor/TensorContractionBlocking.h | 56 + .../CXX11/src/Tensor/TensorContractionCuda.h | 1391 ++ .../src/Tensor/TensorContractionMapper.h | 467 + .../src/Tensor/TensorContractionThreadPool.h | 1052 + .../Eigen/CXX11/src/Tensor/TensorConversion.h | 279 + .../CXX11/src/Tensor/TensorConvolution.h | 1104 ++ .../Eigen/CXX11/src/Tensor/TensorCostModel.h | 212 + .../Eigen/CXX11/src/Tensor/TensorCustomOp.h | 313 + .../Eigen/CXX11/src/Tensor/TensorDevice.h | 68 + .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 337 + .../CXX11/src/Tensor/TensorDeviceDefault.h | 81 + .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 122 + .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 279 + .../CXX11/src/Tensor/TensorDimensionList.h | 236 + .../Eigen/CXX11/src/Tensor/TensorDimensions.h | 428 + .../Eigen/CXX11/src/Tensor/TensorEvalTo.h | 181 + .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 633 + .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 288 + .../Eigen/CXX11/src/Tensor/TensorExpr.h | 371 + .../Eigen/CXX11/src/Tensor/TensorFFT.h | 651 + .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 389 + .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 167 + .../src/Tensor/TensorForwardDeclarations.h | 109 + .../Eigen/CXX11/src/Tensor/TensorFunctors.h | 489 + .../Eigen/CXX11/src/Tensor/TensorGenerator.h | 185 + .../CXX11/src/Tensor/TensorGlobalFunctions.h | 33 + .../Eigen/CXX11/src/Tensor/TensorIO.h | 79 + .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 509 + .../Eigen/CXX11/src/Tensor/TensorIndexList.h | 725 + .../Eigen/CXX11/src/Tensor/TensorInflation.h | 229 + .../CXX11/src/Tensor/TensorInitializer.h | 82 + .../Eigen/CXX11/src/Tensor/TensorIntDiv.h | 253 + .../Eigen/CXX11/src/Tensor/TensorLayoutSwap.h | 209 + .../Eigen/CXX11/src/Tensor/TensorMacros.h | 54 + .../Eigen/CXX11/src/Tensor/TensorMap.h | 321 + .../Eigen/CXX11/src/Tensor/TensorMeta.h | 218 + .../Eigen/CXX11/src/Tensor/TensorMorphing.h | 888 + .../Eigen/CXX11/src/Tensor/TensorPadding.h | 397 + .../Eigen/CXX11/src/Tensor/TensorPatch.h | 269 + .../Eigen/CXX11/src/Tensor/TensorRandom.h | 276 + .../Eigen/CXX11/src/Tensor/TensorReduction.h | 781 + .../CXX11/src/Tensor/TensorReductionCuda.h | 750 + .../CXX11/src/Tensor/TensorReductionSycl.h | 242 + .../Eigen/CXX11/src/Tensor/TensorRef.h | 429 + .../Eigen/CXX11/src/Tensor/TensorReverse.h | 288 + .../Eigen/CXX11/src/Tensor/TensorScan.h | 287 + .../Eigen/CXX11/src/Tensor/TensorShuffling.h | 264 + .../Eigen/CXX11/src/Tensor/TensorStorage.h | 146 + .../Eigen/CXX11/src/Tensor/TensorStriding.h | 338 + .../Eigen/CXX11/src/Tensor/TensorSycl.h | 82 + .../TensorSyclConvertToDeviceExpression.h | 121 + .../src/Tensor/TensorSyclExprConstructor.h | 239 + .../src/Tensor/TensorSyclExtractAccessor.h | 204 + .../src/Tensor/TensorSyclExtractFunctors.h | 177 + .../CXX11/src/Tensor/TensorSyclLeafCount.h | 114 + .../src/Tensor/TensorSyclPlaceHolderExpr.h | 181 + .../Eigen/CXX11/src/Tensor/TensorSyclRun.h | 70 + .../Eigen/CXX11/src/Tensor/TensorSyclTuple.h | 234 + .../Eigen/CXX11/src/Tensor/TensorTraits.h | 272 + .../Eigen/CXX11/src/Tensor/TensorUInt128.h | 248 + .../CXX11/src/Tensor/TensorVolumePatch.h | 608 + .../src/TensorSymmetry/DynamicSymmetry.h | 293 + .../CXX11/src/TensorSymmetry/StaticSymmetry.h | 236 + .../Eigen/CXX11/src/TensorSymmetry/Symmetry.h | 338 + .../TensorSymmetry/util/TemplateGroupTheory.h | 666 + .../Eigen/CXX11/src/ThreadPool/EventCount.h | 233 + .../src/ThreadPool/NonBlockingThreadPool.h | 274 + .../Eigen/CXX11/src/ThreadPool/RunQueue.h | 210 + .../CXX11/src/ThreadPool/SimpleThreadPool.h | 154 + .../CXX11/src/ThreadPool/ThreadEnvironment.h | 38 + .../Eigen/CXX11/src/ThreadPool/ThreadLocal.h | 22 + .../src/ThreadPool/ThreadPoolInterface.h | 33 + .../Eigen/CXX11/src/ThreadPool/ThreadYield.h | 20 + .../Eigen/CXX11/src/util/CXX11Meta.h | 542 + .../Eigen/CXX11/src/util/CXX11Workarounds.h | 88 + .../Eigen/CXX11/src/util/EmulateArray.h | 267 + .../Eigen/CXX11/src/util/EmulateCXX11Meta.h | 311 + .../Eigen/CXX11/src/util/MaxSizeVector.h | 141 + external/eigen3/unsupported/Eigen/EulerAngles | 43 + .../eigen3/unsupported/Eigen/IterativeSolvers | 5 +- .../eigen3/unsupported/Eigen/KroneckerProduct | 2 + .../eigen3/unsupported/Eigen/MPRealSupport | 104 +- .../eigen3/unsupported/Eigen/MatrixFunctions | 76 +- .../eigen3/unsupported/Eigen/OpenGLSupport | 20 +- external/eigen3/unsupported/Eigen/SVD | 39 - external/eigen3/unsupported/Eigen/SparseExtra | 3 - .../eigen3/unsupported/Eigen/SpecialFunctions | 63 + .../Eigen/src/AutoDiff/AutoDiffJacobian.h | 49 +- .../Eigen/src/AutoDiff/AutoDiffScalar.h | 231 +- .../Eigen/src/AutoDiff/CMakeLists.txt | 6 - .../unsupported/Eigen/src/BVH/CMakeLists.txt | 6 - .../unsupported/Eigen/src/CMakeLists.txt | 15 - .../ArpackSelfAdjointEigenSolver.h | 14 +- .../Eigen/src/Eigenvalues/CMakeLists.txt | 6 - .../Eigen/src/EulerAngles/CMakeLists.txt | 6 + .../Eigen/src/EulerAngles/EulerAngles.h | 386 + .../Eigen/src/EulerAngles/EulerSystem.h | 326 + .../unsupported/Eigen/src/FFT/CMakeLists.txt | 6 - .../Eigen/src/IterativeSolvers/CMakeLists.txt | 6 - .../Eigen/src/IterativeSolvers/DGMRES.h | 57 +- .../Eigen/src/IterativeSolvers/GMRES.h | 406 +- .../src/IterativeSolvers/IncompleteCholesky.h | 278 - .../Eigen/src/IterativeSolvers/IncompleteLU.h | 39 +- .../Eigen/src/IterativeSolvers/MINRES.h | 84 +- .../Eigen/src/IterativeSolvers/Scaling.h | 6 +- .../Eigen/src/KroneckerProduct/CMakeLists.txt | 6 - .../KroneckerProduct/KroneckerTensorProduct.h | 169 +- .../src/LevenbergMarquardt/CMakeLists.txt | 6 - .../Eigen/src/LevenbergMarquardt/LMcovar.h | 1 - .../Eigen/src/LevenbergMarquardt/LMpar.h | 2 +- .../Eigen/src/LevenbergMarquardt/LMqrsolv.h | 9 +- .../LevenbergMarquardt/LevenbergMarquardt.h | 31 +- .../Eigen/src/MatrixFunctions/CMakeLists.txt | 6 - .../src/MatrixFunctions/MatrixExponential.h | 665 +- .../src/MatrixFunctions/MatrixFunction.h | 707 +- .../MatrixFunctions/MatrixFunctionAtomic.h | 131 - .../src/MatrixFunctions/MatrixLogarithm.h | 507 +- .../Eigen/src/MatrixFunctions/MatrixPower.h | 397 +- .../src/MatrixFunctions/MatrixSquareRoot.h | 450 +- .../Eigen/src/MatrixFunctions/StemFunction.h | 172 +- .../src/MoreVectorization/CMakeLists.txt | 6 - .../src/NonLinearOptimization/CMakeLists.txt | 6 - .../HybridNonLinearSolver.h | 4 +- .../LevenbergMarquardt.h | 25 +- .../Eigen/src/NumericalDiff/CMakeLists.txt | 6 - .../Eigen/src/Polynomials/CMakeLists.txt | 6 - .../Eigen/src/Polynomials/PolynomialSolver.h | 33 +- .../Eigen/src/Polynomials/PolynomialUtils.h | 2 +- .../eigen3/unsupported/Eigen/src/SVD/BDCSVD.h | 748 - .../unsupported/Eigen/src/SVD/CMakeLists.txt | 6 - .../unsupported/Eigen/src/SVD/JacobiSVD.h | 782 - .../unsupported/Eigen/src/SVD/TODOBdcsvd.txt | 29 - .../Eigen/src/SVD/doneInBDCSVD.txt | 21 - .../Eigen/src/Skyline/CMakeLists.txt | 6 - .../Eigen/src/Skyline/SkylineProduct.h | 6 +- .../Eigen/src/SparseExtra/BlockSparseMatrix.h | 1079 + .../Eigen/src/SparseExtra/CMakeLists.txt | 6 - .../src/SparseExtra/DynamicSparseMatrix.h | 71 +- .../Eigen/src/SparseExtra/MarketIO.h | 21 +- .../src/SparseExtra/MatrixMarketIterator.h | 43 +- .../Eigen/src/SparseExtra/RandomSetter.h | 10 +- .../SpecialFunctionsArrayAPI.h | 124 + .../SpecialFunctionsFunctors.h | 236 + .../SpecialFunctions/SpecialFunctionsHalf.h | 47 + .../SpecialFunctions/SpecialFunctionsImpl.h | 1565 ++ .../SpecialFunctionsPacketMath.h | 58 + .../arch/CUDA/CudaSpecialFunctions.h | 165 + .../Eigen/src/Splines/CMakeLists.txt | 6 - .../unsupported/Eigen/src/Splines/Spline.h | 74 +- .../Eigen/src/Splines/SplineFitting.h | 274 + .../unsupported/Eigen/src/Splines/SplineFwd.h | 3 + external/eigen3/unsupported/doc/Overview.dox | 11 +- .../unsupported/doc/examples/BVH_Example.cpp | 4 +- .../unsupported/doc/examples/EulerAngles.cpp | 46 + .../eigen3/unsupported/test/CMakeLists.txt | 177 +- .../eigen3/unsupported/test/EulerAngles.cpp | 208 + external/eigen3/unsupported/test/FFTW.cpp | 32 +- .../test/NonLinearOptimization.cpp | 46 +- .../unsupported/test/alignedvector3.cpp | 29 +- external/eigen3/unsupported/test/autodiff.cpp | 195 +- .../unsupported/test/autodiff_scalar.cpp | 98 + external/eigen3/unsupported/test/bdcsvd.cpp | 213 - .../unsupported/test/cxx11_eventcount.cpp | 142 + .../eigen3/unsupported/test/cxx11_meta.cpp | 357 + .../test/cxx11_non_blocking_thread_pool.cpp | 107 + .../unsupported/test/cxx11_runqueue.cpp | 235 + .../unsupported/test/cxx11_tensor_argmax.cpp | 294 + .../test/cxx11_tensor_argmax_cuda.cu | 254 + .../unsupported/test/cxx11_tensor_assign.cpp | 370 + .../test/cxx11_tensor_broadcast_sycl.cpp | 74 + .../test/cxx11_tensor_broadcasting.cpp | 194 + .../test/cxx11_tensor_cast_float16_cuda.cu | 82 + .../unsupported/test/cxx11_tensor_casts.cpp | 115 + .../test/cxx11_tensor_chipping.cpp | 425 + .../test/cxx11_tensor_comparisons.cpp | 84 + .../test/cxx11_tensor_complex_cuda.cu | 153 + .../cxx11_tensor_complex_cwise_ops_cuda.cu | 97 + .../test/cxx11_tensor_concatenation.cpp | 137 + .../unsupported/test/cxx11_tensor_const.cpp | 62 + .../test/cxx11_tensor_contract_cuda.cu | 216 + .../test/cxx11_tensor_contraction.cpp | 545 + .../test/cxx11_tensor_convolution.cpp | 149 + .../unsupported/test/cxx11_tensor_cuda.cu | 1287 ++ .../test/cxx11_tensor_custom_index.cpp | 100 + .../test/cxx11_tensor_custom_op.cpp | 111 + .../unsupported/test/cxx11_tensor_device.cu | 390 + .../test/cxx11_tensor_device_sycl.cpp | 31 + .../test/cxx11_tensor_dimension.cpp | 69 + .../unsupported/test/cxx11_tensor_empty.cpp | 40 + .../unsupported/test/cxx11_tensor_expr.cpp | 314 + .../unsupported/test/cxx11_tensor_fft.cpp | 273 + .../test/cxx11_tensor_fixed_size.cpp | 261 + .../test/cxx11_tensor_forced_eval.cpp | 79 + .../test/cxx11_tensor_forced_eval_sycl.cpp | 70 + .../test/cxx11_tensor_generator.cpp | 91 + .../unsupported/test/cxx11_tensor_ifft.cpp | 154 + .../test/cxx11_tensor_image_patch.cpp | 757 + .../test/cxx11_tensor_index_list.cpp | 386 + .../test/cxx11_tensor_inflation.cpp | 81 + .../unsupported/test/cxx11_tensor_intdiv.cpp | 147 + .../unsupported/test/cxx11_tensor_io.cpp | 136 + .../test/cxx11_tensor_layout_swap.cpp | 61 + .../unsupported/test/cxx11_tensor_lvalue.cpp | 42 + .../unsupported/test/cxx11_tensor_map.cpp | 277 + .../unsupported/test/cxx11_tensor_math.cpp | 46 + .../test/cxx11_tensor_mixed_indices.cpp | 53 + .../test/cxx11_tensor_morphing.cpp | 485 + .../test/cxx11_tensor_notification.cpp | 81 + .../test/cxx11_tensor_of_complex.cpp | 103 + .../test/cxx11_tensor_of_const_values.cpp | 105 + .../test/cxx11_tensor_of_float16_cuda.cu | 494 + .../test/cxx11_tensor_of_strings.cpp | 152 + .../unsupported/test/cxx11_tensor_padding.cpp | 93 + .../unsupported/test/cxx11_tensor_patch.cpp | 172 + .../unsupported/test/cxx11_tensor_random.cpp | 78 + .../test/cxx11_tensor_random_cuda.cu | 88 + .../test/cxx11_tensor_reduction.cpp | 508 + .../test/cxx11_tensor_reduction_cuda.cu | 157 + .../test/cxx11_tensor_reduction_sycl.cpp | 138 + .../unsupported/test/cxx11_tensor_ref.cpp | 248 + .../unsupported/test/cxx11_tensor_reverse.cpp | 190 + .../test/cxx11_tensor_roundings.cpp | 62 + .../unsupported/test/cxx11_tensor_scan.cpp | 110 + .../test/cxx11_tensor_scan_cuda.cu | 79 + .../test/cxx11_tensor_shuffling.cpp | 228 + .../unsupported/test/cxx11_tensor_simple.cpp | 327 + .../test/cxx11_tensor_striding.cpp | 119 + .../unsupported/test/cxx11_tensor_sugar.cpp | 81 + .../unsupported/test/cxx11_tensor_sycl.cpp | 159 + .../test/cxx11_tensor_symmetry.cpp | 818 + .../test/cxx11_tensor_thread_pool.cpp | 373 + .../unsupported/test/cxx11_tensor_uint128.cpp | 160 + .../test/cxx11_tensor_volume_patch.cpp | 112 + .../eigen3/unsupported/test/forward_adolc.cpp | 4 +- .../eigen3/unsupported/test/jacobisvd.cpp | 198 - .../unsupported/test/kronecker_product.cpp | 89 +- .../unsupported/test/levenberg_marquardt.cpp | 93 +- .../unsupported/test/matrix_function.cpp | 6 +- .../unsupported/test/matrix_functions.h | 42 +- .../eigen3/unsupported/test/matrix_power.cpp | 171 +- external/eigen3/unsupported/test/minres.cpp | 19 +- .../eigen3/unsupported/test/mpreal/mpreal.h | 6178 +++--- .../unsupported/test/mpreal_support.cpp | 10 +- .../unsupported/test/polynomialsolver.cpp | 7 +- .../eigen3/unsupported/test/sparse_extra.cpp | 1 - .../unsupported/test/special_functions.cpp | 345 + external/eigen3/unsupported/test/splines.cpp | 73 +- external/eigen3/unsupported/test/svd_common.h | 261 - 1108 files changed, 144672 insertions(+), 48533 deletions(-) delete mode 100644 external/eigen3/Eigen/Array delete mode 100644 external/eigen3/Eigen/Eigen2Support delete mode 100644 external/eigen3/Eigen/LeastSquares mode change 100644 => 100755 external/eigen3/Eigen/PardisoSupport delete mode 100644 external/eigen3/Eigen/src/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/Cholesky/CMakeLists.txt rename external/eigen3/Eigen/src/Cholesky/{LLT_MKL.h => LLT_LAPACKE.h} (71%) delete mode 100644 external/eigen3/Eigen/src/CholmodSupport/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/Core/AssignEvaluator.h mode change 100644 => 100755 external/eigen3/Eigen/src/Core/Assign_MKL.h delete mode 100644 external/eigen3/Eigen/src/Core/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/Core/ConditionEstimator.h create mode 100644 external/eigen3/Eigen/src/Core/CoreEvaluators.h create mode 100644 external/eigen3/Eigen/src/Core/CwiseTernaryOp.h delete mode 100644 external/eigen3/Eigen/src/Core/Flagged.h delete mode 100644 external/eigen3/Eigen/src/Core/Functors.h create mode 100644 external/eigen3/Eigen/src/Core/Inverse.h create mode 100644 external/eigen3/Eigen/src/Core/MathFunctionsImpl.h create mode 100644 external/eigen3/Eigen/src/Core/Product.h delete mode 100644 external/eigen3/Eigen/src/Core/ProductBase.h create mode 100644 external/eigen3/Eigen/src/Core/ProductEvaluators.h create mode 100644 external/eigen3/Eigen/src/Core/Solve.h create mode 100644 external/eigen3/Eigen/src/Core/SolverBase.h create mode 100644 external/eigen3/Eigen/src/Core/arch/AVX/Complex.h create mode 100644 external/eigen3/Eigen/src/Core/arch/AVX/MathFunctions.h create mode 100644 external/eigen3/Eigen/src/Core/arch/AVX/PacketMath.h create mode 100644 external/eigen3/Eigen/src/Core/arch/AVX/TypeCasting.h create mode 100644 external/eigen3/Eigen/src/Core/arch/AVX512/MathFunctions.h create mode 100644 external/eigen3/Eigen/src/Core/arch/AVX512/PacketMath.h delete mode 100644 external/eigen3/Eigen/src/Core/arch/AltiVec/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/Core/arch/AltiVec/MathFunctions.h mode change 100644 => 100755 external/eigen3/Eigen/src/Core/arch/AltiVec/PacketMath.h delete mode 100644 external/eigen3/Eigen/src/Core/arch/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/Core/arch/CUDA/Complex.h create mode 100644 external/eigen3/Eigen/src/Core/arch/CUDA/Half.h create mode 100644 external/eigen3/Eigen/src/Core/arch/CUDA/MathFunctions.h create mode 100644 external/eigen3/Eigen/src/Core/arch/CUDA/PacketMath.h create mode 100644 external/eigen3/Eigen/src/Core/arch/CUDA/PacketMathHalf.h create mode 100644 external/eigen3/Eigen/src/Core/arch/CUDA/TypeCasting.h delete mode 100644 external/eigen3/Eigen/src/Core/arch/Default/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/Core/arch/NEON/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/Core/arch/NEON/MathFunctions.h delete mode 100644 external/eigen3/Eigen/src/Core/arch/SSE/CMakeLists.txt mode change 100644 => 100755 external/eigen3/Eigen/src/Core/arch/SSE/PacketMath.h create mode 100644 external/eigen3/Eigen/src/Core/arch/SSE/TypeCasting.h create mode 100644 external/eigen3/Eigen/src/Core/arch/ZVector/Complex.h create mode 100644 external/eigen3/Eigen/src/Core/arch/ZVector/MathFunctions.h create mode 100755 external/eigen3/Eigen/src/Core/arch/ZVector/PacketMath.h create mode 100644 external/eigen3/Eigen/src/Core/functors/AssignmentFunctors.h create mode 100644 external/eigen3/Eigen/src/Core/functors/BinaryFunctors.h create mode 100644 external/eigen3/Eigen/src/Core/functors/NullaryFunctors.h create mode 100644 external/eigen3/Eigen/src/Core/functors/StlFunctors.h create mode 100644 external/eigen3/Eigen/src/Core/functors/TernaryFunctors.h create mode 100644 external/eigen3/Eigen/src/Core/functors/UnaryFunctors.h delete mode 100644 external/eigen3/Eigen/src/Core/products/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/Core/products/CoeffBasedProduct.h rename external/eigen3/Eigen/src/Core/products/{GeneralMatrixMatrixTriangular_MKL.h => GeneralMatrixMatrixTriangular_BLAS.h} (68%) rename external/eigen3/Eigen/src/Core/products/{GeneralMatrixMatrix_MKL.h => GeneralMatrixMatrix_BLAS.h} (76%) rename external/eigen3/Eigen/src/Core/products/{GeneralMatrixVector_MKL.h => GeneralMatrixVector_BLAS.h} (60%) rename external/eigen3/Eigen/src/Core/products/{SelfadjointMatrixMatrix_MKL.h => SelfadjointMatrixMatrix_BLAS.h} (68%) rename external/eigen3/Eigen/src/Core/products/{SelfadjointMatrixVector_MKL.h => SelfadjointMatrixVector_BLAS.h} (72%) rename external/eigen3/Eigen/src/Core/products/{TriangularMatrixMatrix_MKL.h => TriangularMatrixMatrix_BLAS.h} (76%) rename external/eigen3/Eigen/src/Core/products/{TriangularMatrixVector_MKL.h => TriangularMatrixVector_BLAS.h} (74%) rename external/eigen3/Eigen/src/Core/products/{TriangularSolverMatrix_MKL.h => TriangularSolverMatrix_BLAS.h} (75%) mode change 100644 => 100755 external/eigen3/Eigen/src/Core/util/BlasUtil.h delete mode 100644 external/eigen3/Eigen/src/Core/util/CMakeLists.txt mode change 100644 => 100755 external/eigen3/Eigen/src/Core/util/DisableStupidWarnings.h mode change 100644 => 100755 external/eigen3/Eigen/src/Core/util/MKL_support.h mode change 100644 => 100755 external/eigen3/Eigen/src/Core/util/Meta.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Block.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Cwise.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/CwiseOperators.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/AlignedBox.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/All.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/AngleAxis.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/Hyperplane.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/ParametrizedLine.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/Quaternion.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/Rotation2D.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/RotationBase.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/Scaling.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/Transform.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Geometry/Translation.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/LU.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Lazy.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/LeastSquares.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Macros.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/MathFunctions.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Memory.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Meta.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/Minor.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/QR.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/SVD.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/TriangularSolver.h delete mode 100644 external/eigen3/Eigen/src/Eigen2Support/VectorBlock.h delete mode 100644 external/eigen3/Eigen/src/Eigenvalues/CMakeLists.txt rename external/eigen3/Eigen/src/Eigenvalues/{ComplexSchur_MKL.h => ComplexSchur_LAPACKE.h} (65%) rename external/eigen3/Eigen/src/Eigenvalues/{RealSchur_MKL.h => RealSchur_LAPACKE.h} (64%) rename external/eigen3/Eigen/src/Eigenvalues/{SelfAdjointEigenSolver_MKL.h => SelfAdjointEigenSolver_LAPACKE.h} (62%) delete mode 100644 external/eigen3/Eigen/src/Geometry/CMakeLists.txt mode change 100644 => 100755 external/eigen3/Eigen/src/Geometry/Scaling.h delete mode 100644 external/eigen3/Eigen/src/Geometry/arch/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/Householder/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/IterativeLinearSolvers/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h create mode 100644 external/eigen3/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h create mode 100644 external/eigen3/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h delete mode 100644 external/eigen3/Eigen/src/Jacobi/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/LU/CMakeLists.txt rename external/eigen3/Eigen/src/LU/{Inverse.h => InverseImpl.h} (86%) rename external/eigen3/Eigen/src/LU/{PartialPivLU_MKL.h => PartialPivLU_LAPACKE.h} (77%) delete mode 100644 external/eigen3/Eigen/src/LU/arch/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/MetisSupport/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/OrderingMethods/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/PaStiXSupport/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/PardisoSupport/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/QR/CMakeLists.txt rename external/eigen3/Eigen/src/QR/{ColPivHouseholderQR_MKL.h => ColPivHouseholderQR_LAPACKE.h} (64%) create mode 100644 external/eigen3/Eigen/src/QR/CompleteOrthogonalDecomposition.h rename external/eigen3/Eigen/src/QR/{HouseholderQR_MKL.h => HouseholderQR_LAPACKE.h} (77%) delete mode 100644 external/eigen3/Eigen/src/SPQRSupport/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/SVD/BDCSVD.h delete mode 100644 external/eigen3/Eigen/src/SVD/CMakeLists.txt rename external/eigen3/Eigen/src/SVD/{JacobiSVD_MKL.h => JacobiSVD_LAPACKE.h} (62%) rename external/eigen3/{unsupported => }/Eigen/src/SVD/SVDBase.h (52%) delete mode 100644 external/eigen3/Eigen/src/SparseCholesky/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/SparseCore/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/SparseCore/SparseAssign.h create mode 100644 external/eigen3/Eigen/src/SparseCore/SparseCompressedBase.h create mode 100644 external/eigen3/Eigen/src/SparseCore/SparseMap.h create mode 100644 external/eigen3/Eigen/src/SparseCore/SparseRef.h create mode 100644 external/eigen3/Eigen/src/SparseCore/SparseSolverBase.h delete mode 100644 external/eigen3/Eigen/src/SparseLU/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/SparseQR/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/StlSupport/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/SuperLUSupport/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/UmfPackSupport/CMakeLists.txt delete mode 100644 external/eigen3/Eigen/src/misc/CMakeLists.txt create mode 100644 external/eigen3/Eigen/src/misc/RealSvd2x2.h delete mode 100644 external/eigen3/Eigen/src/misc/Solve.h delete mode 100644 external/eigen3/Eigen/src/misc/SparseSolve.h create mode 100644 external/eigen3/Eigen/src/misc/lapack.h create mode 100755 external/eigen3/Eigen/src/misc/lapacke.h create mode 100644 external/eigen3/Eigen/src/misc/lapacke_mangling.h delete mode 100644 external/eigen3/Eigen/src/plugins/CMakeLists.txt create mode 100644 external/eigen3/README.md create mode 100644 external/eigen3/bench/analyze-blocking-sizes.cpp create mode 100644 external/eigen3/bench/benchmark-blocking-sizes.cpp create mode 100644 external/eigen3/bench/btl/cmake/FindBLAZE.cmake delete mode 100644 external/eigen3/bench/btl/cmake/FindGOTO.cmake delete mode 100644 external/eigen3/bench/btl/cmake/FindGOTO2.cmake create mode 100644 external/eigen3/bench/btl/cmake/FindOPENBLAS.cmake create mode 100644 external/eigen3/bench/btl/libs/blaze/CMakeLists.txt create mode 100644 external/eigen3/bench/btl/libs/blaze/blaze_interface.hh create mode 100644 external/eigen3/bench/btl/libs/blaze/main.cpp create mode 100644 external/eigen3/bench/btl/libs/tensors/CMakeLists.txt create mode 100644 external/eigen3/bench/btl/libs/tensors/main_linear.cpp create mode 100644 external/eigen3/bench/btl/libs/tensors/main_matmat.cpp create mode 100644 external/eigen3/bench/btl/libs/tensors/main_vecmat.cpp create mode 100644 external/eigen3/bench/btl/libs/tensors/tensor_interface.hh create mode 100644 external/eigen3/bench/dense_solvers.cpp create mode 100644 external/eigen3/bench/perf_monitoring/gemm/changesets.txt create mode 100644 external/eigen3/bench/perf_monitoring/gemm/gemm.cpp create mode 100644 external/eigen3/bench/perf_monitoring/gemm/gemm_settings.txt create mode 100644 external/eigen3/bench/perf_monitoring/gemm/lazy_gemm.cpp create mode 100644 external/eigen3/bench/perf_monitoring/gemm/lazy_gemm_settings.txt create mode 100755 external/eigen3/bench/perf_monitoring/gemm/make_plot.sh create mode 100755 external/eigen3/bench/perf_monitoring/gemm/run.sh create mode 100644 external/eigen3/bench/tensors/README create mode 100644 external/eigen3/bench/tensors/benchmark.h create mode 100644 external/eigen3/bench/tensors/benchmark_main.cc create mode 100644 external/eigen3/bench/tensors/contraction_benchmarks_cpu.cc create mode 100644 external/eigen3/bench/tensors/tensor_benchmarks.h create mode 100644 external/eigen3/bench/tensors/tensor_benchmarks_cpu.cc create mode 100644 external/eigen3/bench/tensors/tensor_benchmarks_fp16_gpu.cu create mode 100644 external/eigen3/bench/tensors/tensor_benchmarks_gpu.cu create mode 100644 external/eigen3/bench/tensors/tensor_benchmarks_sycl.cc delete mode 100644 external/eigen3/blas/chbmv.f delete mode 100644 external/eigen3/blas/chpmv.f delete mode 100644 external/eigen3/blas/ctbmv.f delete mode 100644 external/eigen3/blas/drotm.f delete mode 100644 external/eigen3/blas/drotmg.f delete mode 100644 external/eigen3/blas/dsbmv.f delete mode 100644 external/eigen3/blas/dspmv.f delete mode 100644 external/eigen3/blas/dtbmv.f create mode 100644 external/eigen3/blas/f2c/chbmv.c create mode 100644 external/eigen3/blas/f2c/chpmv.c create mode 100644 external/eigen3/blas/f2c/complexdots.c create mode 100644 external/eigen3/blas/f2c/ctbmv.c create mode 100644 external/eigen3/blas/f2c/d_cnjg.c create mode 100644 external/eigen3/blas/f2c/datatypes.h create mode 100644 external/eigen3/blas/f2c/drotm.c create mode 100644 external/eigen3/blas/f2c/drotmg.c create mode 100644 external/eigen3/blas/f2c/dsbmv.c create mode 100644 external/eigen3/blas/f2c/dspmv.c create mode 100644 external/eigen3/blas/f2c/dtbmv.c create mode 100644 external/eigen3/blas/f2c/lsame.c create mode 100644 external/eigen3/blas/f2c/r_cnjg.c create mode 100644 external/eigen3/blas/f2c/srotm.c create mode 100644 external/eigen3/blas/f2c/srotmg.c create mode 100644 external/eigen3/blas/f2c/ssbmv.c create mode 100644 external/eigen3/blas/f2c/sspmv.c create mode 100644 external/eigen3/blas/f2c/stbmv.c create mode 100644 external/eigen3/blas/f2c/zhbmv.c create mode 100644 external/eigen3/blas/f2c/zhpmv.c create mode 100644 external/eigen3/blas/f2c/ztbmv.c rename external/eigen3/blas/{ => fortran}/complexdots.f (100%) delete mode 100644 external/eigen3/blas/lsame.f delete mode 100644 external/eigen3/blas/srotm.f delete mode 100644 external/eigen3/blas/srotmg.f delete mode 100644 external/eigen3/blas/ssbmv.f delete mode 100644 external/eigen3/blas/sspmv.f delete mode 100644 external/eigen3/blas/stbmv.f delete mode 100644 external/eigen3/blas/zhbmv.f delete mode 100644 external/eigen3/blas/zhpmv.f delete mode 100644 external/eigen3/blas/ztbmv.f create mode 100644 external/eigen3/cmake/Eigen3Config.cmake.in create mode 100644 external/eigen3/cmake/Eigen3ConfigLegacy.cmake.in create mode 100644 external/eigen3/cmake/EigenUninstall.cmake create mode 100644 external/eigen3/cmake/FindBLASEXT.cmake create mode 100644 external/eigen3/cmake/FindComputeCpp.cmake create mode 100644 external/eigen3/cmake/FindHWLOC.cmake create mode 100644 external/eigen3/cmake/FindPTSCOTCH.cmake create mode 100644 external/eigen3/cmake/UseEigen3.cmake delete mode 100644 external/eigen3/doc/A10_Eigen2SupportModes.dox create mode 100644 external/eigen3/doc/CoeffwiseMathFunctionsTable.dox delete mode 100644 external/eigen3/doc/CustomizingEigen.dox create mode 100644 external/eigen3/doc/CustomizingEigen_CustomScalar.dox create mode 100644 external/eigen3/doc/CustomizingEigen_InheritingMatrix.dox create mode 100644 external/eigen3/doc/CustomizingEigen_NullaryExpr.dox create mode 100644 external/eigen3/doc/CustomizingEigen_Plugins.dox create mode 100644 external/eigen3/doc/DenseDecompositionBenchmark.dox create mode 100644 external/eigen3/doc/InplaceDecomposition.dox create mode 100644 external/eigen3/doc/LeastSquares.dox create mode 100644 external/eigen3/doc/NewExpressionType.dox create mode 100644 external/eigen3/doc/TopicCMakeGuide.dox create mode 100644 external/eigen3/doc/TutorialReshapeSlicing.dox create mode 100644 external/eigen3/doc/UsingBlasLapackBackends.dox create mode 100644 external/eigen3/doc/UsingNVCC.dox create mode 100644 external/eigen3/doc/examples/CustomizingEigen_Inheritance.cpp create mode 100644 external/eigen3/doc/examples/Cwise_erf.cpp create mode 100644 external/eigen3/doc/examples/Cwise_erfc.cpp create mode 100644 external/eigen3/doc/examples/Cwise_lgamma.cpp delete mode 100644 external/eigen3/doc/examples/MatrixBase_cwise_const.cpp create mode 100644 external/eigen3/doc/examples/TutorialInplaceLU.cpp create mode 100644 external/eigen3/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp create mode 100644 external/eigen3/doc/examples/make_circulant.cpp create mode 100644 external/eigen3/doc/examples/make_circulant.cpp.entry create mode 100644 external/eigen3/doc/examples/make_circulant.cpp.evaluator create mode 100644 external/eigen3/doc/examples/make_circulant.cpp.expression create mode 100644 external/eigen3/doc/examples/make_circulant.cpp.main create mode 100644 external/eigen3/doc/examples/make_circulant.cpp.preamble create mode 100644 external/eigen3/doc/examples/make_circulant.cpp.traits create mode 100644 external/eigen3/doc/examples/make_circulant2.cpp create mode 100644 external/eigen3/doc/examples/nullary_indexing.cpp create mode 100644 external/eigen3/doc/ftv2node.png create mode 100644 external/eigen3/doc/ftv2pnode.png create mode 100644 external/eigen3/doc/snippets/BiCGSTAB_simple.cpp create mode 100644 external/eigen3/doc/snippets/BiCGSTAB_step_by_step.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_arg.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_array_power_array.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_atan.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_boolean_not.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_boolean_xor.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_ceil.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_cosh.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_floor.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_isFinite.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_isInf.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_isNaN.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_log10.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_round.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_scalar_power_array.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_sign.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_sinh.cpp create mode 100644 external/eigen3/doc/snippets/Cwise_tanh.cpp create mode 100644 external/eigen3/doc/snippets/DenseBase_LinSpacedInt.cpp create mode 100644 external/eigen3/doc/snippets/DirectionWise_hnormalized.cpp create mode 100644 external/eigen3/doc/snippets/LeastSquaresNormalEquations.cpp create mode 100644 external/eigen3/doc/snippets/LeastSquaresQR.cpp create mode 100644 external/eigen3/doc/snippets/MatrixBase_cwiseSign.cpp create mode 100644 external/eigen3/doc/snippets/MatrixBase_hnormalized.cpp create mode 100644 external/eigen3/doc/snippets/MatrixBase_homogeneous.cpp delete mode 100644 external/eigen3/doc/snippets/MatrixBase_marked.cpp delete mode 100644 external/eigen3/doc/snippets/MatrixBase_part.cpp create mode 100644 external/eigen3/doc/snippets/MatrixBase_selfadjointView.cpp rename external/eigen3/doc/snippets/{MatrixBase_extract.cpp => MatrixBase_triangularView.cpp} (55%) create mode 100644 external/eigen3/doc/snippets/SparseMatrix_coeffs.cpp create mode 100644 external/eigen3/doc/snippets/TopicAliasing_mult4.cpp create mode 100644 external/eigen3/doc/snippets/TopicAliasing_mult5.cpp create mode 100644 external/eigen3/doc/snippets/Triangular_solve.cpp create mode 100644 external/eigen3/doc/snippets/Tutorial_ReshapeMat2Mat.cpp create mode 100644 external/eigen3/doc/snippets/Tutorial_ReshapeMat2Vec.cpp create mode 100644 external/eigen3/doc/snippets/Tutorial_SlicingCol.cpp create mode 100644 external/eigen3/doc/snippets/Tutorial_SlicingVec.cpp create mode 100644 external/eigen3/doc/snippets/VectorwiseOp_homogeneous.cpp create mode 100644 external/eigen3/doc/special_examples/random_cpp11.cpp create mode 100644 external/eigen3/failtest/bdcsvd_int.cpp create mode 100644 external/eigen3/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp create mode 100644 external/eigen3/failtest/cwiseunaryview_on_const_type_actually_const.cpp create mode 100644 external/eigen3/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp create mode 100644 external/eigen3/failtest/selfadjointview_on_const_type_actually_const.cpp create mode 100644 external/eigen3/failtest/sparse_ref_1.cpp create mode 100644 external/eigen3/failtest/sparse_ref_2.cpp create mode 100644 external/eigen3/failtest/sparse_ref_3.cpp create mode 100644 external/eigen3/failtest/sparse_ref_4.cpp create mode 100644 external/eigen3/failtest/sparse_ref_5.cpp create mode 100644 external/eigen3/failtest/sparse_storage_mismatch.cpp create mode 100644 external/eigen3/failtest/swap_1.cpp create mode 100644 external/eigen3/failtest/swap_2.cpp create mode 100644 external/eigen3/failtest/ternary_1.cpp create mode 100644 external/eigen3/failtest/ternary_2.cpp create mode 100644 external/eigen3/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp create mode 100644 external/eigen3/failtest/triangularview_on_const_type_actually_const.cpp create mode 100644 external/eigen3/lapack/svd.cpp create mode 100644 external/eigen3/test/array_of_string.cpp create mode 100644 external/eigen3/test/bdcsvd.cpp create mode 100644 external/eigen3/test/boostmultiprec.cpp create mode 100644 external/eigen3/test/bug1213.cpp create mode 100644 external/eigen3/test/bug1213.h create mode 100644 external/eigen3/test/bug1213_main.cpp create mode 100644 external/eigen3/test/constructor.cpp create mode 100644 external/eigen3/test/ctorleak.cpp create mode 100644 external/eigen3/test/cuda_basic.cu create mode 100644 external/eigen3/test/cuda_common.h delete mode 100644 external/eigen3/test/cwiseop.cpp create mode 100644 external/eigen3/test/dense_storage.cpp delete mode 100644 external/eigen3/test/eigen2/CMakeLists.txt delete mode 100644 external/eigen3/test/eigen2/eigen2_adjoint.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_alignedbox.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_array.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_basicstuff.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_bug_132.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_cholesky.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_commainitializer.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_cwiseop.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_determinant.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_dynalloc.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_eigensolver.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_first_aligned.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_geometry.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_geometry_with_eigen2_prefix.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_hyperplane.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_inverse.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_linearstructure.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_lu.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_map.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_meta.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_miscmatrices.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_mixingtypes.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_newstdvector.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_nomalloc.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_packetmath.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_parametrizedline.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_prec_inverse_4x4.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_product_large.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_product_small.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_qr.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_qtvector.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_regression.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_sizeof.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_smallvectors.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_sparse_basic.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_sparse_product.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_sparse_solvers.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_sparse_vector.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_stdvector.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_submatrices.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_sum.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_svd.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_swap.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_triangular.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_unalignedassert.cpp delete mode 100644 external/eigen3/test/eigen2/eigen2_visitor.cpp delete mode 100644 external/eigen3/test/eigen2/gsl_helper.h delete mode 100644 external/eigen3/test/eigen2/main.h delete mode 100644 external/eigen3/test/eigen2/product.h delete mode 100755 external/eigen3/test/eigen2/runtest.sh delete mode 100644 external/eigen3/test/eigen2/sparse.h delete mode 100644 external/eigen3/test/eigen2/testsuite.cmake create mode 100644 external/eigen3/test/evaluator_common.h create mode 100644 external/eigen3/test/evaluators.cpp create mode 100644 external/eigen3/test/fastmath.cpp mode change 100644 => 100755 external/eigen3/test/geo_transformations.cpp create mode 100644 external/eigen3/test/half_float.cpp create mode 100644 external/eigen3/test/incomplete_cholesky.cpp create mode 100644 external/eigen3/test/inplace_decomposition.cpp create mode 100644 external/eigen3/test/is_same_dense.cpp create mode 100644 external/eigen3/test/lscg.cpp create mode 100644 external/eigen3/test/numext.cpp delete mode 100755 external/eigen3/test/runtest.sh create mode 100644 external/eigen3/test/sparse_block.cpp create mode 100644 external/eigen3/test/sparse_ref.cpp create mode 100644 external/eigen3/test/svd_common.h create mode 100644 external/eigen3/test/svd_fill.h delete mode 100644 external/eigen3/test/testsuite.cmake create mode 100644 external/eigen3/unsupported/Eigen/CXX11/CMakeLists.txt create mode 100644 external/eigen3/unsupported/Eigen/CXX11/Tensor create mode 100644 external/eigen3/unsupported/Eigen/CXX11/TensorSymmetry create mode 100644 external/eigen3/unsupported/Eigen/CXX11/ThreadPool create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/README.md create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRef.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorStriding.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorVolumePatch.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/DynamicSymmetry.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/StaticSymmetry.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/Symmetry.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/TensorSymmetry/util/TemplateGroupTheory.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/SimpleThreadPool.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadLocal.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/ThreadPool/ThreadYield.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Meta.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/util/CXX11Workarounds.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/util/EmulateArray.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/util/EmulateCXX11Meta.h create mode 100644 external/eigen3/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h create mode 100644 external/eigen3/unsupported/Eigen/EulerAngles delete mode 100644 external/eigen3/unsupported/Eigen/SVD create mode 100644 external/eigen3/unsupported/Eigen/SpecialFunctions mode change 100644 => 100755 external/eigen3/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h delete mode 100644 external/eigen3/unsupported/Eigen/src/AutoDiff/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/BVH/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt create mode 100644 external/eigen3/unsupported/Eigen/src/EulerAngles/CMakeLists.txt create mode 100644 external/eigen3/unsupported/Eigen/src/EulerAngles/EulerAngles.h create mode 100644 external/eigen3/unsupported/Eigen/src/EulerAngles/EulerSystem.h delete mode 100644 external/eigen3/unsupported/Eigen/src/FFT/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/IterativeSolvers/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h delete mode 100644 external/eigen3/unsupported/Eigen/src/KroneckerProduct/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/LevenbergMarquardt/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h delete mode 100644 external/eigen3/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/NonLinearOptimization/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/NumericalDiff/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/Polynomials/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/SVD/BDCSVD.h delete mode 100644 external/eigen3/unsupported/Eigen/src/SVD/CMakeLists.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/SVD/JacobiSVD.h delete mode 100644 external/eigen3/unsupported/Eigen/src/SVD/TODOBdcsvd.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/SVD/doneInBDCSVD.txt delete mode 100644 external/eigen3/unsupported/Eigen/src/Skyline/CMakeLists.txt create mode 100644 external/eigen3/unsupported/Eigen/src/SparseExtra/BlockSparseMatrix.h delete mode 100644 external/eigen3/unsupported/Eigen/src/SparseExtra/CMakeLists.txt create mode 100644 external/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h create mode 100644 external/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h create mode 100644 external/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h create mode 100644 external/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h create mode 100644 external/eigen3/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h create mode 100644 external/eigen3/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h delete mode 100644 external/eigen3/unsupported/Eigen/src/Splines/CMakeLists.txt create mode 100644 external/eigen3/unsupported/doc/examples/EulerAngles.cpp create mode 100644 external/eigen3/unsupported/test/EulerAngles.cpp create mode 100644 external/eigen3/unsupported/test/autodiff_scalar.cpp delete mode 100644 external/eigen3/unsupported/test/bdcsvd.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_eventcount.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_meta.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_non_blocking_thread_pool.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_runqueue.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_argmax.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_argmax_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_assign.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_broadcast_sycl.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_broadcasting.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_cast_float16_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_casts.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_chipping.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_comparisons.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_complex_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_concatenation.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_const.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_contract_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_contraction.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_convolution.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_custom_index.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_custom_op.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_device.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_device_sycl.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_dimension.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_empty.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_expr.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_fft.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_fixed_size.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_forced_eval.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_generator.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_ifft.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_image_patch.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_index_list.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_inflation.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_intdiv.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_io.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_layout_swap.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_lvalue.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_map.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_math.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_mixed_indices.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_morphing.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_notification.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_of_complex.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_of_const_values.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_of_float16_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_of_strings.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_padding.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_patch.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_random.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_random_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_reduction.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_reduction_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_reduction_sycl.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_ref.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_reverse.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_roundings.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_scan.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_scan_cuda.cu create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_shuffling.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_simple.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_striding.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_sugar.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_sycl.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_symmetry.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_thread_pool.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_uint128.cpp create mode 100644 external/eigen3/unsupported/test/cxx11_tensor_volume_patch.cpp delete mode 100644 external/eigen3/unsupported/test/jacobisvd.cpp create mode 100644 external/eigen3/unsupported/test/special_functions.cpp delete mode 100644 external/eigen3/unsupported/test/svd_common.h diff --git a/external/eigen3/.hg_archival.txt b/external/eigen3/.hg_archival.txt index 4dd5bd1..b8b604b 100644 --- a/external/eigen3/.hg_archival.txt +++ b/external/eigen3/.hg_archival.txt @@ -1,4 +1,4 @@ repo: 8a21fd850624c931e448cbcfb38168cb2717c790 -node: b9cd8366d4e8f49471c7afafc4c2a1b00e54a54d -branch: 3.2 -tag: 3.2.10 +node: 5a0156e40feb7c4136680b493c6e433d91a6f355 +branch: 3.3 +tag: 3.3.4 diff --git a/external/eigen3/.hgeol b/external/eigen3/.hgeol index 423676d..5327df1 100644 --- a/external/eigen3/.hgeol +++ b/external/eigen3/.hgeol @@ -1,6 +1,9 @@ [patterns] +*.sh = LF +*.MINPACK = CRLF scripts/*.in = LF debug/msvc/*.dat = CRLF +debug/msvc/*.natvis = CRLF unsupported/test/mpreal/*.* = CRLF ** = native diff --git a/external/eigen3/.hgignore b/external/eigen3/.hgignore index e33ba2e..769a47f 100644 --- a/external/eigen3/.hgignore +++ b/external/eigen3/.hgignore @@ -30,3 +30,5 @@ log patch a a.* +lapack/testing +lapack/reference diff --git a/external/eigen3/.hgtags b/external/eigen3/.hgtags index c831285..32ec946 100644 --- a/external/eigen3/.hgtags +++ b/external/eigen3/.hgtags @@ -21,15 +21,13 @@ a810d5dbab47acfe65b3350236efdd98f67d4d8a 3.1.0-alpha1 8383e883ebcc6f14695ff0b5e20bb631abab43fb 3.1.0-rc1 bf4cb8c934fa3a79f45f1e629610f0225e93e493 3.1.0-rc2 da195914abcc1d739027cbee7c52077aab30b336 3.2-beta1 -4b687cad1d23066f66863f4f87298447298443df 3.2-rc1 -1eeda7b1258bcd306018c0738e2b6a8543661141 3.2-rc2 -ffa86ffb557094721ca71dcea6aed2651b9fd610 3.2.0 -6b38706d90a9fe182e66ab88477b3dbde34b9f66 3.2.1 -1306d75b4a21891e59ff9bd96678882cf831e39f 3.2.2 -36fd1ba04c120cfdd90f3e4cede47f43b21d19ad 3.2.3 -10219c95fe653d4962aa9db4946f6fbea96dd740 3.2.4 -bdd17ee3b1b3a166cd5ec36dcad4fc1f3faf774a 3.2.5 -c58038c56923e0fd86de3ded18e03df442e66dfb 3.2.6 -b30b87236a1b1552af32ac34075ee5696a9b5a33 3.2.7 -07105f7124f9aef00a68c85e0fc606e65d3d6c15 3.2.8 -dc6cfdf9bcec5efc7b6593bddbbb3d675de53524 3.2.9 +a8e0d153fc5e239ef8b06e3665f1f9e8cb8d49c8 before-evaluators +09a8e21866106b49c5dec1d6d543e5794e82efa0 3.3-alpha1 +ce5a455b34c0a0ac3545a1497cb4a16c38ed90e8 3.3-beta1 +69d418c0699907bcd0bf9e0b3ba0a112ed091d85 3.3-beta2 +bef509908b9da05d0d07ffc0da105e2c8c6d3996 3.3-rc1 +04ab5fa4b241754afcf631117572276444c67239 3.3-rc2 +26667be4f70baf4f0d39e96f330714c87b399090 3.3.0 +f562a193118d4f40514e2f4a0ace6e974926ef06 3.3.1 +da9b4e14c2550e0d11078a3c39e6d56eba9905df 3.3.2 +67e894c6cd8f5f1f604b27d37ed47fdf012674ff 3.3.3 diff --git a/external/eigen3/CMakeLists.txt b/external/eigen3/CMakeLists.txt index 77e9f2d..f584002 100644 --- a/external/eigen3/CMakeLists.txt +++ b/external/eigen3/CMakeLists.txt @@ -1,4 +1,5 @@ -project(Eigen) +project(Eigen3) + cmake_minimum_required(VERSION 2.8.5) # guard against in-source builds @@ -7,6 +8,11 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ") endif() +# Alias Eigen_*_DIR to Eigen3_*_DIR: + +set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR}) +set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR}) + # guard against bad build-type strings if (NOT CMAKE_BUILD_TYPE) @@ -92,9 +98,11 @@ else() endif() option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) -if(NOT WIN32) + +# Disable pkgconfig only for native Windows builds +if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows) option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ON) -endif(NOT WIN32) +endif() set(CMAKE_INCLUDE_CURRENT_DIR ON) @@ -108,7 +116,8 @@ endif() set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320") macro(ei_add_cxx_compiler_flag FLAG) - string(REGEX REPLACE "-" "" SFLAG ${FLAG}) + string(REGEX REPLACE "-" "" SFLAG1 ${FLAG}) + string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1}) check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG}) if(COMPILER_SUPPORT_${SFLAG}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}") @@ -117,18 +126,13 @@ endmacro(ei_add_cxx_compiler_flag) if(NOT MSVC) # We assume that other compilers are partly compatible with GNUCC - - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions") - set(CMAKE_CXX_FLAGS_DEBUG "-g3") - set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2") - - # clang outputs some warnings for unknwon flags that are not caught by check_cxx_compiler_flag + + # clang outputs some warnings for unknown flags that are not caught by check_cxx_compiler_flag # adding -Werror turns such warnings into errors check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR) if(COMPILER_SUPPORT_WERROR) set(CMAKE_REQUIRED_FLAGS "-Werror") endif() - ei_add_cxx_compiler_flag("-pedantic") ei_add_cxx_compiler_flag("-Wall") ei_add_cxx_compiler_flag("-Wextra") @@ -142,6 +146,18 @@ if(NOT MSVC) ei_add_cxx_compiler_flag("-Wpointer-arith") ei_add_cxx_compiler_flag("-Wwrite-strings") ei_add_cxx_compiler_flag("-Wformat-security") + ei_add_cxx_compiler_flag("-Wshorten-64-to-32") + ei_add_cxx_compiler_flag("-Wlogical-op") + ei_add_cxx_compiler_flag("-Wenum-conversion") + ei_add_cxx_compiler_flag("-Wc++11-extensions") + ei_add_cxx_compiler_flag("-Wdouble-promotion") +# ei_add_cxx_compiler_flag("-Wconversion") + + # -Wshadow is insanely too strict with gcc, hopefully it will become usable with gcc 6 + # if(NOT CMAKE_COMPILER_IS_GNUCXX OR (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "5.0.0")) + if(NOT CMAKE_COMPILER_IS_GNUCXX) + ei_add_cxx_compiler_flag("-Wshadow") + endif() ei_add_cxx_compiler_flag("-Wno-psabi") ei_add_cxx_compiler_flag("-Wno-variadic-macros") @@ -151,7 +167,8 @@ if(NOT MSVC) ei_add_cxx_compiler_flag("-fno-common") ei_add_cxx_compiler_flag("-fstrict-aliasing") ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark - ei_add_cxx_compiler_flag("-wd2304") # disbale ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor + ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor + # The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag making it fails # Moreover we should not set both -strict-ansi and -ansi @@ -163,6 +180,11 @@ if(NOT MSVC) else() ei_add_cxx_compiler_flag("-ansi") endif() + + if(ANDROID_NDK) + ei_add_cxx_compiler_flag("-pie") + ei_add_cxx_compiler_flag("-fPIE") + endif() set(CMAKE_REQUIRED_FLAGS "") @@ -196,18 +218,65 @@ if(NOT MSVC) message(STATUS "Enabling SSE4.2 in tests/examples") endif() + option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF) + if(EIGEN_TEST_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") + message(STATUS "Enabling AVX in tests/examples") + endif() + + option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF) + if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma") + message(STATUS "Enabling FMA in tests/examples") + endif() + + option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF) + if(EIGEN_TEST_AVX512) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -fabi-version=6 -DEIGEN_ENABLE_AVX512") + message(STATUS "Enabling AVX512 in tests/examples") + endif() + + option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF) + if(EIGEN_TEST_F16C) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c") + message(STATUS "Enabling F16C in tests/examples") + endif() + option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF) if(EIGEN_TEST_ALTIVEC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec") message(STATUS "Enabling AltiVec in tests/examples") endif() + option(EIGEN_TEST_VSX "Enable/Disable VSX in tests/examples" OFF) + if(EIGEN_TEST_VSX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -mvsx") + message(STATUS "Enabling VSX in tests/examples") + endif() + option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) if(EIGEN_TEST_NEON) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8") + if(EIGEN_TEST_FMA) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon") + endif() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=hard") + message(STATUS "Enabling NEON in tests/examples") + endif() + + option(EIGEN_TEST_NEON64 "Enable/Disable Neon in tests/examples" OFF) + if(EIGEN_TEST_NEON64) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") message(STATUS "Enabling NEON in tests/examples") endif() + option(EIGEN_TEST_ZVECTOR "Enable/Disable S390X(zEC13) ZVECTOR in tests/examples" OFF) + if(EIGEN_TEST_ZVECTOR) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z13 -mzvector") + message(STATUS "Enabling S390X(zEC13) ZVECTOR in tests/examples") + endif() + check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP) if(COMPILER_SUPPORT_OPENMP) option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) @@ -284,11 +353,23 @@ if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) message(STATUS "Disabling alignment in tests/examples") endif() -option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF) +option(EIGEN_TEST_NO_EXCEPTIONS "Disables C++ exceptions" OFF) +if(EIGEN_TEST_NO_EXCEPTIONS) + ei_add_cxx_compiler_flag("-fno-exceptions") + message(STATUS "Disabling exceptions in tests/examples") +endif() + +option(EIGEN_TEST_CXX11 "Enable testing with C++11 and C++11 features (e.g. Tensor module)." OFF) + +set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture level to target when compiling CUDA code") include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) # Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR +if(EIGEN_INCLUDE_INSTALL_DIR) + message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. Use INCLUDE_INSTALL_DIR instead.") +endif() + if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR) set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed") @@ -298,9 +379,8 @@ else() CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen header files are installed" ) endif() - set(CMAKEPACKAGE_INSTALL_DIR - "${CMAKE_INSTALL_LIBDIR}/cmake/eigen3" + "${CMAKE_INSTALL_DATADIR}/eigen3/cmake" CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where Eigen3Config.cmake is installed" ) set(PKGCONFIG_INSTALL_DIR @@ -308,6 +388,7 @@ set(PKGCONFIG_INSTALL_DIR CACHE PATH "The directory relative to CMAKE_PREFIX_PATH where eigen3.pc is installed" ) + # similar to set_target_properties but append the property instead of overwriting it macro(ei_add_target_property target prop value) @@ -329,7 +410,7 @@ if(EIGEN_BUILD_PKGCONFIG) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc DESTINATION ${PKGCONFIG_INSTALL_DIR} ) -endif(EIGEN_BUILD_PKGCONFIG) +endif() add_subdirectory(Eigen) @@ -355,6 +436,13 @@ else() add_subdirectory(lapack EXCLUDE_FROM_ALL) endif() +# add SYCL +option(EIGEN_TEST_SYCL "Add Sycl support." OFF) +if(EIGEN_TEST_SYCL) + set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}") + include(FindComputeCpp) +endif() + add_subdirectory(unsupported) add_subdirectory(demos EXCLUDE_FROM_ALL) @@ -403,6 +491,7 @@ if(cmake_generator_tolower MATCHES "makefile") message(STATUS "make check | Build and run the unit-tests. Read this page:") message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests") message(STATUS "make blas | Build BLAS library (not the same thing as Eigen)") + message(STATUS "make uninstall| Removes files installed by make install") message(STATUS "--------------+--------------------------------------------------------------") else() message(STATUS "To build/run the unit tests, read this page:") @@ -410,3 +499,98 @@ else() endif() message(STATUS "") + + +set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} ) +set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} ) +set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} ) +set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} ) +set ( EIGEN_DEFINITIONS "") +set ( EIGEN_INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/${INCLUDE_INSTALL_DIR}" ) +set ( EIGEN_ROOT_DIR ${CMAKE_INSTALL_PREFIX} ) + +# Interface libraries require at least CMake 3.0 +if (NOT CMAKE_VERSION VERSION_LESS 3.0) + include (CMakePackageConfigHelpers) + + # Imported target support + add_library (eigen INTERFACE) + + target_compile_definitions (eigen INTERFACE ${EIGEN_DEFINITIONS}) + target_include_directories (eigen INTERFACE + $ + $ + ) + + # Export as title case Eigen + set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen) + + install (TARGETS eigen EXPORT Eigen3Targets) + + configure_package_config_file ( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR + INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} + NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components + ) + # Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does + # not depend on architecture specific settings or libraries. More + # specifically, an Eigen3Config.cmake generated from a 64 bit target can be + # used for 32 bit targets as well (and vice versa). + set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P}) + unset (CMAKE_SIZEOF_VOID_P) + write_basic_package_version_file (Eigen3ConfigVersion.cmake + VERSION ${EIGEN_VERSION_NUMBER} + COMPATIBILITY SameMajorVersion) + set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P}) + + # The Eigen target will be located in the Eigen3 namespace. Other CMake + # targets can refer to it using Eigen3::Eigen. + export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake) + # Export Eigen3 package to CMake registry such that it can be easily found by + # CMake even if it has not been installed to a standard directory. + export (PACKAGE Eigen3) + + install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}) + +else (NOT CMAKE_VERSION VERSION_LESS 3.0) + # Fallback to legacy Eigen3Config.cmake without the imported target + + # If CMakePackageConfigHelpers module is available (CMake >= 2.8.8) + # create a relocatable Config file, otherwise leave the hardcoded paths + include(CMakePackageConfigHelpers OPTIONAL RESULT_VARIABLE CPCH_PATH) + + if(CPCH_PATH) + configure_package_config_file ( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + PATH_VARS EIGEN_INCLUDE_DIR EIGEN_ROOT_DIR + INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} + NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components + ) + else() + # The PACKAGE_* variables are defined by the configure_package_config_file + # but without it we define them manually to the hardcoded paths + set(PACKAGE_INIT "") + set(PACKAGE_EIGEN_INCLUDE_DIR ${EIGEN_INCLUDE_DIR}) + set(PACKAGE_EIGEN_ROOT_DIR ${EIGEN_ROOT_DIR}) + configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigLegacy.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + @ONLY ESCAPE_QUOTES ) + endif() + + write_basic_package_version_file( Eigen3ConfigVersion.cmake + VERSION ${EIGEN_VERSION_NUMBER} + COMPATIBILITY SameMajorVersion ) + +endif (NOT CMAKE_VERSION VERSION_LESS 3.0) + +install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/UseEigen3.cmake + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake + DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} ) + +# Add uninstall target +add_custom_target ( uninstall + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake) diff --git a/external/eigen3/COPYING.MINPACK b/external/eigen3/COPYING.MINPACK index 11d8a9a..ae7984d 100644 --- a/external/eigen3/COPYING.MINPACK +++ b/external/eigen3/COPYING.MINPACK @@ -1,52 +1,52 @@ -Minpack Copyright Notice (1999) University of Chicago. All rights reserved - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the -following conditions are met: - -1. Redistributions of source code must retain the above -copyright notice, this list of conditions and the following -disclaimer. - -2. Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following -disclaimer in the documentation and/or other materials -provided with the distribution. - -3. The end-user documentation included with the -redistribution, if any, must include the following -acknowledgment: - - "This product includes software developed by the - University of Chicago, as Operator of Argonne National - Laboratory. - -Alternately, this acknowledgment may appear in the software -itself, if and wherever such third-party acknowledgments -normally appear. - -4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" -WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE -UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND -THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE -OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY -OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR -USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF -THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) -DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION -UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL -BE CORRECTED. - -5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT -HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF -ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, -INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF -ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF -PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER -SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT -(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, -EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE -POSSIBILITY OF SUCH LOSS OR DAMAGES. - +Minpack Copyright Notice (1999) University of Chicago. All rights reserved + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above +copyright notice, this list of conditions and the following +disclaimer. + +2. Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following +disclaimer in the documentation and/or other materials +provided with the distribution. + +3. The end-user documentation included with the +redistribution, if any, must include the following +acknowledgment: + + "This product includes software developed by the + University of Chicago, as Operator of Argonne National + Laboratory. + +Alternately, this acknowledgment may appear in the software +itself, if and wherever such third-party acknowledgments +normally appear. + +4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" +WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE +UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND +THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE +OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY +OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR +USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF +THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) +DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION +UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL +BE CORRECTED. + +5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT +HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF +ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, +INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF +ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF +PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER +SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT +(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, +EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE +POSSIBILITY OF SUCH LOSS OR DAMAGES. + diff --git a/external/eigen3/CTestConfig.cmake b/external/eigen3/CTestConfig.cmake index 0557c49..755b473 100644 --- a/external/eigen3/CTestConfig.cmake +++ b/external/eigen3/CTestConfig.cmake @@ -4,10 +4,10 @@ ## # The following are required to uses Dart and the Cdash dashboard ## ENABLE_TESTING() ## INCLUDE(CTest) -set(CTEST_PROJECT_NAME "Eigen3.2") +set(CTEST_PROJECT_NAME "Eigen3.3") set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC") set(CTEST_DROP_METHOD "http") set(CTEST_DROP_SITE "manao.inria.fr") -set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.2") +set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.3") set(CTEST_DROP_SITE_CDASH TRUE) diff --git a/external/eigen3/Eigen/Array b/external/eigen3/Eigen/Array deleted file mode 100644 index 3d004fb..0000000 --- a/external/eigen3/Eigen/Array +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef EIGEN_ARRAY_MODULE_H -#define EIGEN_ARRAY_MODULE_H - -// include Core first to handle Eigen2 support macros -#include "Core" - -#ifndef EIGEN2_SUPPORT - #error The Eigen/Array header does no longer exist in Eigen3. All that functionality has moved to Eigen/Core. -#endif - -#endif // EIGEN_ARRAY_MODULE_H diff --git a/external/eigen3/Eigen/CMakeLists.txt b/external/eigen3/Eigen/CMakeLists.txt index a92dd6f..9eb502b 100644 --- a/external/eigen3/Eigen/CMakeLists.txt +++ b/external/eigen3/Eigen/CMakeLists.txt @@ -16,4 +16,4 @@ install(FILES DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel ) -add_subdirectory(src) +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/external/eigen3/Eigen/Cholesky b/external/eigen3/Eigen/Cholesky index f727f5d..369d1f5 100644 --- a/external/eigen3/Eigen/Cholesky +++ b/external/eigen3/Eigen/Cholesky @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_CHOLESKY_MODULE_H #define EIGEN_CHOLESKY_MODULE_H @@ -10,20 +17,22 @@ * * * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. - * Those decompositions are accessible via the following MatrixBase methods: - * - MatrixBase::llt(), + * Those decompositions are also accessible via the following methods: + * - MatrixBase::llt() * - MatrixBase::ldlt() + * - SelfAdjointView::llt() + * - SelfAdjointView::ldlt() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" #ifdef EIGEN_USE_LAPACKE -#include "src/Cholesky/LLT_MKL.h" +#include "src/misc/lapacke.h" +#include "src/Cholesky/LLT_LAPACKE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/external/eigen3/Eigen/CholmodSupport b/external/eigen3/Eigen/CholmodSupport index 88c29a6..bed8924 100644 --- a/external/eigen3/Eigen/CholmodSupport +++ b/external/eigen3/Eigen/CholmodSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_CHOLMODSUPPORT_MODULE_H #define EIGEN_CHOLMODSUPPORT_MODULE_H @@ -33,12 +40,8 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/CholmodSupport/CholmodSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_CHOLMODSUPPORT_MODULE_H diff --git a/external/eigen3/Eigen/Core b/external/eigen3/Eigen/Core index 509c529..0f7fa63 100644 --- a/external/eigen3/Eigen/Core +++ b/external/eigen3/Eigen/Core @@ -14,6 +14,58 @@ // first thing Eigen does: stop the compiler from committing suicide #include "src/Core/util/DisableStupidWarnings.h" +// Handle NVCC/CUDA/SYCL +#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) + // Do not try asserts on CUDA and SYCL! + #ifndef EIGEN_NO_DEBUG + #define EIGEN_NO_DEBUG + #endif + + #ifdef EIGEN_INTERNAL_DEBUGGING + #undef EIGEN_INTERNAL_DEBUGGING + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif + + // All functions callable from CUDA code must be qualified with __device__ + #ifdef __CUDACC__ + // Do not try to vectorize on CUDA and SYCL! + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + + #define EIGEN_DEVICE_FUNC __host__ __device__ + // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro + // works properly on the device side + #include + #else + #define EIGEN_DEVICE_FUNC + #endif + +#else + #define EIGEN_DEVICE_FUNC + +#endif + +// When compiling CUDA device code with NVCC, pull in math functions from the +// global namespace. In host mode, and when device doee with clang, use the +// std versions. +#if defined(__CUDA_ARCH__) && defined(__NVCC__) + #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; +#endif + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) + #define EIGEN_EXCEPTIONS +#endif + +#ifdef EIGEN_EXCEPTIONS + #include +#endif + // then include this file where all our macros are defined. It's really important to do it first because // it's where we do all the alignment settings (platform detection and honoring the user's will if he // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization. @@ -21,7 +73,7 @@ // Disable the ipa-cp-clone optimization flag with MinGW 6.x or newer (enabled by default with -O3) // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. -#if defined(__MINGW32__) && EIGEN_GNUC_AT_LEAST(4,6) +#if EIGEN_COMP_MINGW && EIGEN_GNUC_AT_LEAST(4,6) #pragma GCC optimize ("-fno-ipa-cp-clone") #endif @@ -31,26 +83,26 @@ // and inclusion of their respective header files #include "src/Core/util/MKL_support.h" -// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into -// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks -#if !EIGEN_ALIGN +// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into +// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks +#if EIGEN_MAX_ALIGN_BYTES==0 #ifndef EIGEN_DONT_VECTORIZE #define EIGEN_DONT_VECTORIZE #endif #endif -#ifdef _MSC_VER +#if EIGEN_COMP_MSVC #include // for _aligned_malloc -- need it regardless of whether vectorization is enabled - #if (_MSC_VER >= 1500) // 2008 or later + #if (EIGEN_COMP_MSVC >= 1500) // 2008 or later // Remember that usage of defined() in a #define is undefined by the standard. // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP. - #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(_M_X64) + #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER #endif #endif #else // Remember that usage of defined() in a #define is undefined by the standard - #if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) ) + #if (defined __SSE2__) && ( (!EIGEN_COMP_GNUC) || EIGEN_COMP_ICC || EIGEN_GNUC_AT_LEAST(4,2) ) #define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC #endif #endif @@ -82,6 +134,28 @@ #ifdef __SSE4_2__ #define EIGEN_VECTORIZE_SSE4_2 #endif + #ifdef __AVX__ + #define EIGEN_VECTORIZE_AVX + #define EIGEN_VECTORIZE_SSE3 + #define EIGEN_VECTORIZE_SSSE3 + #define EIGEN_VECTORIZE_SSE4_1 + #define EIGEN_VECTORIZE_SSE4_2 + #endif + #ifdef __AVX2__ + #define EIGEN_VECTORIZE_AVX2 + #endif + #ifdef __FMA__ + #define EIGEN_VECTORIZE_FMA + #endif + #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512) + #define EIGEN_VECTORIZE_AVX512 + #define EIGEN_VECTORIZE_AVX2 + #define EIGEN_VECTORIZE_AVX + #define EIGEN_VECTORIZE_FMA + #ifdef __AVX512DQ__ + #define EIGEN_VECTORIZE_AVX512DQ + #endif + #endif // include files @@ -95,9 +169,10 @@ extern "C" { // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus: - #if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1110 + #if EIGEN_COMP_ICC >= 1110 #include #else + #include #include #include #ifdef EIGEN_VECTORIZE_SSE3 @@ -112,8 +187,20 @@ #ifdef EIGEN_VECTORIZE_SSE4_2 #include #endif + #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512) + #include + #endif #endif } // end extern "C" + #elif defined __VSX__ + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_VSX + #include + // We need to #undef all these ugly tokens defined in + // => use __vector instead of vector + #undef bool + #undef vector + #undef pixel #elif defined __ALTIVEC__ #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_ALTIVEC @@ -123,13 +210,35 @@ #undef bool #undef vector #undef pixel - #elif defined __ARM_NEON + #elif (defined __ARM_NEON) || (defined __ARM_NEON__) #define EIGEN_VECTORIZE #define EIGEN_VECTORIZE_NEON #include + #elif (defined __s390x__ && defined __VEC__) + #define EIGEN_VECTORIZE + #define EIGEN_VECTORIZE_ZVECTOR + #include #endif #endif +#if defined(__F16C__) && !defined(EIGEN_COMP_CLANG) + // We can use the optimized fp16 to float and float to fp16 conversion routines + #define EIGEN_HAS_FP16_C +#endif + +#if defined __CUDACC__ + #define EIGEN_VECTORIZE_CUDA + #include + #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 + #define EIGEN_HAS_CUDA_FP16 + #endif +#endif + +#if defined EIGEN_HAS_CUDA_FP16 + #include + #include +#endif + #if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) #define EIGEN_HAS_OPENMP #endif @@ -139,7 +248,7 @@ #endif // MSVC for windows mobile does not have the errno.h file -#if !(defined(_MSC_VER) && defined(_WIN32_WCE)) && !defined(__ARMCC_VERSION) +#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM #define EIGEN_HAS_ERRNO #endif @@ -159,29 +268,30 @@ // for min/max: #include +// for std::is_nothrow_move_assignable +#ifdef EIGEN_INCLUDE_TYPE_TRAITS +#include +#endif + // for outputting debug info #ifdef EIGEN_DEBUG_ASSIGN #include #endif // required for __cpuid, needs to be included after cmath -#if defined(_MSC_VER) && (defined(_M_IX86)||defined(_M_X64)) && (!defined(_WIN32_WCE)) +#if EIGEN_COMP_MSVC && EIGEN_ARCH_i386_OR_x86_64 && !EIGEN_OS_WINCE #include #endif -#if defined(_CPPUNWIND) || defined(__EXCEPTIONS) - #define EIGEN_EXCEPTIONS -#endif - -#ifdef EIGEN_EXCEPTIONS - #include -#endif - /** \brief Namespace containing all symbols from the %Eigen library. */ namespace Eigen { inline static const char *SimdInstructionSetsInUse(void) { -#if defined(EIGEN_VECTORIZE_SSE4_2) +#if defined(EIGEN_VECTORIZE_AVX512) + return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_AVX) + return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; +#elif defined(EIGEN_VECTORIZE_SSE4_2) return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2"; #elif defined(EIGEN_VECTORIZE_SSE4_1) return "SSE, SSE2, SSE3, SSSE3, SSE4.1"; @@ -193,8 +303,12 @@ inline static const char *SimdInstructionSetsInUse(void) { return "SSE, SSE2"; #elif defined(EIGEN_VECTORIZE_ALTIVEC) return "AltiVec"; +#elif defined(EIGEN_VECTORIZE_VSX) + return "VSX"; #elif defined(EIGEN_VECTORIZE_NEON) return "ARM NEON"; +#elif defined(EIGEN_VECTORIZE_ZVECTOR) + return "S390X ZVECTOR"; #else return "None"; #endif @@ -202,42 +316,21 @@ inline static const char *SimdInstructionSetsInUse(void) { } // end namespace Eigen -#define STAGE10_FULL_EIGEN2_API 10 -#define STAGE20_RESOLVE_API_CONFLICTS 20 -#define STAGE30_FULL_EIGEN3_API 30 -#define STAGE40_FULL_EIGEN3_STRICTNESS 40 -#define STAGE99_NO_EIGEN2_SUPPORT 99 - -#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE40_FULL_EIGEN3_STRICTNESS -#elif defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API -#elif defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE20_RESOLVE_API_CONFLICTS -#elif defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API - #define EIGEN2_SUPPORT - #define EIGEN2_SUPPORT_STAGE STAGE10_FULL_EIGEN2_API -#elif defined EIGEN2_SUPPORT - // default to stage 3, that's what it's always meant - #define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API - #define EIGEN2_SUPPORT_STAGE STAGE30_FULL_EIGEN3_API -#else - #define EIGEN2_SUPPORT_STAGE STAGE99_NO_EIGEN2_SUPPORT +#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || defined EIGEN2_SUPPORT +// This will generate an error message: +#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information #endif -#ifdef EIGEN2_SUPPORT -#undef minor -#endif +namespace Eigen { // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to // ensure QNX/QCC support using std::size_t; -// gcc 4.6.0 wants std:: for ptrdiff_t +// gcc 4.6.0 wants std:: for ptrdiff_t using std::ptrdiff_t; +} + /** \defgroup Core_Module Core module * This is the main module of Eigen providing dense matrix and vector support * (both fixed and dynamic size) with all the features corresponding to a BLAS library @@ -249,8 +342,8 @@ using std::ptrdiff_t; */ #include "src/Core/util/Constants.h" -#include "src/Core/util/ForwardDeclarations.h" #include "src/Core/util/Meta.h" +#include "src/Core/util/ForwardDeclarations.h" #include "src/Core/util/StaticAssert.h" #include "src/Core/util/XprHelper.h" #include "src/Core/util/Memory.h" @@ -258,41 +351,92 @@ using std::ptrdiff_t; #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" #include "src/Core/GenericPacketMath.h" +#include "src/Core/MathFunctionsImpl.h" -#if defined EIGEN_VECTORIZE_SSE +#if defined EIGEN_VECTORIZE_AVX512 + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/AVX/PacketMath.h" + #include "src/Core/arch/AVX512/PacketMath.h" + #include "src/Core/arch/AVX512/MathFunctions.h" +#elif defined EIGEN_VECTORIZE_AVX + // Use AVX for floats and doubles, SSE for integers + #include "src/Core/arch/SSE/PacketMath.h" + #include "src/Core/arch/SSE/Complex.h" + #include "src/Core/arch/SSE/MathFunctions.h" + #include "src/Core/arch/AVX/PacketMath.h" + #include "src/Core/arch/AVX/MathFunctions.h" + #include "src/Core/arch/AVX/Complex.h" + #include "src/Core/arch/AVX/TypeCasting.h" +#elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/SSE/Complex.h" -#elif defined EIGEN_VECTORIZE_ALTIVEC + #include "src/Core/arch/SSE/TypeCasting.h" +#elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/PacketMath.h" + #include "src/Core/arch/AltiVec/MathFunctions.h" #include "src/Core/arch/AltiVec/Complex.h" #elif defined EIGEN_VECTORIZE_NEON #include "src/Core/arch/NEON/PacketMath.h" + #include "src/Core/arch/NEON/MathFunctions.h" #include "src/Core/arch/NEON/Complex.h" +#elif defined EIGEN_VECTORIZE_ZVECTOR + #include "src/Core/arch/ZVector/PacketMath.h" + #include "src/Core/arch/ZVector/MathFunctions.h" + #include "src/Core/arch/ZVector/Complex.h" +#endif + +// Half float support +#include "src/Core/arch/CUDA/Half.h" +#include "src/Core/arch/CUDA/PacketMathHalf.h" +#include "src/Core/arch/CUDA/TypeCasting.h" + +#if defined EIGEN_VECTORIZE_CUDA + #include "src/Core/arch/CUDA/PacketMath.h" + #include "src/Core/arch/CUDA/MathFunctions.h" #endif #include "src/Core/arch/Default/Settings.h" -#include "src/Core/Functors.h" +#include "src/Core/functors/TernaryFunctors.h" +#include "src/Core/functors/BinaryFunctors.h" +#include "src/Core/functors/UnaryFunctors.h" +#include "src/Core/functors/NullaryFunctors.h" +#include "src/Core/functors/StlFunctors.h" +#include "src/Core/functors/AssignmentFunctors.h" + +// Specialized functors to enable the processing of complex numbers +// on CUDA devices +#include "src/Core/arch/CUDA/Complex.h" + +#include "src/Core/IO.h" #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" #include "src/Core/MatrixBase.h" #include "src/Core/EigenBase.h" +#include "src/Core/Product.h" +#include "src/Core/CoreEvaluators.h" +#include "src/Core/AssignEvaluator.h" + #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874 // at least confirmed with Doxygen 1.5.5 and 1.5.6 #include "src/Core/Assign.h" #endif +#include "src/Core/ArrayBase.h" #include "src/Core/util/BlasUtil.h" #include "src/Core/DenseStorage.h" #include "src/Core/NestByValue.h" -#include "src/Core/ForceAlignedAccess.h" + +// #include "src/Core/ForceAlignedAccess.h" + #include "src/Core/ReturnByValue.h" #include "src/Core/NoAlias.h" #include "src/Core/PlainObjectBase.h" #include "src/Core/Matrix.h" #include "src/Core/Array.h" +#include "src/Core/CwiseTernaryOp.h" #include "src/Core/CwiseBinaryOp.h" #include "src/Core/CwiseUnaryOp.h" #include "src/Core/CwiseNullaryOp.h" @@ -300,32 +444,32 @@ using std::ptrdiff_t; #include "src/Core/SelfCwiseBinaryOp.h" #include "src/Core/Dot.h" #include "src/Core/StableNorm.h" -#include "src/Core/MapBase.h" #include "src/Core/Stride.h" +#include "src/Core/MapBase.h" #include "src/Core/Map.h" +#include "src/Core/Ref.h" #include "src/Core/Block.h" #include "src/Core/VectorBlock.h" -#include "src/Core/Ref.h" #include "src/Core/Transpose.h" #include "src/Core/DiagonalMatrix.h" #include "src/Core/Diagonal.h" #include "src/Core/DiagonalProduct.h" -#include "src/Core/PermutationMatrix.h" -#include "src/Core/Transpositions.h" #include "src/Core/Redux.h" #include "src/Core/Visitor.h" #include "src/Core/Fuzzy.h" -#include "src/Core/IO.h" #include "src/Core/Swap.h" #include "src/Core/CommaInitializer.h" -#include "src/Core/Flagged.h" -#include "src/Core/ProductBase.h" #include "src/Core/GeneralProduct.h" +#include "src/Core/Solve.h" +#include "src/Core/Inverse.h" +#include "src/Core/SolverBase.h" +#include "src/Core/PermutationMatrix.h" +#include "src/Core/Transpositions.h" #include "src/Core/TriangularMatrix.h" #include "src/Core/SelfAdjointView.h" #include "src/Core/products/GeneralBlockPanelKernel.h" #include "src/Core/products/Parallelizer.h" -#include "src/Core/products/CoeffBasedProduct.h" +#include "src/Core/ProductEvaluators.h" #include "src/Core/products/GeneralMatrixVector.h" #include "src/Core/products/GeneralMatrixMatrix.h" #include "src/Core/SolveTriangular.h" @@ -340,6 +484,7 @@ using std::ptrdiff_t; #include "src/Core/products/TriangularSolverVector.h" #include "src/Core/BandMatrix.h" #include "src/Core/CoreIterators.h" +#include "src/Core/ConditionEstimator.h" #include "src/Core/BooleanRedux.h" #include "src/Core/Select.h" @@ -347,18 +492,17 @@ using std::ptrdiff_t; #include "src/Core/Random.h" #include "src/Core/Replicate.h" #include "src/Core/Reverse.h" -#include "src/Core/ArrayBase.h" #include "src/Core/ArrayWrapper.h" #ifdef EIGEN_USE_BLAS -#include "src/Core/products/GeneralMatrixMatrix_MKL.h" -#include "src/Core/products/GeneralMatrixVector_MKL.h" -#include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h" -#include "src/Core/products/SelfadjointMatrixMatrix_MKL.h" -#include "src/Core/products/SelfadjointMatrixVector_MKL.h" -#include "src/Core/products/TriangularMatrixMatrix_MKL.h" -#include "src/Core/products/TriangularMatrixVector_MKL.h" -#include "src/Core/products/TriangularSolverMatrix_MKL.h" +#include "src/Core/products/GeneralMatrixMatrix_BLAS.h" +#include "src/Core/products/GeneralMatrixVector_BLAS.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h" +#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h" +#include "src/Core/products/SelfadjointMatrixVector_BLAS.h" +#include "src/Core/products/TriangularMatrixMatrix_BLAS.h" +#include "src/Core/products/TriangularMatrixVector_BLAS.h" +#include "src/Core/products/TriangularSolverMatrix_BLAS.h" #endif // EIGEN_USE_BLAS #ifdef EIGEN_USE_MKL_VML @@ -369,8 +513,4 @@ using std::ptrdiff_t; #include "src/Core/util/ReenableStupidWarnings.h" -#ifdef EIGEN2_SUPPORT -#include "Eigen2Support" -#endif - #endif // EIGEN_CORE_H diff --git a/external/eigen3/Eigen/Eigen b/external/eigen3/Eigen/Eigen index 19b40ea..654c8dc 100644 --- a/external/eigen3/Eigen/Eigen +++ b/external/eigen3/Eigen/Eigen @@ -1,2 +1,2 @@ #include "Dense" -//#include "Sparse" +#include "Sparse" diff --git a/external/eigen3/Eigen/Eigen2Support b/external/eigen3/Eigen/Eigen2Support deleted file mode 100644 index 6aa009d..0000000 --- a/external/eigen3/Eigen/Eigen2Support +++ /dev/null @@ -1,95 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN2SUPPORT_H -#define EIGEN2SUPPORT_H - -#if (!defined(EIGEN2_SUPPORT)) || (!defined(EIGEN_CORE_H)) -#error Eigen2 support must be enabled by defining EIGEN2_SUPPORT before including any Eigen header -#endif - -#ifndef EIGEN_NO_EIGEN2_DEPRECATED_WARNING - -#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) -#warning "Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)" -#else -#pragma message ("Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. (Define EIGEN_NO_EIGEN2_DEPRECATED_WARNING to disable this warning)") -#endif - -#endif // EIGEN_NO_EIGEN2_DEPRECATED_WARNING - -#include "src/Core/util/DisableStupidWarnings.h" - -/** \ingroup Support_modules - * \defgroup Eigen2Support_Module Eigen2 support module - * - * \warning Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3. - * - * This module provides a couple of deprecated functions improving the compatibility with Eigen2. - * - * To use it, define EIGEN2_SUPPORT before including any Eigen header - * \code - * #define EIGEN2_SUPPORT - * \endcode - * - */ - -#include "src/Eigen2Support/Macros.h" -#include "src/Eigen2Support/Memory.h" -#include "src/Eigen2Support/Meta.h" -#include "src/Eigen2Support/Lazy.h" -#include "src/Eigen2Support/Cwise.h" -#include "src/Eigen2Support/CwiseOperators.h" -#include "src/Eigen2Support/TriangularSolver.h" -#include "src/Eigen2Support/Block.h" -#include "src/Eigen2Support/VectorBlock.h" -#include "src/Eigen2Support/Minor.h" -#include "src/Eigen2Support/MathFunctions.h" - - -#include "src/Core/util/ReenableStupidWarnings.h" - -// Eigen2 used to include iostream -#include - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ -using Eigen::Matrix##SizeSuffix##TypeSuffix; \ -using Eigen::Vector##SizeSuffix##TypeSuffix; \ -using Eigen::RowVector##SizeSuffix##TypeSuffix; - -#define EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(TypeSuffix) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) \ - -#define EIGEN_USING_MATRIX_TYPEDEFS \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(i) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(f) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(d) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cf) \ -EIGEN_USING_MATRIX_TYPEDEFS_FOR_TYPE(cd) - -#define USING_PART_OF_NAMESPACE_EIGEN \ -EIGEN_USING_MATRIX_TYPEDEFS \ -using Eigen::Matrix; \ -using Eigen::MatrixBase; \ -using Eigen::ei_random; \ -using Eigen::ei_real; \ -using Eigen::ei_imag; \ -using Eigen::ei_conj; \ -using Eigen::ei_abs; \ -using Eigen::ei_abs2; \ -using Eigen::ei_sqrt; \ -using Eigen::ei_exp; \ -using Eigen::ei_log; \ -using Eigen::ei_sin; \ -using Eigen::ei_cos; - -#endif // EIGEN2SUPPORT_H diff --git a/external/eigen3/Eigen/Eigenvalues b/external/eigen3/Eigen/Eigenvalues index 53c5a73..009e529 100644 --- a/external/eigen3/Eigen/Eigenvalues +++ b/external/eigen3/Eigen/Eigenvalues @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_EIGENVALUES_MODULE_H #define EIGEN_EIGENVALUES_MODULE_H @@ -25,6 +32,7 @@ * \endcode */ +#include "src/misc/RealSvd2x2.h" #include "src/Eigenvalues/Tridiagonalization.h" #include "src/Eigenvalues/RealSchur.h" #include "src/Eigenvalues/EigenSolver.h" @@ -37,9 +45,10 @@ #include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h" #ifdef EIGEN_USE_LAPACKE -#include "src/Eigenvalues/RealSchur_MKL.h" -#include "src/Eigenvalues/ComplexSchur_MKL.h" -#include "src/Eigenvalues/SelfAdjointEigenSolver_MKL.h" +#include "src/misc/lapacke.h" +#include "src/Eigenvalues/RealSchur_LAPACKE.h" +#include "src/Eigenvalues/ComplexSchur_LAPACKE.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/external/eigen3/Eigen/Geometry b/external/eigen3/Eigen/Geometry index efd9d45..716d529 100644 --- a/external/eigen3/Eigen/Geometry +++ b/external/eigen3/Eigen/Geometry @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_GEOMETRY_MODULE_H #define EIGEN_GEOMETRY_MODULE_H @@ -9,21 +16,17 @@ #include "LU" #include -#ifndef M_PI -#define M_PI 3.14159265358979323846 -#endif - /** \defgroup Geometry_Module Geometry module - * - * * * This module provides support for: * - fixed-size homogeneous transformations * - translation, scaling, 2D and 3D rotations - * - quaternions - * - \ref MatrixBase::cross() "cross product" - * - \ref MatrixBase::unitOrthogonal() "orthognal vector generation" - * - some linear components: parametrized-lines and hyperplanes + * - \link Quaternion quaternions \endlink + * - cross products (\ref MatrixBase::cross, \ref MatrixBase::cross3) + * - orthognal vector generation (\ref MatrixBase::unitOrthogonal) + * - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink + * - \link AlignedBox axis aligned bounding boxes \endlink + * - \link umeyama least-square transformation fitting \endlink * * \code * #include @@ -33,27 +36,23 @@ #include "src/Geometry/OrthoMethods.h" #include "src/Geometry/EulerAngles.h" -#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - #include "src/Geometry/Homogeneous.h" - #include "src/Geometry/RotationBase.h" - #include "src/Geometry/Rotation2D.h" - #include "src/Geometry/Quaternion.h" - #include "src/Geometry/AngleAxis.h" - #include "src/Geometry/Transform.h" - #include "src/Geometry/Translation.h" - #include "src/Geometry/Scaling.h" - #include "src/Geometry/Hyperplane.h" - #include "src/Geometry/ParametrizedLine.h" - #include "src/Geometry/AlignedBox.h" - #include "src/Geometry/Umeyama.h" - - #if defined EIGEN_VECTORIZE_SSE - #include "src/Geometry/arch/Geometry_SSE.h" - #endif -#endif - -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/Geometry/All.h" +#include "src/Geometry/Homogeneous.h" +#include "src/Geometry/RotationBase.h" +#include "src/Geometry/Rotation2D.h" +#include "src/Geometry/Quaternion.h" +#include "src/Geometry/AngleAxis.h" +#include "src/Geometry/Transform.h" +#include "src/Geometry/Translation.h" +#include "src/Geometry/Scaling.h" +#include "src/Geometry/Hyperplane.h" +#include "src/Geometry/ParametrizedLine.h" +#include "src/Geometry/AlignedBox.h" +#include "src/Geometry/Umeyama.h" + +// Use the SSE optimized version whenever possible. At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX +#include "src/Geometry/arch/Geometry_SSE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/external/eigen3/Eigen/Householder b/external/eigen3/Eigen/Householder index 6e348db..89cd81b 100644 --- a/external/eigen3/Eigen/Householder +++ b/external/eigen3/Eigen/Householder @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_HOUSEHOLDER_MODULE_H #define EIGEN_HOUSEHOLDER_MODULE_H diff --git a/external/eigen3/Eigen/IterativeLinearSolvers b/external/eigen3/Eigen/IterativeLinearSolvers index 0f4159d..957d575 100644 --- a/external/eigen3/Eigen/IterativeLinearSolvers +++ b/external/eigen3/Eigen/IterativeLinearSolvers @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H #define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H @@ -12,28 +19,29 @@ * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse. * Those solvers are accessible via the following classes: * - ConjugateGradient for selfadjoint (hermitian) matrices, + * - LeastSquaresConjugateGradient for rectangular least-square problems, * - BiCGSTAB for general square matrices. * * These iterative solvers are associated with some preconditioners: * - IdentityPreconditioner - not really useful - * - DiagonalPreconditioner - also called JAcobi preconditioner, work very well on diagonal dominant matrices. - * - IncompleteILUT - incomplete LU factorization with dual thresholding + * - DiagonalPreconditioner - also called Jacobi preconditioner, work very well on diagonal dominant matrices. + * - IncompleteLUT - incomplete LU factorization with dual thresholding * * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, UmfPackSupport, SuperLUSupport. * - * \code - * #include - * \endcode + \code + #include + \endcode */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - +#include "src/IterativeLinearSolvers/SolveWithGuess.h" #include "src/IterativeLinearSolvers/IterativeSolverBase.h" #include "src/IterativeLinearSolvers/BasicPreconditioners.h" #include "src/IterativeLinearSolvers/ConjugateGradient.h" +#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" #include "src/IterativeLinearSolvers/BiCGSTAB.h" #include "src/IterativeLinearSolvers/IncompleteLUT.h" +#include "src/IterativeLinearSolvers/IncompleteCholesky.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/external/eigen3/Eigen/Jacobi b/external/eigen3/Eigen/Jacobi index ba8a4dc..17c1d78 100644 --- a/external/eigen3/Eigen/Jacobi +++ b/external/eigen3/Eigen/Jacobi @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_JACOBI_MODULE_H #define EIGEN_JACOBI_MODULE_H diff --git a/external/eigen3/Eigen/LU b/external/eigen3/Eigen/LU index db57955..6f6c556 100644 --- a/external/eigen3/Eigen/LU +++ b/external/eigen3/Eigen/LU @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_LU_MODULE_H #define EIGEN_LU_MODULE_H @@ -16,25 +23,23 @@ * \endcode */ -#include "src/misc/Solve.h" #include "src/misc/Kernel.h" #include "src/misc/Image.h" #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" #ifdef EIGEN_USE_LAPACKE -#include "src/LU/PartialPivLU_MKL.h" +#include "src/misc/lapacke.h" +#include "src/LU/PartialPivLU_LAPACKE.h" #endif #include "src/LU/Determinant.h" -#include "src/LU/Inverse.h" +#include "src/LU/InverseImpl.h" -#if defined EIGEN_VECTORIZE_SSE +// Use the SSE optimized version whenever possible. At the moment the +// SSE version doesn't compile when AVX is enabled +#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX #include "src/LU/arch/Inverse_SSE.h" #endif -#ifdef EIGEN2_SUPPORT - #include "src/Eigen2Support/LU.h" -#endif - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_LU_MODULE_H diff --git a/external/eigen3/Eigen/LeastSquares b/external/eigen3/Eigen/LeastSquares deleted file mode 100644 index 35137c2..0000000 --- a/external/eigen3/Eigen/LeastSquares +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef EIGEN_REGRESSION_MODULE_H -#define EIGEN_REGRESSION_MODULE_H - -#ifndef EIGEN2_SUPPORT -#error LeastSquares is only available in Eigen2 support mode (define EIGEN2_SUPPORT) -#endif - -// exclude from normal eigen3-only documentation -#ifdef EIGEN2_SUPPORT - -#include "Core" - -#include "src/Core/util/DisableStupidWarnings.h" - -#include "Eigenvalues" -#include "Geometry" - -/** \defgroup LeastSquares_Module LeastSquares module - * This module provides linear regression and related features. - * - * \code - * #include - * \endcode - */ - -#include "src/Eigen2Support/LeastSquares.h" - -#include "src/Core/util/ReenableStupidWarnings.h" - -#endif // EIGEN2_SUPPORT - -#endif // EIGEN_REGRESSION_MODULE_H diff --git a/external/eigen3/Eigen/MetisSupport b/external/eigen3/Eigen/MetisSupport index 6a113f7..85c41bf 100644 --- a/external/eigen3/Eigen/MetisSupport +++ b/external/eigen3/Eigen/MetisSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_METISSUPPORT_MODULE_H #define EIGEN_METISSUPPORT_MODULE_H diff --git a/external/eigen3/Eigen/OrderingMethods b/external/eigen3/Eigen/OrderingMethods index 7c0f1ff..d8ea361 100644 --- a/external/eigen3/Eigen/OrderingMethods +++ b/external/eigen3/Eigen/OrderingMethods @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_ORDERINGMETHODS_MODULE_H #define EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/external/eigen3/Eigen/PaStiXSupport b/external/eigen3/Eigen/PaStiXSupport index 7c616ee..de3a63b 100644 --- a/external/eigen3/Eigen/PaStiXSupport +++ b/external/eigen3/Eigen/PaStiXSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_PASTIXSUPPORT_MODULE_H #define EIGEN_PASTIXSUPPORT_MODULE_H @@ -5,7 +12,6 @@ #include "src/Core/util/DisableStupidWarnings.h" -#include extern "C" { #include #include @@ -35,12 +41,8 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/PaStiXSupport/PaStiXSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/external/eigen3/Eigen/PardisoSupport b/external/eigen3/Eigen/PardisoSupport old mode 100644 new mode 100755 index 99330ce..340edf5 --- a/external/eigen3/Eigen/PardisoSupport +++ b/external/eigen3/Eigen/PardisoSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_PARDISOSUPPORT_MODULE_H #define EIGEN_PARDISOSUPPORT_MODULE_H @@ -7,8 +14,6 @@ #include -#include - /** \ingroup Support_modules * \defgroup PardisoSupport_Module PardisoSupport module * diff --git a/external/eigen3/Eigen/QR b/external/eigen3/Eigen/QR index ac5b026..80838e3 100644 --- a/external/eigen3/Eigen/QR +++ b/external/eigen3/Eigen/QR @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_QR_MODULE_H #define EIGEN_QR_MODULE_H @@ -15,31 +22,26 @@ * * This module provides various QR decompositions * This module also provides some MatrixBase methods, including: - * - MatrixBase::qr(), + * - MatrixBase::householderQr() + * - MatrixBase::colPivHouseholderQr() + * - MatrixBase::fullPivHouseholderQr() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" #include "src/QR/HouseholderQR.h" #include "src/QR/FullPivHouseholderQR.h" #include "src/QR/ColPivHouseholderQR.h" +#include "src/QR/CompleteOrthogonalDecomposition.h" #ifdef EIGEN_USE_LAPACKE -#include "src/QR/HouseholderQR_MKL.h" -#include "src/QR/ColPivHouseholderQR_MKL.h" -#endif - -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/QR.h" +#include "src/misc/lapacke.h" +#include "src/QR/HouseholderQR_LAPACKE.h" +#include "src/QR/ColPivHouseholderQR_LAPACKE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" -#ifdef EIGEN2_SUPPORT -#include "Eigenvalues" -#endif - #endif // EIGEN_QR_MODULE_H /* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/external/eigen3/Eigen/QtAlignedMalloc b/external/eigen3/Eigen/QtAlignedMalloc index 46f7d83..c6571f1 100644 --- a/external/eigen3/Eigen/QtAlignedMalloc +++ b/external/eigen3/Eigen/QtAlignedMalloc @@ -1,3 +1,9 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_QTMALLOC_MODULE_H #define EIGEN_QTMALLOC_MODULE_H @@ -8,7 +14,7 @@ #include "src/Core/util/DisableStupidWarnings.h" -void *qMalloc(size_t size) +void *qMalloc(std::size_t size) { return Eigen::internal::aligned_malloc(size); } @@ -18,7 +24,7 @@ void qFree(void *ptr) Eigen::internal::aligned_free(ptr); } -void *qRealloc(void *ptr, size_t size) +void *qRealloc(void *ptr, std::size_t size) { void* newPtr = Eigen::internal::aligned_malloc(size); memcpy(newPtr, ptr, size); diff --git a/external/eigen3/Eigen/SPQRSupport b/external/eigen3/Eigen/SPQRSupport index 7f1eb47..f70390c 100644 --- a/external/eigen3/Eigen/SPQRSupport +++ b/external/eigen3/Eigen/SPQRSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPQRSUPPORT_MODULE_H #define EIGEN_SPQRSUPPORT_MODULE_H @@ -21,8 +28,6 @@ * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" #include "src/CholmodSupport/CholmodSupport.h" #include "src/SPQRSupport/SuiteSparseQRSupport.h" diff --git a/external/eigen3/Eigen/SVD b/external/eigen3/Eigen/SVD index fd31001..86143c2 100644 --- a/external/eigen3/Eigen/SVD +++ b/external/eigen3/Eigen/SVD @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SVD_MODULE_H #define EIGEN_SVD_MODULE_H @@ -12,23 +19,26 @@ * * * This module provides SVD decomposition for matrices (both real and complex). - * This decomposition is accessible via the following MatrixBase method: + * Two decomposition algorithms are provided: + * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very slow for larger ones. + * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast for large problems. + * These decompositions are accessible via the respective classes and following MatrixBase methods: * - MatrixBase::jacobiSvd() + * - MatrixBase::bdcSvd() * * \code * #include * \endcode */ -#include "src/misc/Solve.h" +#include "src/misc/RealSvd2x2.h" +#include "src/SVD/UpperBidiagonalization.h" +#include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" +#include "src/SVD/BDCSVD.h" #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) -#include "src/SVD/JacobiSVD_MKL.h" -#endif -#include "src/SVD/UpperBidiagonalization.h" - -#ifdef EIGEN2_SUPPORT -#include "src/Eigen2Support/SVD.h" +#include "src/misc/lapacke.h" +#include "src/SVD/JacobiSVD_LAPACKE.h" #endif #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/external/eigen3/Eigen/Sparse b/external/eigen3/Eigen/Sparse index 7cc9c09..136e681 100644 --- a/external/eigen3/Eigen/Sparse +++ b/external/eigen3/Eigen/Sparse @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSE_MODULE_H #define EIGEN_SPARSE_MODULE_H @@ -11,14 +18,16 @@ * - \ref SparseQR_Module * - \ref IterativeLinearSolvers_Module * - * \code - * #include - * \endcode + \code + #include + \endcode */ #include "SparseCore" #include "OrderingMethods" +#ifndef EIGEN_MPL2_ONLY #include "SparseCholesky" +#endif #include "SparseLU" #include "SparseQR" #include "IterativeLinearSolvers" diff --git a/external/eigen3/Eigen/SparseCholesky b/external/eigen3/Eigen/SparseCholesky index 9f5056a..b6a320c 100644 --- a/external/eigen3/Eigen/SparseCholesky +++ b/external/eigen3/Eigen/SparseCholesky @@ -34,8 +34,6 @@ #error The SparseCholesky module has nothing to offer in MPL2 only mode #endif -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" #include "src/SparseCholesky/SimplicialCholesky.h" #ifndef EIGEN_MPL2_ONLY diff --git a/external/eigen3/Eigen/SparseCore b/external/eigen3/Eigen/SparseCore index 24bcf01..76966c4 100644 --- a/external/eigen3/Eigen/SparseCore +++ b/external/eigen3/Eigen/SparseCore @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSECORE_MODULE_H #define EIGEN_SPARSECORE_MODULE_H @@ -26,37 +33,35 @@ * This module depends on: Core. */ -namespace Eigen { - -/** The type used to identify a general sparse storage. */ -struct Sparse {}; - -} - #include "src/SparseCore/SparseUtil.h" #include "src/SparseCore/SparseMatrixBase.h" +#include "src/SparseCore/SparseAssign.h" #include "src/SparseCore/CompressedStorage.h" #include "src/SparseCore/AmbiVector.h" +#include "src/SparseCore/SparseCompressedBase.h" #include "src/SparseCore/SparseMatrix.h" +#include "src/SparseCore/SparseMap.h" #include "src/SparseCore/MappedSparseMatrix.h" #include "src/SparseCore/SparseVector.h" -#include "src/SparseCore/SparseBlock.h" -#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseRef.h" #include "src/SparseCore/SparseCwiseUnaryOp.h" #include "src/SparseCore/SparseCwiseBinaryOp.h" +#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseBlock.h" #include "src/SparseCore/SparseDot.h" -#include "src/SparseCore/SparsePermutation.h" #include "src/SparseCore/SparseRedux.h" -#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparseDiagonalProduct.h" #include "src/SparseCore/ConservativeSparseSparseProduct.h" #include "src/SparseCore/SparseSparseProductWithPruning.h" #include "src/SparseCore/SparseProduct.h" #include "src/SparseCore/SparseDenseProduct.h" -#include "src/SparseCore/SparseDiagonalProduct.h" -#include "src/SparseCore/SparseTriangularView.h" #include "src/SparseCore/SparseSelfAdjointView.h" +#include "src/SparseCore/SparseTriangularView.h" #include "src/SparseCore/TriangularSolver.h" -#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparsePermutation.h" +#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseSolverBase.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/external/eigen3/Eigen/SparseLU b/external/eigen3/Eigen/SparseLU index 8527a49..38b38b5 100644 --- a/external/eigen3/Eigen/SparseLU +++ b/external/eigen3/Eigen/SparseLU @@ -20,9 +20,6 @@ * Please, see the documentation of the SparseLU class for more details. */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - // Ordering interface #include "OrderingMethods" diff --git a/external/eigen3/Eigen/SparseQR b/external/eigen3/Eigen/SparseQR index 4ee4206..a6f3b7f 100644 --- a/external/eigen3/Eigen/SparseQR +++ b/external/eigen3/Eigen/SparseQR @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SPARSEQR_MODULE_H #define EIGEN_SPARSEQR_MODULE_H @@ -21,9 +28,6 @@ * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "OrderingMethods" #include "src/SparseCore/SparseColEtree.h" #include "src/SparseQR/SparseQR.h" diff --git a/external/eigen3/Eigen/StdDeque b/external/eigen3/Eigen/StdDeque index f272347..bc68397 100644 --- a/external/eigen3/Eigen/StdDeque +++ b/external/eigen3/Eigen/StdDeque @@ -14,7 +14,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ #define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) diff --git a/external/eigen3/Eigen/StdList b/external/eigen3/Eigen/StdList index 225c1e1..4c6262c 100644 --- a/external/eigen3/Eigen/StdList +++ b/external/eigen3/Eigen/StdList @@ -13,7 +13,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ #define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) diff --git a/external/eigen3/Eigen/StdVector b/external/eigen3/Eigen/StdVector index 6b22627..0c4697a 100644 --- a/external/eigen3/Eigen/StdVector +++ b/external/eigen3/Eigen/StdVector @@ -14,7 +14,7 @@ #include "Core" #include -#if (defined(_MSC_VER) && defined(_WIN64)) /* MSVC auto aligns in 64 bit builds */ +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && (EIGEN_MAX_STATIC_ALIGN_BYTES<=16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ #define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) diff --git a/external/eigen3/Eigen/SuperLUSupport b/external/eigen3/Eigen/SuperLUSupport index 575e14f..59312a8 100644 --- a/external/eigen3/Eigen/SuperLUSupport +++ b/external/eigen3/Eigen/SuperLUSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_SUPERLUSUPPORT_MODULE_H #define EIGEN_SUPERLUSUPPORT_MODULE_H @@ -36,6 +43,8 @@ namespace Eigen { struct SluMatrix; } * - class SuperLU: a supernodal sequential LU factorization. * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative methods). * + * \warning This wrapper requires at least versions 4.0 of SuperLU. The 3.x versions are not supported. + * * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined because it is too polluting. * * \code @@ -48,12 +57,8 @@ namespace Eigen { struct SluMatrix; } * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/SuperLUSupport/SuperLUSupport.h" - #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/external/eigen3/Eigen/UmfPackSupport b/external/eigen3/Eigen/UmfPackSupport index 7b1b660..00eec80 100644 --- a/external/eigen3/Eigen/UmfPackSupport +++ b/external/eigen3/Eigen/UmfPackSupport @@ -1,3 +1,10 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #ifndef EIGEN_UMFPACKSUPPORT_MODULE_H #define EIGEN_UMFPACKSUPPORT_MODULE_H @@ -26,9 +33,6 @@ extern "C" { * */ -#include "src/misc/Solve.h" -#include "src/misc/SparseSolve.h" - #include "src/UmfPackSupport/UmfPackSupport.h" #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/external/eigen3/Eigen/src/CMakeLists.txt b/external/eigen3/Eigen/src/CMakeLists.txt deleted file mode 100644 index c326f37..0000000 --- a/external/eigen3/Eigen/src/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -file(GLOB Eigen_src_subdirectories "*") -escape_string_as_regex(ESCAPED_CMAKE_CURRENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") -foreach(f ${Eigen_src_subdirectories}) - if(NOT f MATCHES "\\.txt" AND NOT f MATCHES "${ESCAPED_CMAKE_CURRENT_SOURCE_DIR}/[.].+" ) - add_subdirectory(${f}) - endif() -endforeach() diff --git a/external/eigen3/Eigen/src/Cholesky/CMakeLists.txt b/external/eigen3/Eigen/src/Cholesky/CMakeLists.txt deleted file mode 100644 index d01488b..0000000 --- a/external/eigen3/Eigen/src/Cholesky/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Cholesky_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Cholesky_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Cholesky COMPONENT Devel - ) diff --git a/external/eigen3/Eigen/src/Cholesky/LDLT.h b/external/eigen3/Eigen/src/Cholesky/LDLT.h index abd30bd..fcee7b2 100644 --- a/external/eigen3/Eigen/src/Cholesky/LDLT.h +++ b/external/eigen3/Eigen/src/Cholesky/LDLT.h @@ -13,7 +13,7 @@ #ifndef EIGEN_LDLT_H #define EIGEN_LDLT_H -namespace Eigen { +namespace Eigen { namespace internal { template struct LDLT_Traits; @@ -28,8 +28,8 @@ namespace internal { * * \brief Robust Cholesky decomposition of a matrix with pivoting * - * \param MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition - * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * \tparam _MatrixType the type of the matrix of which to compute the LDL^T Cholesky decomposition + * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. * The other triangular part won't be read. * * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite @@ -43,7 +43,9 @@ namespace internal { * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky * decomposition to determine whether a system of equations has a solution. * - * \sa MatrixBase::ldlt(), class LLT + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT */ template class LDLT { @@ -52,15 +54,15 @@ template class LDLT enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options & ~RowMajorBit, // these are the options for the TmpMatrixType, we need a ColMajor matrix here! MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, UpLo = _UpLo }; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; - typedef typename MatrixType::Index Index; - typedef Matrix TmpMatrixType; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; + typedef Matrix TmpMatrixType; typedef Transpositions TranspositionType; typedef PermutationMatrix PermutationType; @@ -72,11 +74,11 @@ template class LDLT * The default constructor is useful in cases in which the user intends to * perform decompositions via LDLT::compute(const MatrixType&). */ - LDLT() - : m_matrix(), - m_transpositions(), + LDLT() + : m_matrix(), + m_transpositions(), m_sign(internal::ZeroSign), - m_isInitialized(false) + m_isInitialized(false) {} /** \brief Default Constructor with memory preallocation @@ -85,7 +87,7 @@ template class LDLT * according to the specified problem \a size. * \sa LDLT() */ - LDLT(Index size) + explicit LDLT(Index size) : m_matrix(size, size), m_transpositions(size), m_temporary(size), @@ -96,16 +98,35 @@ template class LDLT /** \brief Constructor with decomposition * * This calculates the decomposition for the input \a matrix. + * * \sa LDLT(Index size) */ - LDLT(const MatrixType& matrix) + template + explicit LDLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_transpositions(matrix.rows()), m_temporary(matrix.rows()), m_sign(internal::ZeroSign), m_isInitialized(false) { - compute(matrix); + compute(matrix.derived()); + } + + /** \brief Constructs a LDLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa LDLT(const EigenBase&) + */ + template + explicit LDLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) + { + compute(matrix.derived()); } /** Clear any existing decomposition @@ -151,13 +172,6 @@ template class LDLT eigen_assert(m_isInitialized && "LDLT is not initialized."); return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign; } - - #ifdef EIGEN2_SUPPORT - inline bool isPositiveDefinite() const - { - return isPositive(); - } - #endif /** \returns true if the matrix is negative (semidefinite) */ inline bool isNegative(void) const @@ -173,37 +187,38 @@ template class LDLT * \note_about_checking_solutions * * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$ - * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, + * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function * computes the least-square solution of \f$ A x = b \f$ is \f$ A \f$ is singular. * - * \sa MatrixBase::ldlt() + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() */ template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LDLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LDLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); } - #ifdef EIGEN2_SUPPORT - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = this->solve(b); - return true; - } - #endif - template bool solveInPlace(MatrixBase &bAndX) const; - LDLT& compute(const MatrixType& matrix); + template + LDLT& compute(const EigenBase& matrix); + + /** \returns an estimate of the reciprocal condition number of the matrix of + * which \c *this is the LDLT decomposition. + */ + RealScalar rcond() const + { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } template LDLT& rankUpdate(const MatrixBase& w, const RealScalar& alpha=1); @@ -220,6 +235,13 @@ template class LDLT MatrixType reconstructedMatrix() const; + /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint. + * + * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: + * \code x = decomposition.adjoint().solve(b) \endcode + */ + const LDLT& adjoint() const { return *this; }; + inline Index rows() const { return m_matrix.rows(); } inline Index cols() const { return m_matrix.cols(); } @@ -231,11 +253,17 @@ template class LDLT ComputationInfo info() const { eigen_assert(m_isInitialized && "LDLT is not initialized."); - return Success; + return m_info; } + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif + protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); @@ -248,10 +276,12 @@ template class LDLT * is not stored), and the diagonal entries correspond to D. */ MatrixType m_matrix; + RealScalar m_l1_norm; TranspositionType m_transpositions; TmpMatrixType m_temporary; internal::SignMatrix m_sign; bool m_isInitialized; + ComputationInfo m_info; }; namespace internal { @@ -266,15 +296,17 @@ template<> struct ldlt_inplace using std::abs; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; + typedef typename TranspositionType::StorageIndex IndexType; eigen_assert(mat.rows()==mat.cols()); const Index size = mat.rows(); + bool found_zero_pivot = false; + bool ret = true; if (size <= 1) { transpositions.setIdentity(); - if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef; - else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef; + if (numext::real(mat.coeff(0,0)) > static_cast(0) ) sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0,0)) < static_cast(0)) sign = NegativeSemiDef; else sign = ZeroSign; return true; } @@ -286,7 +318,7 @@ template<> struct ldlt_inplace mat.diagonal().tail(size-k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); index_of_biggest_in_corner += k; - transpositions.coeffRef(k) = index_of_biggest_in_corner; + transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner); if(k != index_of_biggest_in_corner) { // apply the transposition while taking care to consider only @@ -295,7 +327,7 @@ template<> struct ldlt_inplace mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k)); mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s)); std::swap(mat.coeffRef(k,k),mat.coeffRef(index_of_biggest_in_corner,index_of_biggest_in_corner)); - for(int i=k+1;i struct ldlt_inplace if(rs>0) A21.noalias() -= A20 * temp.head(k); } - + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot - // was smaller than the cutoff value. However, soince LDLT is not rank-revealing - // we should only make sure we do not introduce INF or NaN values. - // LAPACK also uses 0 as the cutoff value. + // was smaller than the cutoff value. However, since LDLT is not rank-revealing + // we should only make sure that we do not introduce INF or NaN values. + // Remark that LAPACK also uses 0 as the cutoff value. RealScalar realAkk = numext::real(mat.coeffRef(k,k)); - if((rs>0) && (abs(realAkk) > RealScalar(0))) + bool pivot_is_valid = (abs(realAkk) > RealScalar(0)); + + if(k==0 && !pivot_is_valid) + { + // The entire diagonal is zero, there is nothing more to do + // except filling the transpositions, and checking whether the matrix is zero. + sign = ZeroSign; + for(Index j = 0; j0) && pivot_is_valid) A21 /= realAkk; + if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed + else if(!pivot_is_valid) found_zero_pivot = true; + if (sign == PositiveSemiDef) { - if (realAkk < 0) sign = Indefinite; + if (realAkk < static_cast(0)) sign = Indefinite; } else if (sign == NegativeSemiDef) { - if (realAkk > 0) sign = Indefinite; + if (realAkk > static_cast(0)) sign = Indefinite; } else if (sign == ZeroSign) { - if (realAkk > 0) sign = PositiveSemiDef; - else if (realAkk < 0) sign = NegativeSemiDef; + if (realAkk > static_cast(0)) sign = PositiveSemiDef; + else if (realAkk < static_cast(0)) sign = NegativeSemiDef; } } - return true; + return ret; } // Reference for the algorithm: Davis and Hager, "Multiple Rank @@ -356,7 +406,6 @@ template<> struct ldlt_inplace using numext::isfinite; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; const Index size = mat.rows(); eigen_assert(mat.cols() == size && w.size()==size); @@ -420,16 +469,16 @@ template struct LDLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } }; template struct LDLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } }; } // end namespace internal @@ -437,21 +486,35 @@ template struct LDLT_Traits /** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix */ template -LDLT& LDLT::compute(const MatrixType& a) +template +LDLT& LDLT::compute(const EigenBase& a) { check_template_parameters(); - + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); - m_matrix = a; + m_matrix = a.derived(); + + // Compute matrix L1 norm = max abs column sum. + m_l1_norm = RealScalar(0); + // TODO move this code to SelfAdjointView + for (Index col = 0; col < size; ++col) { + RealScalar abs_col_sum; + if (_UpLo == Lower) + abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>(); + else + abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); + if (abs_col_sum > m_l1_norm) + m_l1_norm = abs_col_sum; + } m_transpositions.resize(size); m_isInitialized = false; m_temporary.resize(size); m_sign = internal::ZeroSign; - internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign); + m_info = internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success : NumericalIssue; m_isInitialized = true; return *this; @@ -466,18 +529,19 @@ template template LDLT& LDLT::rankUpdate(const MatrixBase& w, const typename LDLT::RealScalar& sigma) { + typedef typename TranspositionType::StorageIndex IndexType; const Index size = w.rows(); if (m_isInitialized) { eigen_assert(m_matrix.rows()==size); } else - { + { m_matrix.resize(size,size); m_matrix.setZero(); m_transpositions.resize(size); for (Index i = 0; i < size; i++) - m_transpositions.coeffRef(i) = i; + m_transpositions.coeffRef(i) = IndexType(i); m_temporary.resize(size); m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef; m_isInitialized = true; @@ -488,53 +552,45 @@ LDLT& LDLT::rankUpdate(const MatrixBase -struct solve_retval, Rhs> - : solve_retval_base, Rhs> +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const { - typedef LDLT<_MatrixType,_UpLo> LDLTType; - EIGEN_MAKE_SOLVE_HELPERS(LDLTType,Rhs) - - template void evalTo(Dest& dst) const + eigen_assert(rhs.rows() == rows()); + // dst = P b + dst = m_transpositions * rhs; + + // dst = L^-1 (P b) + matrixL().solveInPlace(dst); + + // dst = D^-1 (L^-1 P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + const typename Diagonal::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon + // as motivated by LAPACK's xGELSS: + // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); + // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest + // diagonal element is not well justified and leads to numerical issues in some cases. + // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. + RealScalar tolerance = RealScalar(1) / NumTraits::highest(); + + for (Index i = 0; i < vecD.size(); ++i) { - eigen_assert(rhs().rows() == dec().matrixLDLT().rows()); - // dst = P b - dst = dec().transpositionsP() * rhs(); - - // dst = L^-1 (P b) - dec().matrixL().solveInPlace(dst); - - // dst = D^-1 (L^-1 P b) - // more precisely, use pseudo-inverse of D (see bug 241) - using std::abs; - using std::max; - typedef typename LDLTType::MatrixType MatrixType; - typedef typename LDLTType::RealScalar RealScalar; - const typename Diagonal::RealReturnType vectorD(dec().vectorD()); - // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon - // as motivated by LAPACK's xGELSS: - // RealScalar tolerance = (max)(vectorD.array().abs().maxCoeff() *NumTraits::epsilon(),RealScalar(1) / NumTraits::highest()); - // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest - // diagonal element is not well justified and to numerical issues in some cases. - // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. - RealScalar tolerance = RealScalar(1) / NumTraits::highest(); - - for (Index i = 0; i < vectorD.size(); ++i) { - if(abs(vectorD(i)) > tolerance) - dst.row(i) /= vectorD(i); - else - dst.row(i).setZero(); - } + if(abs(vecD(i)) > tolerance) + dst.row(i) /= vecD(i); + else + dst.row(i).setZero(); + } - // dst = L^-T (D^-1 L^-1 P b) - dec().matrixU().solveInPlace(dst); + // dst = L^-T (D^-1 L^-1 P b) + matrixU().solveInPlace(dst); - // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b - dst = dec().transpositionsP().transpose() * dst; - } -}; + // dst = P^-1 (L^-T D^-1 L^-1 P b) = A^-1 b + dst = m_transpositions.transpose() * dst; } +#endif /** \internal use x = ldlt_object.solve(x); * @@ -588,6 +644,7 @@ MatrixType LDLT::reconstructedMatrix() const /** \cholesky_module * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa MatrixBase::ldlt() */ template inline const LDLT::PlainObject, UpLo> @@ -598,6 +655,7 @@ SelfAdjointView::ldlt() const /** \cholesky_module * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa SelfAdjointView::ldlt() */ template inline const LDLT::PlainObject> diff --git a/external/eigen3/Eigen/src/Cholesky/LLT.h b/external/eigen3/Eigen/src/Cholesky/LLT.h index 7c11a2d..87ca8d4 100644 --- a/external/eigen3/Eigen/src/Cholesky/LLT.h +++ b/external/eigen3/Eigen/src/Cholesky/LLT.h @@ -10,7 +10,7 @@ #ifndef EIGEN_LLT_H #define EIGEN_LLT_H -namespace Eigen { +namespace Eigen { namespace internal{ template struct LLT_Traits; @@ -22,8 +22,8 @@ template struct LLT_Traits; * * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features * - * \param MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition - * \param UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. + * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition + * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. * The other triangular part won't be read. * * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite @@ -40,8 +40,10 @@ template struct LLT_Traits; * * Example: \include LLT_example.cpp * Output: \verbinclude LLT_example.out - * - * \sa MatrixBase::llt(), class LDLT + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT */ /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) * Note that during the decomposition, only the upper triangular part of A is considered. Therefore, @@ -54,12 +56,12 @@ template class LLT enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; - typedef typename MatrixType::Index Index; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + typedef typename MatrixType::StorageIndex StorageIndex; enum { PacketSize = internal::packet_traits::size, @@ -83,14 +85,30 @@ template class LLT * according to the specified problem \a size. * \sa LLT() */ - LLT(Index size) : m_matrix(size, size), + explicit LLT(Index size) : m_matrix(size, size), m_isInitialized(false) {} - LLT(const MatrixType& matrix) + template + explicit LLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_isInitialized(false) { - compute(matrix); + compute(matrix.derived()); + } + + /** \brief Constructs a LDLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when + * \c MatrixType is a Eigen::Ref. + * + * \sa LLT(const EigenBase&) + */ + template + explicit LLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_isInitialized(false) + { + compute(matrix.derived()); } /** \returns a view of the upper triangular matrix U */ @@ -115,33 +133,33 @@ template class LLT * Example: \include LLT_solve.cpp * Output: \verbinclude LLT_solve.out * - * \sa solveInPlace(), MatrixBase::llt() + * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() */ template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==b.rows() && "LLT::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); } - #ifdef EIGEN2_SUPPORT - template - bool solve(const MatrixBase& b, ResultType *result) const - { - *result = this->solve(b); - return true; - } - - bool isPositiveDefinite() const { return true; } - #endif - template void solveInPlace(MatrixBase &bAndX) const; - LLT& compute(const MatrixType& matrix); + template + LLT& compute(const EigenBase& matrix); + + /** \returns an estimate of the reciprocal condition number of the matrix of + * which \c *this is the Cholesky decomposition. + */ + RealScalar rcond() const + { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative"); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } /** \returns the LLT decomposition matrix * @@ -167,24 +185,38 @@ template class LLT return m_info; } + /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix is self-adjoint. + * + * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: + * \code x = decomposition.adjoint().solve(b) \endcode + */ + const LLT& adjoint() const { return *this; }; + inline Index rows() const { return m_matrix.rows(); } inline Index cols() const { return m_matrix.cols(); } template LLT rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const; + #endif + protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + /** \internal * Used to compute and store L * The strict upper part is not used and even not initialized. */ MatrixType m_matrix; + RealScalar m_l1_norm; bool m_isInitialized; ComputationInfo m_info; }; @@ -194,12 +226,11 @@ namespace internal { template struct llt_inplace; template -static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) +static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) { using std::sqrt; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; typedef typename MatrixType::ColXpr ColXpr; typedef typename internal::remove_all::type ColXprCleaned; typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; @@ -268,11 +299,10 @@ template struct llt_inplace { typedef typename NumTraits::Real RealScalar; template - static typename MatrixType::Index unblocked(MatrixType& mat) + static Index unblocked(MatrixType& mat) { using std::sqrt; - typedef typename MatrixType::Index Index; - + eigen_assert(mat.rows()==mat.cols()); const Index size = mat.rows(); for(Index k = 0; k < size; ++k) @@ -295,9 +325,8 @@ template struct llt_inplace } template - static typename MatrixType::Index blocked(MatrixType& m) + static Index blocked(MatrixType& m) { - typedef typename MatrixType::Index Index; eigen_assert(m.rows()==m.cols()); Index size = m.rows(); if(size<32) @@ -322,36 +351,36 @@ template struct llt_inplace Index ret; if((ret=unblocked(A11))>=0) return k+ret; if(rs>0) A11.adjoint().template triangularView().template solveInPlace(A21); - if(rs>0) A22.template selfadjointView().rankUpdate(A21,-1); // bottleneck + if(rs>0) A22.template selfadjointView().rankUpdate(A21,typename NumTraits::Literal(-1)); // bottleneck } return -1; } template - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } }; - + template struct llt_inplace { typedef typename NumTraits::Real RealScalar; template - static EIGEN_STRONG_INLINE typename MatrixType::Index unblocked(MatrixType& mat) + static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat) { Transpose matt(mat); return llt_inplace::unblocked(matt); } template - static EIGEN_STRONG_INLINE typename MatrixType::Index blocked(MatrixType& mat) + static EIGEN_STRONG_INLINE Index blocked(MatrixType& mat) { Transpose matt(mat); return llt_inplace::blocked(matt); } template - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) { Transpose matt(mat); return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); @@ -362,8 +391,8 @@ template struct LLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m; } - static inline MatrixU getU(const MatrixType& m) { return m.adjoint(); } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace::blocked(m)==-1; } }; @@ -372,8 +401,8 @@ template struct LLT_Traits { typedef const TriangularView MatrixL; typedef const TriangularView MatrixU; - static inline MatrixL getL(const MatrixType& m) { return m.adjoint(); } - static inline MatrixU getU(const MatrixType& m) { return m; } + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } static bool inplace_decomposition(MatrixType& m) { return llt_inplace::blocked(m)==-1; } }; @@ -388,14 +417,28 @@ template struct LLT_Traits * Output: \verbinclude TutorialLinAlgComputeTwice.out */ template -LLT& LLT::compute(const MatrixType& a) +template +LLT& LLT::compute(const EigenBase& a) { check_template_parameters(); - + eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); - m_matrix = a; + m_matrix = a.derived(); + + // Compute matrix L1 norm = max abs column sum. + m_l1_norm = RealScalar(0); + // TODO move this code to SelfAdjointView + for (Index col = 0; col < size; ++col) { + RealScalar abs_col_sum; + if (_UpLo == Lower) + abs_col_sum = m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>(); + else + abs_col_sum = m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); + if (abs_col_sum > m_l1_norm) + m_l1_norm = abs_col_sum; + } m_isInitialized = true; bool ok = Traits::inplace_decomposition(m_matrix); @@ -423,33 +466,24 @@ LLT<_MatrixType,_UpLo> LLT<_MatrixType,_UpLo>::rankUpdate(const VectorType& v, c return *this; } - -namespace internal { -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef LLT<_MatrixType,UpLo> LLTType; - EIGEN_MAKE_SOLVE_HELPERS(LLTType,Rhs) - template void evalTo(Dest& dst) const - { - dst = rhs(); - dec().solveInPlace(dst); - } -}; +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const +{ + dst = rhs; + solveInPlace(dst); } +#endif /** \internal use x = llt_object.solve(x); - * + * * This is the \em in-place version of solve(). * * \param bAndX represents both the right-hand side matrix b and result x. * - * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD. - * - * This version avoids a copy when the right hand side matrix b is not - * needed anymore. + * This version avoids a copy when the right hand side matrix b is not needed anymore. * * \sa LLT::solve(), MatrixBase::llt() */ @@ -475,6 +509,7 @@ MatrixType LLT::reconstructedMatrix() const /** \cholesky_module * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() */ template inline const LLT::PlainObject> @@ -485,6 +520,7 @@ MatrixBase::llt() const /** \cholesky_module * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() */ template inline const LLT::PlainObject, UpLo> diff --git a/external/eigen3/Eigen/src/Cholesky/LLT_MKL.h b/external/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h similarity index 71% rename from external/eigen3/Eigen/src/Cholesky/LLT_MKL.h rename to external/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h index 66675d7..bc6489e 100644 --- a/external/eigen3/Eigen/src/Cholesky/LLT_MKL.h +++ b/external/eigen3/Eigen/src/Cholesky/LLT_LAPACKE.h @@ -25,41 +25,38 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * LLt decomposition based on LAPACKE_?potrf function. ******************************************************************************** */ -#ifndef EIGEN_LLT_MKL_H -#define EIGEN_LLT_MKL_H - -#include "Eigen/src/Core/util/MKL_support.h" -#include +#ifndef EIGEN_LLT_LAPACKE_H +#define EIGEN_LLT_LAPACKE_H namespace Eigen { namespace internal { -template struct mkl_llt; +template struct lapacke_llt; -#define EIGEN_MKL_LLT(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template<> struct mkl_llt \ +#define EIGEN_LAPACKE_LLT(EIGTYPE, BLASTYPE, LAPACKE_PREFIX) \ +template<> struct lapacke_llt \ { \ template \ - static inline typename MatrixType::Index potrf(MatrixType& m, char uplo) \ + static inline Index potrf(MatrixType& m, char uplo) \ { \ lapack_int matrix_order; \ lapack_int size, lda, info, StorageOrder; \ EIGTYPE* a; \ eigen_assert(m.rows()==m.cols()); \ /* Set up parameters for ?potrf */ \ - size = m.rows(); \ + size = convert_index(m.rows()); \ StorageOrder = MatrixType::Flags&RowMajorBit?RowMajor:ColMajor; \ matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ a = &(m.coeffRef(0,0)); \ - lda = m.outerStride(); \ + lda = convert_index(m.outerStride()); \ \ - info = LAPACKE_##MKLPREFIX##potrf( matrix_order, uplo, size, (MKLTYPE*)a, lda ); \ + info = LAPACKE_##LAPACKE_PREFIX##potrf( matrix_order, uplo, size, (BLASTYPE*)a, lda ); \ info = (info==0) ? -1 : info>0 ? info-1 : size; \ return info; \ } \ @@ -67,36 +64,36 @@ template<> struct mkl_llt \ template<> struct llt_inplace \ { \ template \ - static typename MatrixType::Index blocked(MatrixType& m) \ + static Index blocked(MatrixType& m) \ { \ - return mkl_llt::potrf(m, 'L'); \ + return lapacke_llt::potrf(m, 'L'); \ } \ template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ { return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); } \ }; \ template<> struct llt_inplace \ { \ template \ - static typename MatrixType::Index blocked(MatrixType& m) \ + static Index blocked(MatrixType& m) \ { \ - return mkl_llt::potrf(m, 'U'); \ + return lapacke_llt::potrf(m, 'U'); \ } \ template \ - static typename MatrixType::Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const typename MatrixType::RealScalar& sigma) \ { \ Transpose matt(mat); \ return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); \ } \ }; -EIGEN_MKL_LLT(double, double, d) -EIGEN_MKL_LLT(float, float, s) -EIGEN_MKL_LLT(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LLT(scomplex, MKL_Complex8, c) +EIGEN_LAPACKE_LLT(double, double, d) +EIGEN_LAPACKE_LLT(float, float, s) +EIGEN_LAPACKE_LLT(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_LLT(scomplex, lapack_complex_float, c) } // end namespace internal } // end namespace Eigen -#endif // EIGEN_LLT_MKL_H +#endif // EIGEN_LLT_LAPACKE_H diff --git a/external/eigen3/Eigen/src/CholmodSupport/CMakeLists.txt b/external/eigen3/Eigen/src/CholmodSupport/CMakeLists.txt deleted file mode 100644 index 814dfa6..0000000 --- a/external/eigen3/Eigen/src/CholmodSupport/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_CholmodSupport_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CholmodSupport_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/CholmodSupport COMPONENT Devel - ) diff --git a/external/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h b/external/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h index 99dbe17..5719720 100644 --- a/external/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/external/eigen3/Eigen/src/CholmodSupport/CholmodSupport.h @@ -14,46 +14,52 @@ namespace Eigen { namespace internal { -template -void cholmod_configure_matrix(CholmodType& mat) -{ - if (internal::is_same::value) - { - mat.xtype = CHOLMOD_REAL; - mat.dtype = CHOLMOD_SINGLE; - } - else if (internal::is_same::value) - { +template struct cholmod_configure_matrix; + +template<> struct cholmod_configure_matrix { + template + static void run(CholmodType& mat) { mat.xtype = CHOLMOD_REAL; mat.dtype = CHOLMOD_DOUBLE; } - else if (internal::is_same >::value) - { - mat.xtype = CHOLMOD_COMPLEX; - mat.dtype = CHOLMOD_SINGLE; - } - else if (internal::is_same >::value) - { +}; + +template<> struct cholmod_configure_matrix > { + template + static void run(CholmodType& mat) { mat.xtype = CHOLMOD_COMPLEX; mat.dtype = CHOLMOD_DOUBLE; } - else - { - eigen_assert(false && "Scalar type not supported by CHOLMOD"); - } -} +}; + +// Other scalar types are not yet suppotred by Cholmod +// template<> struct cholmod_configure_matrix { +// template +// static void run(CholmodType& mat) { +// mat.xtype = CHOLMOD_REAL; +// mat.dtype = CHOLMOD_SINGLE; +// } +// }; +// +// template<> struct cholmod_configure_matrix > { +// template +// static void run(CholmodType& mat) { +// mat.xtype = CHOLMOD_COMPLEX; +// mat.dtype = CHOLMOD_SINGLE; +// } +// }; } // namespace internal /** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. * Note that the data are shared. */ -template -cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) +template +cholmod_sparse viewAsCholmod(Ref > mat) { cholmod_sparse res; res.nzmax = mat.nonZeros(); - res.nrow = mat.rows();; + res.nrow = mat.rows(); res.ncol = mat.cols(); res.p = mat.outerIndexPtr(); res.i = mat.innerIndexPtr(); @@ -74,11 +80,11 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) res.dtype = 0; res.stype = -1; - if (internal::is_same<_Index,int>::value) + if (internal::is_same<_StorageIndex,int>::value) { res.itype = CHOLMOD_INT; } - else if (internal::is_same<_Index,SuiteSparse_long>::value) + else if (internal::is_same<_StorageIndex,long>::value) { res.itype = CHOLMOD_LONG; } @@ -88,7 +94,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) } // setup res.xtype - internal::cholmod_configure_matrix<_Scalar>(res); + internal::cholmod_configure_matrix<_Scalar>::run(res); res.stype = 0; @@ -98,16 +104,23 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat) template const cholmod_sparse viewAsCholmod(const SparseMatrix<_Scalar,_Options,_Index>& mat) { - cholmod_sparse res = viewAsCholmod(mat.const_cast_derived()); + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + return res; +} + +template +const cholmod_sparse viewAsCholmod(const SparseVector<_Scalar,_Options,_Index>& mat) +{ + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); return res; } /** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. * The data are not copied but shared. */ template -cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) { - cholmod_sparse res = viewAsCholmod(mat.matrix().const_cast_derived()); + cholmod_sparse res = viewAsCholmod(Ref >(mat.matrix().const_cast_derived())); if(UpLo==Upper) res.stype = 1; if(UpLo==Lower) res.stype = -1; @@ -131,19 +144,19 @@ cholmod_dense viewAsCholmod(MatrixBase& mat) res.x = (void*)(mat.derived().data()); res.z = 0; - internal::cholmod_configure_matrix(res); + internal::cholmod_configure_matrix::run(res); return res; } /** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. * The data are not copied but shared. */ -template -MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) +template +MappedSparseMatrix viewAsEigen(cholmod_sparse& cm) { - return MappedSparseMatrix - (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], - static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); + return MappedSparseMatrix + (cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], + static_cast(cm.p), static_cast(cm.i),static_cast(cm.x) ); } enum CholmodMode { @@ -157,29 +170,39 @@ enum CholmodMode { * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT */ template -class CholmodBase : internal::noncopyable +class CholmodBase : public SparseSolverBase { + protected: + typedef SparseSolverBase Base; + using Base::derived; + using Base::m_isInitialized; public: typedef _MatrixType MatrixType; enum { UpLo = _UpLo }; typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::RealScalar RealScalar; typedef MatrixType CholMatrixType; - typedef typename MatrixType::Index Index; + typedef typename MatrixType::StorageIndex StorageIndex; + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime + }; public: CholmodBase() - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) { - m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); + EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); + m_shiftOffset[0] = m_shiftOffset[1] = 0.0; cholmod_start(&m_cholmod); } - CholmodBase(const MatrixType& matrix) - : m_cholmodFactor(0), m_info(Success), m_isInitialized(false) + explicit CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) { - m_shiftOffset[0] = m_shiftOffset[1] = RealScalar(0.0); + EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); + m_shiftOffset[0] = m_shiftOffset[1] = 0.0; cholmod_start(&m_cholmod); compute(matrix); } @@ -191,11 +214,8 @@ class CholmodBase : internal::noncopyable cholmod_finish(&m_cholmod); } - inline Index cols() const { return m_cholmodFactor->n; } - inline Index rows() const { return m_cholmodFactor->n; } - - Derived& derived() { return *static_cast(this); } - const Derived& derived() const { return *static_cast(this); } + inline StorageIndex cols() const { return internal::convert_index(m_cholmodFactor->n); } + inline StorageIndex rows() const { return internal::convert_index(m_cholmodFactor->n); } /** \brief Reports whether previous computation was successful. * @@ -216,34 +236,6 @@ class CholmodBase : internal::noncopyable return derived(); } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::solve_retval - solve(const MatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); - } - - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. - * - * \sa compute() - */ - template - inline const internal::sparse_solve_retval - solve(const SparseMatrixBase& b) const - { - eigen_assert(m_isInitialized && "LLT is not initialized."); - eigen_assert(rows()==b.rows() - && "CholmodDecomposition::solve(): invalid number of rows of the right hand side matrix b"); - return internal::sparse_solve_retval(*this, b.derived()); - } - /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. * * This function is particularly useful when solving for several problems having the same structure. @@ -277,7 +269,7 @@ class CholmodBase : internal::noncopyable eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); - + // If the factorization failed, minor is the column at which it did. On success minor == n. this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); m_factorizationIsOk = true; @@ -290,20 +282,22 @@ class CholmodBase : internal::noncopyable #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal */ template - void _solve(const MatrixBase &b, MatrixBase &dest) const + void _solve_impl(const MatrixBase &b, MatrixBase &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; EIGEN_UNUSED_VARIABLE(size); eigen_assert(size==b.rows()); + + // Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref. + Ref > b_ref(b.derived()); - // note: cd stands for Cholmod Dense - Rhs& b_ref(b.const_cast_derived()); cholmod_dense b_cd = viewAsCholmod(b_ref); cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); if(!x_cd) { this->m_info = NumericalIssue; + return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) dest = Matrix::Map(reinterpret_cast(x_cd->x),b.rows(),b.cols()); @@ -311,8 +305,8 @@ class CholmodBase : internal::noncopyable } /** \internal */ - template - void _solve(const SparseMatrix &b, SparseMatrix &dest) const + template + void _solve_impl(const SparseMatrixBase &b, SparseMatrixBase &dest) const { eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); const Index size = m_cholmodFactor->n; @@ -320,14 +314,16 @@ class CholmodBase : internal::noncopyable eigen_assert(size==b.rows()); // note: cs stands for Cholmod Sparse - cholmod_sparse b_cs = viewAsCholmod(b); + Ref > b_ref(b.const_cast_derived()); + cholmod_sparse b_cs = viewAsCholmod(b_ref); cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); if(!x_cs) { this->m_info = NumericalIssue; + return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - dest = viewAsEigen(*x_cs); + dest.derived() = viewAsEigen(*x_cs); cholmod_free_sparse(&x_cs, &m_cholmod); } #endif // EIGEN_PARSED_BY_DOXYGEN @@ -344,10 +340,61 @@ class CholmodBase : internal::noncopyable */ Derived& setShift(const RealScalar& offset) { - m_shiftOffset[0] = offset; + m_shiftOffset[0] = double(offset); return derived(); } + /** \returns the determinant of the underlying matrix from the current factorization */ + Scalar determinant() const + { + using std::exp; + return exp(logDeterminant()); + } + + /** \returns the log determinant of the underlying matrix from the current factorization */ + Scalar logDeterminant() const + { + using std::log; + using numext::real; + eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()"); + + RealScalar logDet = 0; + Scalar *x = static_cast(m_cholmodFactor->x); + if (m_cholmodFactor->is_super) + { + // Supernodal factorization stored as a packed list of dense column-major blocs, + // as described by the following structure: + + // super[k] == index of the first column of the j-th super node + StorageIndex *super = static_cast(m_cholmodFactor->super); + // pi[k] == offset to the description of row indices + StorageIndex *pi = static_cast(m_cholmodFactor->pi); + // px[k] == offset to the respective dense block + StorageIndex *px = static_cast(m_cholmodFactor->px); + + Index nb_super_nodes = m_cholmodFactor->nsuper; + for (Index k=0; k < nb_super_nodes; ++k) + { + StorageIndex ncols = super[k + 1] - super[k]; + StorageIndex nrows = pi[k + 1] - pi[k]; + + Map, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows+1)); + logDet += sk.real().log().sum(); + } + } + else + { + // Simplicial factorization stored as standard CSC matrix. + StorageIndex *p = static_cast(m_cholmodFactor->p); + Index size = m_cholmodFactor->n; + for (Index k=0; kis_ll) + logDet *= 2.0; + return logDet; + }; + template void dumpMemory(Stream& /*s*/) {} @@ -355,9 +402,8 @@ class CholmodBase : internal::noncopyable protected: mutable cholmod_common m_cholmod; cholmod_factor* m_cholmodFactor; - RealScalar m_shiftOffset[2]; + double m_shiftOffset[2]; mutable ComputationInfo m_info; - bool m_isInitialized; int m_factorizationIsOk; int m_analysisIsOk; }; @@ -376,9 +422,13 @@ class CholmodBase : internal::noncopyable * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * - * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLLT + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT */ template class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLLT<_MatrixType, _UpLo> > @@ -395,7 +445,7 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl CholmodSimplicialLLT(const MatrixType& matrix) : Base() { init(); - Base::compute(matrix); + this->compute(matrix); } ~CholmodSimplicialLLT() {} @@ -423,9 +473,13 @@ class CholmodSimplicialLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimpl * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * - * \sa \ref TutorialSparseDirectSolvers, class CholmodSupernodalLLT, class SimplicialLDLT + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT */ template class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimplicialLDLT<_MatrixType, _UpLo> > @@ -442,7 +496,7 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp CholmodSimplicialLDLT(const MatrixType& matrix) : Base() { init(); - Base::compute(matrix); + this->compute(matrix); } ~CholmodSimplicialLDLT() {} @@ -468,9 +522,13 @@ class CholmodSimplicialLDLT : public CholmodBase<_MatrixType, _UpLo, CholmodSimp * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * - * \sa \ref TutorialSparseDirectSolvers + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept */ template class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSupernodalLLT<_MatrixType, _UpLo> > @@ -487,7 +545,7 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper CholmodSupernodalLLT(const MatrixType& matrix) : Base() { init(); - Base::compute(matrix); + this->compute(matrix); } ~CholmodSupernodalLLT() {} @@ -515,9 +573,13 @@ class CholmodSupernodalLLT : public CholmodBase<_MatrixType, _UpLo, CholmodSuper * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower * or Upper. Default is Lower. * + * \implsparsesolverconcept + * * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non compressed. * - * \sa \ref TutorialSparseDirectSolvers + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept */ template class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecomposition<_MatrixType, _UpLo> > @@ -534,7 +596,7 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom CholmodDecomposition(const MatrixType& matrix) : Base() { init(); - Base::compute(matrix); + this->compute(matrix); } ~CholmodDecomposition() {} @@ -572,36 +634,6 @@ class CholmodDecomposition : public CholmodBase<_MatrixType, _UpLo, CholmodDecom } }; -namespace internal { - -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -template -struct sparse_solve_retval, Rhs> - : sparse_solve_retval_base, Rhs> -{ - typedef CholmodBase<_MatrixType,_UpLo,Derived> Dec; - EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const - { - dec()._solve(rhs(),dst); - } -}; - -} // end namespace internal - } // end namespace Eigen #endif // EIGEN_CHOLMODSUPPORT_H diff --git a/external/eigen3/Eigen/src/Core/Array.h b/external/eigen3/Eigen/src/Core/Array.h index 0b9c38c..e10020d 100644 --- a/external/eigen3/Eigen/src/Core/Array.h +++ b/external/eigen3/Eigen/src/Core/Array.h @@ -12,7 +12,16 @@ namespace Eigen { -/** \class Array +namespace internal { +template +struct traits > : traits > +{ + typedef ArrayXpr XprKind; + typedef ArrayBase > XprBase; +}; +} + +/** \class Array * \ingroup Core_Module * * \brief General-purpose arrays with easy API for coefficient-wise operations @@ -24,20 +33,14 @@ namespace Eigen { * API for the %Matrix class provides easy access to linear-algebra * operations. * + * See documentation of class Matrix for detailed information on the template parameters + * storage layout. + * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. * - * \sa \ref TutorialArrayClass, \ref TopicClassHierarchy + * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy */ -namespace internal { -template -struct traits > : traits > -{ - typedef ArrayXpr XprKind; - typedef ArrayBase > XprBase; -}; -} - template class Array : public PlainObjectBase > @@ -69,11 +72,27 @@ class Array * the usage of 'using'. This should be done only for operator=. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const EigenBase &other) { return Base::operator=(other); } + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() + */ + /* This overload is needed because the usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const Scalar &value) + { + Base::setConstant(value); + return *this; + } + /** Copies the value of the expression \a other into \c *this with automatic resizing. * * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), @@ -84,7 +103,8 @@ class Array * remain row-vectors and vectors remain vectors. */ template - EIGEN_STRONG_INLINE Array& operator=(const ArrayBase& other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array& operator=(const DenseBase& other) { return Base::_set(other); } @@ -92,11 +112,12 @@ class Array /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const Array& other) { return Base::_set(other); } - + /** Default constructor. * * For fixed-size matrices, does nothing. @@ -107,6 +128,7 @@ class Array * * \sa resize(Index,Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array() : Base() { Base::_check_template_params(); @@ -116,6 +138,7 @@ class Array #ifndef EIGEN_PARSED_BY_DOXYGEN // FIXME is it still needed ?? /** \internal */ + EIGEN_DEVICE_FUNC Array(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { @@ -124,56 +147,64 @@ class Array } #endif -#ifdef EIGEN_HAVE_RVALUE_REFERENCES - Array(Array&& other) +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) : Base(std::move(other)) { Base::_check_template_params(); if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) Base::_set_noalias(other); } - Array& operator=(Array&& other) + EIGEN_DEVICE_FUNC + Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { other.swap(*this); return *this; } #endif - /** Constructs a vector or row-vector with given dimension. \only_for_vectors - * - * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, - * it is redundant to pass the dimension here, so it makes more sense to use the default - * constructor Matrix() instead. - */ - EIGEN_STRONG_INLINE explicit Array(Index dim) - : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim) + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(const T& x) { Base::_check_template_params(); - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array) - eigen_assert(dim >= 0); - eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + Base::template _init1(x); } - #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1) { Base::_check_template_params(); this->template _init2(val0, val1); } #else - /** constructs an uninitialized matrix with \a rows rows and \a cols columns. + /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC explicit Array(const Scalar *data); + /** Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass the dimension here, so it makes more sense to use the default + * constructor Array() instead. + */ + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Array(Index dim); + /** constructs an initialized 1x1 Array with the given coefficient */ + Array(const Scalar& value); + /** constructs an uninitialized array with \a rows rows and \a cols columns. * - * This is useful for dynamic-size matrices. For fixed-size matrices, + * This is useful for dynamic-size arrays. For fixed-size arrays, * it is redundant to pass these parameters, so one should use the default constructor - * Matrix() instead. */ + * Array() instead. */ Array(Index rows, Index cols); /** constructs an initialized 2D vector with given coefficients */ Array(const Scalar& val0, const Scalar& val1); #endif /** constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2) { Base::_check_template_params(); @@ -183,6 +214,7 @@ class Array m_storage.data()[2] = val2; } /** constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3) { Base::_check_template_params(); @@ -193,51 +225,27 @@ class Array m_storage.data()[3] = val3; } - explicit Array(const Scalar *data); - - /** Constructor copying the value of the expression \a other */ - template - EIGEN_STRONG_INLINE Array(const ArrayBase& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } /** Copy constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Array& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } - /** Copy constructor with in-place evaluation */ - template - EIGEN_STRONG_INLINE Array(const ReturnByValue& other) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - other.evalTo(*this); - } + : Base(other) + { } - /** \sa MatrixBase::operator=(const EigenBase&) */ - template - EIGEN_STRONG_INLINE Array(const EigenBase &other) - : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) - { - Base::_check_template_params(); - Base::_resize_to_match(other); - *this = other; - } + private: + struct PrivateType {}; + public: - /** Override MatrixBase::swap() since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. - */ + /** \sa MatrixBase::operator=(const EigenBase&) */ template - void swap(ArrayBase const & other) - { this->_swap(other.derived()); } - - inline Index innerStride() const { return 1; } - inline Index outerStride() const { return this->innerSize(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Array(const EigenBase &other, + typename internal::enable_if::value, + PrivateType>::type = PrivateType()) + : Base(other.derived()) + { } + + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } #ifdef EIGEN_ARRAY_PLUGIN #include EIGEN_ARRAY_PLUGIN diff --git a/external/eigen3/Eigen/src/Core/ArrayBase.h b/external/eigen3/Eigen/src/Core/ArrayBase.h index 33ff553..3dbc708 100644 --- a/external/eigen3/Eigen/src/Core/ArrayBase.h +++ b/external/eigen3/Eigen/src/Core/ArrayBase.h @@ -32,7 +32,7 @@ template class MatrixWrapper; * \tparam Derived is the derived type, e.g., an array or an expression type. * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. * * \sa class MatrixBase, \ref TopicClassHierarchy */ @@ -47,13 +47,11 @@ template class ArrayBase typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; typedef DenseBase Base; - using Base::operator*; using Base::RowsAtCompileTime; using Base::ColsAtCompileTime; using Base::SizeAtCompileTime; @@ -62,8 +60,7 @@ template class ArrayBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; - + using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -83,25 +80,14 @@ template class ArrayBase #endif // not EIGEN_PARSED_BY_DOXYGEN #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily - * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const - * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either - * PlainObject or const PlainObject&. - */ - typedef Array::Scalar, - internal::traits::RowsAtCompileTime, - internal::traits::ColsAtCompileTime, - AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), - internal::traits::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime - > PlainObject; - + typedef typename Base::PlainObject PlainObject; /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) # include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/ArrayCwiseUnaryOps.h" @@ -112,44 +98,62 @@ template class ArrayBase # include EIGEN_ARRAYBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const ArrayBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } - - Derived& operator+=(const Scalar& scalar) - { return *this = derived() + scalar; } - Derived& operator-=(const Scalar& scalar) - { return *this = derived() - scalar; } + + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator=(const Scalar &value) + { Base::setConstant(value); return derived(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const Scalar& scalar); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const Scalar& scalar); template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const ArrayBase& other); template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const ArrayBase& other); public: + EIGEN_DEVICE_FUNC ArrayBase& array() { return *this; } + EIGEN_DEVICE_FUNC const ArrayBase& array() const { return *this; } /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array * \sa MatrixBase::array() */ - MatrixWrapper matrix() { return derived(); } - const MatrixWrapper matrix() const { return derived(); } + EIGEN_DEVICE_FUNC + MatrixWrapper matrix() { return MatrixWrapper(derived()); } + EIGEN_DEVICE_FUNC + const MatrixWrapper matrix() const { return MatrixWrapper(derived()); } // template // inline void evalTo(Dest& dst) const { dst = matrix(); } protected: + EIGEN_DEVICE_FUNC ArrayBase() : Base() {} private: @@ -171,11 +175,10 @@ template class ArrayBase */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase::operator-=(const ArrayBase &other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -185,11 +188,10 @@ ArrayBase::operator-=(const ArrayBase &other) */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase::operator+=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } @@ -199,11 +201,10 @@ ArrayBase::operator+=(const ArrayBase& other) */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase::operator*=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::mul_assign_op()); return derived(); } @@ -213,11 +214,10 @@ ArrayBase::operator*=(const ArrayBase& other) */ template template -EIGEN_STRONG_INLINE Derived & +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & ArrayBase::operator/=(const ArrayBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::div_assign_op()); return derived(); } diff --git a/external/eigen3/Eigen/src/Core/ArrayWrapper.h b/external/eigen3/Eigen/src/Core/ArrayWrapper.h index b4641e2..688aadd 100644 --- a/external/eigen3/Eigen/src/Core/ArrayWrapper.h +++ b/external/eigen3/Eigen/src/Core/ArrayWrapper.h @@ -32,7 +32,8 @@ struct traits > // Let's remove NestByRefBit enum { Flags0 = traits::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -44,6 +45,7 @@ class ArrayWrapper : public ArrayBase > typedef ArrayBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + typedef typename internal::remove_all::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue::value, @@ -51,76 +53,45 @@ class ArrayWrapper : public ArrayBase > const Scalar >::type ScalarWithConstIfNotLvalue; - typedef typename internal::nested::type NestedExpressionType; + typedef typename internal::ref_selector::non_const_type NestedExpressionType; - inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + using Base::coeffRef; + EIGEN_DEVICE_FUNC + explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } - inline CoeffReturnType coeff(Index rowId, Index colId) const - { - return m_expression.coeff(rowId, colId); - } - - inline Scalar& coeffRef(Index rowId, Index colId) - { - return m_expression.const_cast_derived().coeffRef(rowId, colId); - } - + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { - return m_expression.const_cast_derived().coeffRef(rowId, colId); - } - - inline CoeffReturnType coeff(Index index) const - { - return m_expression.coeff(index); - } - - inline Scalar& coeffRef(Index index) - { - return m_expression.const_cast_derived().coeffRef(index); + return m_expression.coeffRef(rowId, colId); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { - return m_expression.const_cast_derived().coeffRef(index); - } - - template - inline const PacketScalar packet(Index rowId, Index colId) const - { - return m_expression.template packet(rowId, colId); - } - - template - inline void writePacket(Index rowId, Index colId, const PacketScalar& val) - { - m_expression.const_cast_derived().template writePacket(rowId, colId, val); - } - - template - inline const PacketScalar packet(Index index) const - { - return m_expression.template packet(index); - } - - template - inline void writePacket(Index index, const PacketScalar& val) - { - m_expression.const_cast_derived().template writePacket(index, val); + return m_expression.coeffRef(index); } template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { dst = m_expression; } const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC nestedExpression() const { return m_expression; @@ -128,10 +99,12 @@ class ArrayWrapper : public ArrayBase > /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index) */ - void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } + EIGEN_DEVICE_FUNC + void resize(Index newSize) { m_expression.resize(newSize); } /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index,Index)*/ - void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.resize(rows,cols); } protected: NestedExpressionType m_expression; @@ -157,7 +130,8 @@ struct traits > // Let's remove NestByRefBit enum { Flags0 = traits::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -169,6 +143,7 @@ class MatrixWrapper : public MatrixBase > typedef MatrixBase > Base; EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + typedef typename internal::remove_all::type NestedExpression; typedef typename internal::conditional< internal::is_lvalue::value, @@ -176,72 +151,40 @@ class MatrixWrapper : public MatrixBase > const Scalar >::type ScalarWithConstIfNotLvalue; - typedef typename internal::nested::type NestedExpressionType; + typedef typename internal::ref_selector::non_const_type NestedExpressionType; - inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {} + using Base::coeffRef; + EIGEN_DEVICE_FUNC + explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); } + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_expression.data(); } - inline CoeffReturnType coeff(Index rowId, Index colId) const - { - return m_expression.coeff(rowId, colId); - } - - inline Scalar& coeffRef(Index rowId, Index colId) - { - return m_expression.const_cast_derived().coeffRef(rowId, colId); - } - + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return m_expression.derived().coeffRef(rowId, colId); } - inline CoeffReturnType coeff(Index index) const - { - return m_expression.coeff(index); - } - - inline Scalar& coeffRef(Index index) - { - return m_expression.const_cast_derived().coeffRef(index); - } - + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { - return m_expression.const_cast_derived().coeffRef(index); - } - - template - inline const PacketScalar packet(Index rowId, Index colId) const - { - return m_expression.template packet(rowId, colId); - } - - template - inline void writePacket(Index rowId, Index colId, const PacketScalar& val) - { - m_expression.const_cast_derived().template writePacket(rowId, colId, val); - } - - template - inline const PacketScalar packet(Index index) const - { - return m_expression.template packet(index); - } - - template - inline void writePacket(Index index, const PacketScalar& val) - { - m_expression.const_cast_derived().template writePacket(index, val); + return m_expression.coeffRef(index); } + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { @@ -250,10 +193,12 @@ class MatrixWrapper : public MatrixBase > /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index) */ - void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); } + EIGEN_DEVICE_FUNC + void resize(Index newSize) { m_expression.resize(newSize); } /** Forwards the resizing request to the nested expression * \sa DenseBase::resize(Index,Index)*/ - void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); } + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { m_expression.resize(rows,cols); } protected: NestedExpressionType m_expression; diff --git a/external/eigen3/Eigen/src/Core/Assign.h b/external/eigen3/Eigen/src/Core/Assign.h index f481731..53806ba 100644 --- a/external/eigen3/Eigen/src/Core/Assign.h +++ b/external/eigen3/Eigen/src/Core/Assign.h @@ -14,478 +14,6 @@ namespace Eigen { -namespace internal { - -/*************************************************************************** -* Part 1 : the logic deciding a strategy for traversal and unrolling * -***************************************************************************/ - -template -struct assign_traits -{ -public: - enum { - DstIsAligned = Derived::Flags & AlignedBit, - DstHasDirectAccess = Derived::Flags & DirectAccessBit, - SrcIsAligned = OtherDerived::Flags & AlignedBit, - JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned - }; - -private: - enum { - InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) - : int(Derived::RowsAtCompileTime), - InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) - : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime) - : int(Derived::MaxRowsAtCompileTime), - MaxSizeAtCompileTime = Derived::SizeAtCompileTime, - PacketSize = packet_traits::size - }; - - enum { - StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), - MightVectorize = StorageOrdersAgree - && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), - MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 - && int(DstIsAligned) && int(SrcIsAligned), - MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), - MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess - && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), - /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, - so it's only good for large enough sizes. */ - MaySliceVectorize = MightVectorize && DstHasDirectAccess - && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) - /* slice vectorization can be slow, so we only want it if the slices are big, which is - indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix */ - }; - -public: - enum { - Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) - : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) - : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) - : int(MayLinearize) ? int(LinearTraversal) - : int(DefaultTraversal), - Vectorized = int(Traversal) == InnerVectorizedTraversal - || int(Traversal) == LinearVectorizedTraversal - || int(Traversal) == SliceVectorizedTraversal - }; - -private: - enum { - UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), - MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), - MayUnrollInner = int(InnerSize) != Dynamic - && int(OtherDerived::CoeffReadCost) != Dynamic - && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) - }; - -public: - enum { - Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) - ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) - : int(MayUnrollInner) ? int(InnerUnrolling) - : int(NoUnrolling) - ) - : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(Traversal) == int(LinearTraversal) - ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) - : int(NoUnrolling) - }; - -#ifdef EIGEN_DEBUG_ASSIGN - static void debug() - { - EIGEN_DEBUG_VAR(DstIsAligned) - EIGEN_DEBUG_VAR(SrcIsAligned) - EIGEN_DEBUG_VAR(JointAlignment) - EIGEN_DEBUG_VAR(InnerSize) - EIGEN_DEBUG_VAR(InnerMaxSize) - EIGEN_DEBUG_VAR(PacketSize) - EIGEN_DEBUG_VAR(StorageOrdersAgree) - EIGEN_DEBUG_VAR(MightVectorize) - EIGEN_DEBUG_VAR(MayLinearize) - EIGEN_DEBUG_VAR(MayInnerVectorize) - EIGEN_DEBUG_VAR(MayLinearVectorize) - EIGEN_DEBUG_VAR(MaySliceVectorize) - EIGEN_DEBUG_VAR(Traversal) - EIGEN_DEBUG_VAR(UnrollingLimit) - EIGEN_DEBUG_VAR(MayUnrollCompletely) - EIGEN_DEBUG_VAR(MayUnrollInner) - EIGEN_DEBUG_VAR(Unrolling) - } -#endif -}; - -/*************************************************************************** -* Part 2 : meta-unrollers -***************************************************************************/ - -/************************ -*** Default traversal *** -************************/ - -template -struct assign_DefaultTraversal_CompleteUnrolling -{ - enum { - outer = Index / Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime - }; - - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeffByOuterInner(outer, inner, src); - assign_DefaultTraversal_CompleteUnrolling::run(dst, src); - } -}; - -template -struct assign_DefaultTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -template -struct assign_DefaultTraversal_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.copyCoeffByOuterInner(outer, Index, src); - assign_DefaultTraversal_InnerUnrolling::run(dst, src, outer); - } -}; - -template -struct assign_DefaultTraversal_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template -struct assign_LinearTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.copyCoeff(Index, src); - assign_LinearTraversal_CompleteUnrolling::run(dst, src); - } -}; - -template -struct assign_LinearTraversal_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template -struct assign_innervec_CompleteUnrolling -{ - enum { - outer = Index / Derived1::InnerSizeAtCompileTime, - inner = Index % Derived1::InnerSizeAtCompileTime, - JointAlignment = assign_traits::JointAlignment - }; - - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - dst.template copyPacketByOuterInner(outer, inner, src); - assign_innervec_CompleteUnrolling::size, Stop>::run(dst, src); - } -}; - -template -struct assign_innervec_CompleteUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {} -}; - -template -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer) - { - dst.template copyPacketByOuterInner(outer, Index, src); - assign_innervec_InnerUnrolling::size, Stop>::run(dst, src, outer); - } -}; - -template -struct assign_innervec_InnerUnrolling -{ - static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {} -}; - -/*************************************************************************** -* Part 3 : implementation of all cases -***************************************************************************/ - -template::Traversal, - int Unrolling = assign_traits::Unrolling, - int Version = Specialized> -struct assign_impl; - -/************************ -*** Default traversal *** -************************/ - -template -struct assign_impl -{ - static inline void run(Derived1 &, const Derived2 &) { } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; ++inner) - dst.copyCoeffByOuterInner(outer, inner, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_DefaultTraversal_CompleteUnrolling - ::run(dst, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_DefaultTraversal_InnerUnrolling - ::run(dst, src, outer); - } -}; - -/*********************** -*** Linear traversal *** -***********************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - for(Index i = 0; i < size; ++i) - dst.copyCoeff(i, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_LinearTraversal_CompleteUnrolling - ::run(dst, src); - } -}; - -/************************** -*** Inner vectorization *** -**************************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index packetSize = packet_traits::size; - for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; inner+=packetSize) - dst.template copyPacketByOuterInner(outer, inner, src); - } -}; - -template -struct assign_impl -{ - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - assign_innervec_CompleteUnrolling - ::run(dst, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) - assign_innervec_InnerUnrolling - ::run(dst, src, outer); - } -}; - -/*************************** -*** Linear vectorization *** -***************************/ - -template -struct unaligned_assign_impl -{ - template - static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {} -}; - -template <> -struct unaligned_assign_impl -{ - // MSVC must not inline this functions. If it does, it fails to optimize the - // packet access path. -#ifdef _MSC_VER - template - static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#else - template - static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end) -#endif - { - for (typename Derived::Index index = start; index < end; ++index) - dst.copyCoeff(index, src); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - const Index size = dst.size(); - typedef packet_traits PacketTraits; - enum { - packetSize = PacketTraits::size, - dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits::DstIsAligned) , - srcAlignment = assign_traits::JointAlignment - }; - const Index alignedStart = assign_traits::DstIsAligned ? 0 - : internal::first_aligned(&dst.coeffRef(0), size); - const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - - unaligned_assign_impl::DstIsAligned!=0>::run(src,dst,0,alignedStart); - - for(Index index = alignedStart; index < alignedEnd; index += packetSize) - { - dst.template copyPacket(index, src); - } - - unaligned_assign_impl<>::run(src,dst,alignedEnd,size); - } -}; - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src) - { - enum { size = Derived1::SizeAtCompileTime, - packetSize = packet_traits::size, - alignedSize = (size/packetSize)*packetSize }; - - assign_innervec_CompleteUnrolling::run(dst, src); - assign_DefaultTraversal_CompleteUnrolling::run(dst, src); - } -}; - -/************************** -*** Slice vectorization *** -***************************/ - -template -struct assign_impl -{ - typedef typename Derived1::Index Index; - static inline void run(Derived1 &dst, const Derived2 &src) - { - typedef typename Derived1::Scalar Scalar; - typedef packet_traits PacketTraits; - enum { - packetSize = PacketTraits::size, - alignable = PacketTraits::AlignedOnScalar, - dstIsAligned = assign_traits::DstIsAligned, - dstAlignment = alignable ? Aligned : int(dstIsAligned), - srcAlignment = assign_traits::JointAlignment - }; - const Scalar *dst_ptr = &dst.coeffRef(0,0); - if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0) - { - // the pointer is not aligend-on scalar, so alignment is not possible - return assign_impl::run(dst, src); - } - const Index packetAlignedMask = packetSize - 1; - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; - Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize); - - for(Index outer = 0; outer < outerSize; ++outer) - { - const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); - // do the non-vectorizable part of the assignment - for(Index inner = 0; inner(outer, inner, src); - - // do the non-vectorizable part of the assignment - for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); - } - } -}; - -} // end namespace internal - -/*************************************************************************** -* Part 4 : implementation of DenseBase methods -***************************************************************************/ - template template EIGEN_STRONG_INLINE Derived& DenseBase @@ -499,90 +27,62 @@ EIGEN_STRONG_INLINE Derived& DenseBase EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived) EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) -#ifdef EIGEN_DEBUG_ASSIGN - internal::assign_traits::debug(); -#endif eigen_assert(rows() == other.rows() && cols() == other.cols()); - internal::assign_impl::Traversal) - : int(InvalidTraversal)>::run(derived(),other.derived()); -#ifndef EIGEN_NO_DEBUG - checkTransposeAliasing(other.derived()); -#endif + internal::call_assignment_no_alias(derived(),other.derived()); + return derived(); } -namespace internal { - -template::Flags) & EvalBeforeAssigningBit) != 0, - bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1) - | // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&". - // revert to || as soon as not needed anymore. - (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1)) - && int(Derived::SizeAtCompileTime) != 1> -struct assign_selector; - -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); } - template - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); } - template - static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose dstTrans(dst); other.evalTo(dstTrans); return dst; } -}; -template -struct assign_selector { - static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); } -}; - -} // end namespace internal - template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) { - return internal::assign_selector::run(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) { - return internal::assign_selector::evalTo(derived(), other.derived()); + internal::call_assignment(derived(), other.derived()); + return derived(); } template template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const ReturnByValue& other) { - return internal::assign_selector::evalTo(derived(), other.derived()); + other.derived().evalTo(derived()); + return derived(); } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/AssignEvaluator.h b/external/eigen3/Eigen/src/Core/AssignEvaluator.h new file mode 100644 index 0000000..b0ec7b7 --- /dev/null +++ b/external/eigen3/Eigen/src/Core/AssignEvaluator.h @@ -0,0 +1,935 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ASSIGN_EVALUATOR_H +#define EIGEN_ASSIGN_EVALUATOR_H + +namespace Eigen { + +// This implementation is based on Assign.h + +namespace internal { + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits + +template +struct copy_using_evaluator_traits +{ + typedef typename DstEvaluator::XprType Dst; + typedef typename Dst::Scalar DstScalar; + + enum { + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags + }; + +public: + enum { + DstAlignment = DstEvaluator::Alignment, + SrcAlignment = SrcEvaluator::Alignment, + DstHasDirectAccess = DstFlags & DirectAccessBit, + JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) + }; + +private: + enum { + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(DstFlags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + OuterStride = int(outer_stride_at_compile_time::ret), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime + }; + + // TODO distinguish between linear traversal and inner-traversals + typedef typename find_best_packet::type LinearPacketType; + typedef typename find_best_packet::type InnerPacketType; + + enum { + LinearPacketSize = unpacket_traits::size, + InnerPacketSize = unpacket_traits::size + }; + +public: + enum { + LinearRequiredAlignment = unpacket_traits::alignment, + InnerRequiredAlignment = unpacket_traits::alignment + }; + +private: + enum { + DstIsRowMajor = DstFlags&RowMajorBit, + SrcIsRowMajor = SrcFlags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), + MightVectorize = bool(StorageOrdersAgree) + && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) + && bool(functor_traits::PacketAccess), + MayInnerVectorize = MightVectorize + && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0 + && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 + && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), + MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), + MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess + && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize))) + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix + However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ + }; + +public: + enum { + Traversal = int(MayLinearVectorize) && (LinearPacketSize>InnerPacketSize) ? int(LinearVectorizedTraversal) + : int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) == InnerVectorizedTraversal + || int(Traversal) == LinearVectorizedTraversal + || int(Traversal) == SliceVectorizedTraversal + }; + + typedef typename conditional::type PacketType; + +private: + enum { + ActualPacketSize = int(Traversal)==LinearVectorizedTraversal ? LinearPacketSize + : Vectorized ? InnerPacketSize + : 1, + UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize, + MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic + && int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize) != Dynamic + && int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment))) + ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(NoUnrolling) ) +#if EIGEN_UNALIGNED_VECTORIZE + : int(Traversal) == int(SliceVectorizedTraversal) + ? ( bool(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) ) +#endif + : int(NoUnrolling) + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; + std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + std::cerr << "DstFlags" << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(DstAlignment) + EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(LinearRequiredAlignment) + EIGEN_DEBUG_VAR(InnerRequiredAlignment) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSize) + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(LinearPacketSize) + EIGEN_DEBUG_VAR(InnerPacketSize) + EIGEN_DEBUG_VAR(ActualPacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + std::cerr << "Traversal" << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; + EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + std::cerr << "Unrolling" << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; + std::cerr << std::endl; + } +#endif +}; + +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.assignCoeffByOuterInner(outer, inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.assignCoeffByOuterInner(outer, Index_); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) + { + kernel.assignCoeff(Index); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment + }; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + kernel.template assignPacketByOuterInner(outer, inner); + enum { NextIndex = Index + unpacket_traits::size }; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + typedef typename Kernel::PacketType PacketType; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer) + { + kernel.template assignPacketByOuterInner(outer, Index_); + enum { NextIndex = Index_ + unpacket_traits::size }; + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + +// dense_assignment_loop is based on assign_impl + +template +struct dense_assignment_loop; + +/************************ +*** Default traversal *** +************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE run(Kernel &kernel) + { + for(Index outer = 0; outer < kernel.outerSize(); ++outer) { + for(Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +/*************************** +*** Linear vectorization *** +***************************/ + + +// The goal of unaligned_dense_assignment_loop is simply to factorize the handling +// of the non vectorizable beginning and ending parts + +template +struct unaligned_dense_assignment_loop +{ + // if IsAligned = true, then do nothing + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&, Index, Index) {} +}; + +template <> +struct unaligned_dense_assignment_loop +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. + // FIXME check which version exhibits this issue +#if EIGEN_COMP_MSVC + template + static EIGEN_DONT_INLINE void run(Kernel &kernel, + Index start, + Index end) +#else + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, + Index start, + Index end) +#endif + { + for (Index index = start; index < end; ++index) + kernel.assignCoeff(index); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + requestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment, + packetSize = unpacket_traits::size, + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = packet_traits::AlignedOnScalar ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment), + srcAlignment = Kernel::AssignmentTraits::JointAlignment + }; + const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(kernel.dstDataPtr(), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_dense_assignment_loop::run(kernel, 0, alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + kernel.template assignPacket(index); + + unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { size = DstXprType::SizeAtCompileTime, + packetSize =unpacket_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct dense_assignment_loop +{ + typedef typename Kernel::PacketType PacketType; + enum { + SrcAlignment = Kernel::AssignmentTraits::SrcAlignment, + DstAlignment = Kernel::AssignmentTraits::DstAlignment + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index packetSize = unpacket_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) + kernel.template assignPacketByOuterInner(outer, inner); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::AssignmentTraits Traits; + const Index outerSize = kernel.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + const Index size = kernel.size(); + for(Index i = 0; i < size; ++i) + kernel.assignCoeff(i); + } +}; + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::Scalar Scalar; + typedef typename Kernel::PacketType PacketType; + enum { + packetSize = unpacket_traits::size, + requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment), + alignable = packet_traits::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment)>=sizeof(Scalar), + dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment)>=int(requestedAlignment), + dstAlignment = alignable ? int(requestedAlignment) + : int(Kernel::AssignmentTraits::DstAlignment) + }; + const Scalar *dst_ptr = kernel.dstDataPtr(); + if((!bool(dstIsAligned)) && (UIntPtr(dst_ptr) % sizeof(Scalar))>0) + { + // the pointer is not aligend-on scalar, so alignment is not possible + return dense_assignment_loop::run(kernel); + } + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize); + + for(Index outer = 0; outer < outerSize; ++outer) + { + const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); + // do the non-vectorizable part of the assignment + for(Index inner = 0; inner(outer, inner); + + // do the non-vectorizable part of the assignment + for(Index inner = alignedEnd; inner +struct dense_assignment_loop +{ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel) + { + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + typedef typename Kernel::PacketType PacketType; + + enum { size = DstXprType::InnerSizeAtCompileTime, + packetSize =unpacket_traits::size, + vectorizableSize = (size/packetSize)*packetSize }; + + for(Index outer = 0; outer < kernel.outerSize(); ++outer) + { + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } + } +}; +#endif + + +/*************************************************************************** +* Part 4 : Generic dense assignment kernel +***************************************************************************/ + +// This class generalize the assignment of a coefficient (or packet) from one dense evaluator +// to another dense writable evaluator. +// It is parametrized by the two evaluators, and the actual assignment functor. +// This abstraction level permits to keep the evaluation loops as simple and as generic as possible. +// One can customize the assignment using this generic dense_assignment_kernel with different +// functors, or by completely overloading it, by-passing a functor. +template +class generic_dense_assignment_kernel +{ +protected: + typedef typename DstEvaluatorTypeT::XprType DstXprType; + typedef typename SrcEvaluatorTypeT::XprType SrcXprType; +public: + + typedef DstEvaluatorTypeT DstEvaluatorType; + typedef SrcEvaluatorTypeT SrcEvaluatorType; + typedef typename DstEvaluatorType::Scalar Scalar; + typedef copy_using_evaluator_traits AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; + + + EIGEN_DEVICE_FUNC generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) + { + #ifdef EIGEN_DEBUG_ASSIGN + AssignmentTraits::debug(); + #endif + } + + EIGEN_DEVICE_FUNC Index size() const { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC Index rows() const { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC Index outerStride() const { return m_dstExpr.outerStride(); } + + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const { return m_src; } + + /// Assign src(row,col) to dst(row,col) through the assignment functor. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) + { + m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) + { + m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col); + } + + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) + { + m_functor.template assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) + { + m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col); + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) + { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? inner + : outer; + } + + EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const + { + return m_dstExpr.data(); + } + +protected: + DstEvaluatorType& m_dst; + const SrcEvaluatorType& m_src; + const Functor &m_functor; + // TODO find a way to avoid the needs of the original expression + DstXprType& m_dstExpr; +}; + +/*************************************************************************** +* Part 5 : Entry point for dense rectangular assignment +***************************************************************************/ + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const Functor &/*func*/) +{ + EIGEN_ONLY_USED_FOR_DEBUG(dst); + EIGEN_ONLY_USED_FOR_DEBUG(src); + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void resize_if_allowed(DstXprType &dst, const SrcXprType& src, const internal::assign_op &/*func*/) +{ + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if(((dst.rows()!=dstRows) || (dst.cols()!=dstCols))) + dst.resize(dstRows, dstCols); + eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor &func) +{ + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + + SrcEvaluatorType srcEvaluator(src); + + // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, + // we need to resize the destination after the source evaluator has been created. + resize_if_allowed(dst, src, func); + + DstEvaluatorType dstEvaluator(dst); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) +{ + call_dense_assignment_loop(dst, src, internal::assign_op()); +} + +/*************************************************************************** +* Part 6 : Generic assignment +***************************************************************************/ + +// Based on the respective shapes of the destination and source, +// the class AssignmentKind determine the kind of assignment mechanism. +// AssignmentKind must define a Kind typedef. +template struct AssignmentKind; + +// Assignement kind defined in this file: +struct Dense2Dense {}; +struct EigenBase2EigenBase {}; + +template struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; +template<> struct AssignmentKind { typedef Dense2Dense Kind; }; + +// This is the main assignment class +template< typename DstXprType, typename SrcXprType, typename Functor, + typename Kind = typename AssignmentKind< typename evaluator_traits::Shape , typename evaluator_traits::Shape >::Kind, + typename EnableIf = void> +struct Assignment; + + +// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic transposition. +// Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite complicated. +// So this intermediate function removes everything related to "assume-aliasing" such that Assignment +// does not has to bother about these annoying details. + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(const Dst& dst, const Src& src) +{ + call_assignment(dst, src, internal::assign_op()); +} + +// Deal with "assume-aliasing" +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing::value, void*>::type = 0) +{ + typename plain_matrix_type::type tmp(src); + call_assignment_no_alias(dst, tmp, func); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if::value, void*>::type = 0) +{ + call_assignment_no_alias(dst, src, func); +} + +// by-pass "assume-aliasing" +// When there is no aliasing, we require that 'dst' has been properly resized +template class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment(NoAlias& dst, const Src& src, const Func& func) +{ + call_assignment_no_alias(dst.expression(), src, func); +} + + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func) +{ + enum { + NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) + || (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1) + ) && int(Dst::SizeAtCompileTime) != 1 + }; + + typedef typename internal::conditional, Dst>::type ActualDstTypeCleaned; + typedef typename internal::conditional, Dst&>::type ActualDstType; + ActualDstType actualDst(dst); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned,Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename ActualDstTypeCleaned::Scalar,typename Src::Scalar); + + Assignment::run(actualDst, src, func); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias(Dst& dst, const Src& src) +{ + call_assignment_no_alias(dst, src, internal::assign_op()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) +{ + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst,Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func,typename Dst::Scalar,typename Src::Scalar); + + Assignment::run(dst, src, func); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) +{ + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); +} + +// forward declaration +template void check_for_aliasing(const Dst &dst, const Src &src); + +// Generic Dense to Dense assignment +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> +struct Assignment +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Functor &func) + { +#ifndef EIGEN_NO_DEBUG + internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func); + } +}; + +// Generic assignment through evalTo. +// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template< typename DstXprType, typename SrcXprType, typename Functor, typename Weak> +struct Assignment +{ + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.evalTo(dst); + } + + // NOTE The following two functions are templated to avoid their instanciation if not needed + // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type. + template + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.addTo(dst); + } + + template + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.subTo(dst); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_EVALUATOR_H diff --git a/external/eigen3/Eigen/src/Core/Assign_MKL.h b/external/eigen3/Eigen/src/Core/Assign_MKL.h old mode 100644 new mode 100755 index 7772951..6c2ab92 --- a/external/eigen3/Eigen/src/Core/Assign_MKL.h +++ b/external/eigen3/Eigen/src/Core/Assign_MKL.h @@ -1,6 +1,7 @@ /* Copyright (c) 2011, Intel Corporation. All rights reserved. - + Copyright (C) 2015 Gael Guennebaud + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -37,17 +38,13 @@ namespace Eigen { namespace internal { -template struct vml_call -{ enum { IsSupported = 0 }; }; - -template +template class vml_assign_traits { private: enum { DstHasDirectAccess = Dst::Flags & DirectAccessBit, SrcHasDirectAccess = Src::Flags & DirectAccessBit, - StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime) @@ -57,165 +54,120 @@ class vml_assign_traits : int(Dst::MaxRowsAtCompileTime), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, - MightEnableVml = vml_call::IsSupported && StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess - && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, + MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1, MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, - LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD, - MayEnableVml = MightEnableVml && LargeEnough, - MayLinearize = MayEnableVml && MightLinearize + LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD }; public: enum { - Traversal = MayLinearize ? LinearVectorizedTraversal - : MayEnableVml ? InnerVectorizedTraversal - : DefaultTraversal + EnableVml = MightEnableVml && LargeEnough, + Traversal = MightLinearize ? LinearTraversal : DefaultTraversal }; }; -template::Traversal > -struct vml_assign_impl - : assign_impl,Traversal,Unrolling,BuiltIn> -{ -}; - -template -struct vml_assign_impl -{ - typedef typename Derived1::Scalar Scalar; - typedef typename Derived1::Index Index; - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - for(Index outer = 0; outer < outerSize; ++outer) { - const Scalar *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : - &(src.nestedExpression().coeffRef(0, outer)); - Scalar *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); - vml_call::run(src.functor(), innerSize, src_ptr, dst_ptr ); - } - } -}; - -template -struct vml_assign_impl -{ - static inline void run(Derived1& dst, const CwiseUnaryOp& src) - { - // in case we want to (or have to) skip VML at runtime we can call: - // assign_impl,Traversal,Unrolling,BuiltIn>::run(dst,src); - vml_call::run(src.functor(), dst.size(), src.nestedExpression().data(), dst.data() ); - } -}; - -// Macroses - -#define EIGEN_MKL_VML_SPECIALIZE_ASSIGN(TRAVERSAL,UNROLLING) \ - template \ - struct assign_impl, TRAVERSAL, UNROLLING, Specialized> { \ - static inline void run(Derived1 &dst, const Eigen::CwiseUnaryOp &src) { \ - vml_assign_impl::run(dst, src); \ - } \ - }; - -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(DefaultTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(InnerVectorizedTraversal,InnerUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,CompleteUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(LinearVectorizedTraversal,NoUnrolling) -EIGEN_MKL_VML_SPECIALIZE_ASSIGN(SliceVectorizedTraversal,NoUnrolling) - - +#define EIGEN_PP_EXPAND(ARG) ARG #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) -#define EIGEN_MKL_VML_MODE VML_HA +#define EIGEN_VMLMODE_EXPAND_LA , VML_HA #else -#define EIGEN_MKL_VML_MODE VML_LA +#define EIGEN_VMLMODE_EXPAND_LA , VML_LA #endif -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst); \ - } \ +#define EIGEN_VMLMODE_EXPAND__ + +#define EIGEN_VMLMODE_PREFIX_LA vm +#define EIGEN_VMLMODE_PREFIX__ v +#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_,VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested> \ + struct Assignment, SrcXprNested>, assign_op, \ + Dense2Dense, typename enable_if::EnableVml>::type> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + if(vml_assign_traits::Traversal==LinearTraversal) { \ + VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \ + &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ + }; \ + + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) + + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA) +// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, Floor, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template< typename DstXprType, typename SrcXprNested, typename Plain> \ + struct Assignment, SrcXprNested, \ + const CwiseNullaryOp,Plain> >, assign_op, \ + Dense2Dense, typename enable_if::EnableVml>::type> { \ + typedef CwiseBinaryOp, SrcXprNested, \ + const CwiseNullaryOp,Plain> > SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + VMLTYPE exponent = reinterpret_cast(src.rhs().functor().m_other); \ + if(vml_assign_traits::Traversal==LinearTraversal) \ + { \ + VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \ + (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE) ); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for(Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \ + &(src.lhs().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \ + VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \ + (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_##VMLMODE)); \ + } \ + } \ + } \ }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& /*func*/, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(size, (const VMLTYPE*)src, (VMLTYPE*)dst, vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE) \ - template<> struct vml_call< scalar_##EIGENOP##_op > { \ - enum { IsSupported = 1 }; \ - static inline void run( const scalar_##EIGENOP##_op& func, \ - int size, const EIGENTYPE* src, EIGENTYPE* dst) { \ - EIGENTYPE exponent = func.m_exponent; \ - MKL_INT64 vmlMode = EIGEN_MKL_VML_MODE; \ - VMLOP(&size, (const VMLTYPE*)src, (const VMLTYPE*)&exponent, \ - (VMLTYPE*)dst, &vmlMode); \ - } \ - }; - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vs##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vd##VMLOP, double, double) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, vz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX(EIGENOP, VMLOP) - - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vms##VMLOP, float, float) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmd##VMLOP, double, double) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmc##VMLOP, scomplex, MKL_Complex8) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALL_LA(EIGENOP, vmz##VMLOP, dcomplex, MKL_Complex16) - -#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL_LA(EIGENOP, VMLOP) \ - EIGEN_MKL_VML_DECLARE_UNARY_CALLS_COMPLEX_LA(EIGENOP, VMLOP) - - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sin, Sin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(asin, Asin) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(cos, Cos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(acos, Acos) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(tan, Tan) -//EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(exp, Exp) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(log, Ln) -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_LA(sqrt, Sqrt) - -EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr) - -// The vm*powx functions are not avaibale in the windows version of MKL. -#ifndef _WIN32 -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmspowx_, float, float) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdpowx_, double, double) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcpowx_, scomplex, MKL_Complex8) -EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzpowx_, dcomplex, MKL_Complex16) -#endif + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA) } // end namespace internal diff --git a/external/eigen3/Eigen/src/Core/BandMatrix.h b/external/eigen3/Eigen/src/Core/BandMatrix.h index ffd7fe8..4978c91 100644 --- a/external/eigen3/Eigen/src/Core/BandMatrix.h +++ b/external/eigen3/Eigen/src/Core/BandMatrix.h @@ -32,7 +32,7 @@ class BandMatrixBase : public EigenBase }; typedef typename internal::traits::Scalar Scalar; typedef Matrix DenseMatrixType; - typedef typename DenseMatrixType::Index Index; + typedef typename DenseMatrixType::StorageIndex StorageIndex; typedef typename internal::traits::CoefficientsType CoefficientsType; typedef EigenBase Base; @@ -161,15 +161,15 @@ class BandMatrixBase : public EigenBase * * \brief Represents a rectangular matrix with a banded storage * - * \param _Scalar Numeric type, i.e. float, double, int - * \param Rows Number of rows, or \b Dynamic - * \param Cols Number of columns, or \b Dynamic - * \param Supers Number of super diagonal - * \param Subs Number of sub diagonal - * \param _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint - * The former controls \ref TopicStorageOrders "storage order", and defaults to - * column-major. The latter controls whether the matrix represents a selfadjoint - * matrix in which case either Supers of Subs have to be null. + * \tparam _Scalar Numeric type, i.e. float, double, int + * \tparam _Rows Number of rows, or \b Dynamic + * \tparam _Cols Number of columns, or \b Dynamic + * \tparam _Supers Number of super diagonal + * \tparam _Subs Number of sub diagonal + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint + * The former controls \ref TopicStorageOrders "storage order", and defaults to + * column-major. The latter controls whether the matrix represents a selfadjoint + * matrix in which case either Supers of Subs have to be null. * * \sa class TridiagonalMatrix */ @@ -179,7 +179,7 @@ struct traits > { typedef _Scalar Scalar; typedef Dense StorageKind; - typedef DenseIndex Index; + typedef Eigen::Index StorageIndex; enum { CoeffReadCost = NumTraits::ReadCost, RowsAtCompileTime = _Rows, @@ -201,10 +201,10 @@ class BandMatrix : public BandMatrixBase::Scalar Scalar; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; typedef typename internal::traits::CoefficientsType CoefficientsType; - inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) + explicit inline BandMatrix(Index rows=Rows, Index cols=Cols, Index supers=Supers, Index subs=Subs) : m_coeffs(1+supers+subs,cols), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -241,7 +241,7 @@ struct traits::CoeffReadCost, RowsAtCompileTime = _Rows, @@ -264,9 +264,9 @@ class BandMatrixWrapper : public BandMatrixBase::Scalar Scalar; typedef typename internal::traits::CoefficientsType CoefficientsType; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; - inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows=_Rows, Index cols=_Cols, Index supers=_Supers, Index subs=_Subs) : m_coeffs(coeffs), m_rows(rows), m_supers(supers), m_subs(subs) { @@ -302,9 +302,9 @@ class BandMatrixWrapper : public BandMatrixBase class TridiagonalMatrix : public BandMatrix { typedef BandMatrix Base; - typedef typename Base::Index Index; + typedef typename Base::StorageIndex StorageIndex; public: - TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} + explicit TridiagonalMatrix(Index size = Size) : Base(size,size,Options&SelfAdjoint?0:1,1) {} inline typename Base::template DiagonalIntReturnType<1>::Type super() { return Base::template diagonal<1>(); } @@ -327,6 +327,25 @@ class TridiagonalMatrix : public BandMatrix +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template +struct evaluator_traits > + : public evaluator_traits_base > +{ + typedef BandShape Shape; +}; + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + } // end namespace internal } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/Block.h b/external/eigen3/Eigen/src/Core/Block.h index 87bedfa..11de45c 100644 --- a/external/eigen3/Eigen/src/Core/Block.h +++ b/external/eigen3/Eigen/src/Core/Block.h @@ -13,38 +13,6 @@ namespace Eigen { -/** \class Block - * \ingroup Core_Module - * - * \brief Expression of a fixed-size or dynamic-size block - * - * \param XprType the type of the expression in which we are taking a block - * \param BlockRows the number of rows of the block we are taking at compile time (optional) - * \param BlockCols the number of columns of the block we are taking at compile time (optional) - * - * This class represents an expression of either a fixed-size or dynamic-size block. It is the return - * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block(Index,Index) and - * most of the time this is the only way it is used. - * - * However, if you want to directly maniputate block expressions, - * for instance if you want to write a function returning such an expression, you - * will need to use this class. - * - * Here is an example illustrating the dynamic case: - * \include class_Block.cpp - * Output: \verbinclude class_Block.out - * - * \note Even though this expression has dynamic size, in the case where \a XprType - * has fixed size, this expression inherits a fixed maximal size which means that evaluating - * it does not cause a dynamic memory allocation. - * - * Here is an example illustrating the fixed-size case: - * \include class_FixedBlock.cpp - * Output: \verbinclude class_FixedBlock.out - * - * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock - */ - namespace internal { template struct traits > : traits @@ -52,7 +20,7 @@ struct traits > : traits::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - typedef typename nested::type XprTypeNested; + typedef typename ref_selector::type XprTypeNested; typedef typename remove_reference::type _XprTypeNested; enum{ MatrixRows = traits::RowsAtCompileTime, @@ -65,6 +33,7 @@ struct traits > : traits::MaxColsAtCompileTime), + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -77,18 +46,16 @@ struct traits > : traits::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) - && (InnerStrideAtCompileTime == 1) - ? PacketAccessBit : 0, - MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0, - FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (traits::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - Flags0 = traits::Flags & ( (HereditaryBits & ~RowMajorBit) | - DirectAccessBit | - MaskPacketAccessBit | - MaskAlignedBit), - Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit + Flags = (traits::Flags & (DirectAccessBit | (InnerPanel?CompressedAccessBit:0))) | FlagsLvalueBit | FlagsRowMajorBit, + // FIXME DirectAccessBit should not be handled by expressions + // + // Alignment is needed by MapBase's assertions + // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + Alignment = 0 }; }; @@ -99,6 +66,40 @@ template class BlockImpl; +/** \class Block + * \ingroup Core_Module + * + * \brief Expression of a fixed-size or dynamic-size block + * + * \tparam XprType the type of the expression in which we are taking a block + * \tparam BlockRows the number of rows of the block we are taking at compile time (optional) + * \tparam BlockCols the number of columns of the block we are taking at compile time (optional) + * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or + * to set of columns of a column major matrix (optional). The parameter allows to determine + * at compile time whether aligned access is possible on the block expression. + * + * This class represents an expression of either a fixed-size or dynamic-size block. It is the return + * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block(Index,Index) and + * most of the time this is the only way it is used. + * + * However, if you want to directly maniputate block expressions, + * for instance if you want to write a function returning such an expression, you + * will need to use this class. + * + * Here is an example illustrating the dynamic case: + * \include class_Block.cpp + * Output: \verbinclude class_Block.out + * + * \note Even though this expression has dynamic size, in the case where \a XprType + * has fixed size, this expression inherits a fixed maximal size which means that evaluating + * it does not cause a dynamic memory allocation. + * + * Here is an example illustrating the fixed-size case: + * \include class_FixedBlock.cpp + * Output: \verbinclude class_FixedBlock.out + * + * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock + */ template class Block : public BlockImpl::StorageKind> { @@ -108,9 +109,12 @@ template class typedef Impl Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Block) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + typedef typename internal::remove_all::type NestedExpression; /** Column or Row constructor */ + EIGEN_DEVICE_FUNC inline Block(XprType& xpr, Index i) : Impl(xpr,i) { eigen_assert( (i>=0) && ( @@ -120,25 +124,27 @@ template class /** Fixed-size constructor */ - inline Block(XprType& xpr, Index a_startRow, Index a_startCol) - : Impl(xpr, a_startRow, a_startCol) + EIGEN_DEVICE_FUNC + inline Block(XprType& xpr, Index startRow, Index startCol) + : Impl(xpr, startRow, startCol) { EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) - eigen_assert(a_startRow >= 0 && BlockRows >= 1 && a_startRow + BlockRows <= xpr.rows() - && a_startCol >= 0 && BlockCols >= 1 && a_startCol + BlockCols <= xpr.cols()); + eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows() + && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols()); } /** Dynamic-size constructor */ + EIGEN_DEVICE_FUNC inline Block(XprType& xpr, - Index a_startRow, Index a_startCol, + Index startRow, Index startCol, Index blockRows, Index blockCols) - : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) { eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows) && (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols)); - eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows - && a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols); + eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows + && startCol >= 0 && blockCols >= 0 && startCol <= xpr.cols() - blockCols); } }; @@ -149,14 +155,15 @@ class BlockImpl : public internal::BlockImpl_dense { typedef internal::BlockImpl_dense Impl; - typedef typename XprType::Index Index; + typedef typename XprType::StorageIndex StorageIndex; public: typedef Impl Base; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) - inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} - inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {} - inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols) - : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {} + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {} + EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {} + EIGEN_DEVICE_FUNC + inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) {} }; namespace internal { @@ -166,16 +173,18 @@ template >::type { typedef Block BlockType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; public: typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) - class InnerIterator; + // class InnerIterator; // FIXME apparently never used /** Column or Row constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index i) : m_xpr(xpr), // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime, @@ -190,75 +199,76 @@ template inline PacketScalar packet(Index rowId, Index colId) const { - return m_xpr.template packet - (rowId + m_startRow.value(), colId + m_startCol.value()); + return m_xpr.template packet(rowId + m_startRow.value(), colId + m_startCol.value()); } template inline void writePacket(Index rowId, Index colId, const PacketScalar& val) { - m_xpr.const_cast_derived().template writePacket - (rowId + m_startRow.value(), colId + m_startCol.value(), val); + m_xpr.template writePacket(rowId + m_startRow.value(), colId + m_startCol.value(), val); } template @@ -272,40 +282,46 @@ template inline void writePacket(Index index, const PacketScalar& val) { - m_xpr.const_cast_derived().template writePacket + m_xpr.template writePacket (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val); } #ifdef EIGEN_PARSED_BY_DOXYGEN /** \sa MapBase::data() */ - inline const Scalar* data() const; - inline Index innerStride() const; - inline Index outerStride() const; + EIGEN_DEVICE_FUNC inline const Scalar* data() const; + EIGEN_DEVICE_FUNC inline Index innerStride() const; + EIGEN_DEVICE_FUNC inline Index outerStride() const; #endif - const typename internal::remove_all::type& nestedExpression() const + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + XprType& nestedExpression() { return m_xpr; } - Index startRow() const + EIGEN_DEVICE_FUNC + StorageIndex startRow() const { return m_startRow.value(); } - Index startCol() const + EIGEN_DEVICE_FUNC + StorageIndex startCol() const { return m_startCol.value(); } protected: - const typename XprType::Nested m_xpr; - const internal::variable_if_dynamic m_startRow; - const internal::variable_if_dynamic m_startCol; - const internal::variable_if_dynamic m_blockRows; - const internal::variable_if_dynamic m_blockCols; + XprTypeNested m_xpr; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; + const internal::variable_if_dynamic m_blockRows; + const internal::variable_if_dynamic m_blockCols; }; /** \internal Internal implementation of dense Blocks in the direct access case.*/ @@ -314,6 +330,10 @@ class BlockImpl_dense : public MapBase > { typedef Block BlockType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; + enum { + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0 + }; public: typedef MapBase Base; @@ -322,42 +342,52 @@ class BlockImpl_dense /** Column or Row constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index i) - : Base(internal::const_cast_ptr(&xpr.coeffRef( - (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0, - (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0)), + : Base(xpr.data() + i * ( ((BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) + || ((BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) && ( XprTypeIsRowMajor)) ? xpr.innerStride() : xpr.outerStride()), BlockRows==1 ? 1 : xpr.rows(), BlockCols==1 ? 1 : xpr.cols()), - m_xpr(xpr) + m_xpr(xpr), + m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0), + m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0) { init(); } /** Fixed-size constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol)), + m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) { init(); } /** Dynamic-size constructor */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) - : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol)), blockRows, blockCols), - m_xpr(xpr) + : Base(xpr.data()+xpr.innerStride()*(XprTypeIsRowMajor?startCol:startRow) + xpr.outerStride()*(XprTypeIsRowMajor?startRow:startCol), blockRows, blockCols), + m_xpr(xpr), m_startRow(startRow), m_startCol(startCol) { init(); } - const typename internal::remove_all::type& nestedExpression() const + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + XprType& nestedExpression() { return m_xpr; } /** \sa MapBase::innerStride() */ + EIGEN_DEVICE_FUNC inline Index innerStride() const { return internal::traits::HasSameStorageOrderAsXprType @@ -366,11 +396,24 @@ class BlockImpl_dense } /** \sa MapBase::outerStride() */ + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_outerStride; } + EIGEN_DEVICE_FUNC + StorageIndex startRow() const + { + return m_startRow.value(); + } + + EIGEN_DEVICE_FUNC + StorageIndex startCol() const + { + return m_startCol.value(); + } + #ifndef __SUNPRO_CC // FIXME sunstudio is not friendly with the above friend... // META-FIXME there is no 'friend' keyword around here. Is this obsolete? @@ -379,6 +422,7 @@ class BlockImpl_dense #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal used by allowAligned() */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols) : Base(data, blockRows, blockCols), m_xpr(xpr) { @@ -387,6 +431,7 @@ class BlockImpl_dense #endif protected: + EIGEN_DEVICE_FUNC void init() { m_outerStride = internal::traits::HasSameStorageOrderAsXprType @@ -394,7 +439,9 @@ class BlockImpl_dense : m_xpr.innerStride(); } - typename XprType::Nested m_xpr; + XprTypeNested m_xpr; + const internal::variable_if_dynamic m_startRow; + const internal::variable_if_dynamic m_startCol; Index m_outerStride; }; diff --git a/external/eigen3/Eigen/src/Core/BooleanRedux.h b/external/eigen3/Eigen/src/Core/BooleanRedux.h index be9f48a..8409d87 100644 --- a/external/eigen3/Eigen/src/Core/BooleanRedux.h +++ b/external/eigen3/Eigen/src/Core/BooleanRedux.h @@ -17,9 +17,10 @@ namespace internal { template struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) @@ -43,11 +44,12 @@ struct all_unroller template struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Derived::RowsAtCompileTime, - row = (UnrollCount-1) % Derived::RowsAtCompileTime + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; - + static inline bool run(const Derived &mat) { return any_unroller::run(mat) || mat.coeff(row, col); @@ -78,19 +80,19 @@ struct any_unroller template inline bool DenseBase::all() const { + typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller::run(derived()); + return internal::all_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (!coeff(i, j)) return false; + if (!evaluator.coeff(i, j)) return false; return true; } } @@ -102,19 +104,19 @@ inline bool DenseBase::all() const template inline bool DenseBase::any() const { + typedef internal::evaluator Evaluator; enum { unroll = SizeAtCompileTime != Dynamic - && CoeffReadCost != Dynamic - && NumTraits::AddCost != Dynamic - && SizeAtCompileTime * (CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT + && SizeAtCompileTime * (Evaluator::CoeffReadCost + NumTraits::AddCost) <= EIGEN_UNROLLING_LIMIT }; + Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller::run(derived()); + return internal::any_unroller::run(evaluator); else { for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if (coeff(i, j)) return true; + if (evaluator.coeff(i, j)) return true; return false; } } @@ -124,7 +126,7 @@ inline bool DenseBase::any() const * \sa all(), any() */ template -inline typename DenseBase::Index DenseBase::count() const +inline Eigen::Index DenseBase::count() const { return derived().template cast().template cast().sum(); } @@ -136,7 +138,11 @@ inline typename DenseBase::Index DenseBase::count() const template inline bool DenseBase::hasNaN() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isNaN().any(); +#else return !((derived().array()==derived().array()).all()); +#endif } /** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values. @@ -146,7 +152,11 @@ inline bool DenseBase::hasNaN() const template inline bool DenseBase::allFinite() const { +#if EIGEN_COMP_MSVC || (defined __FAST_MATH__) + return derived().array().isFinite().all(); +#else return !((derived()-derived()).hasNaN()); +#endif } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/CMakeLists.txt b/external/eigen3/Eigen/src/Core/CMakeLists.txt deleted file mode 100644 index 2346fc2..0000000 --- a/external/eigen3/Eigen/src/Core/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -FILE(GLOB Eigen_Core_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core COMPONENT Devel - ) - -ADD_SUBDIRECTORY(products) -ADD_SUBDIRECTORY(util) -ADD_SUBDIRECTORY(arch) diff --git a/external/eigen3/Eigen/src/Core/CommaInitializer.h b/external/eigen3/Eigen/src/Core/CommaInitializer.h index 5dd3ade..d218e98 100644 --- a/external/eigen3/Eigen/src/Core/CommaInitializer.h +++ b/external/eigen3/Eigen/src/Core/CommaInitializer.h @@ -22,14 +22,14 @@ namespace Eigen { * the return type of MatrixBase::operator<<, and most of the time this is the only * way it is used. * - * \sa \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() + * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() */ template struct CommaInitializer { typedef typename XprType::Scalar Scalar; - typedef typename XprType::Index Index; + EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const Scalar& s) : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) { @@ -37,6 +37,7 @@ struct CommaInitializer } template + EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const DenseBase& other) : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows()) { @@ -46,6 +47,7 @@ struct CommaInitializer /* Copy/Move constructor which transfers ownership. This is crucial in * absence of return value optimization to avoid assertions during destruction. */ // FIXME in C++11 mode this could be replaced by a proper RValue constructor + EIGEN_DEVICE_FUNC inline CommaInitializer(const CommaInitializer& o) : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { // Mark original object as finished. In absence of R-value references we need to const_cast: @@ -55,6 +57,7 @@ struct CommaInitializer } /* inserts a scalar value in the target matrix */ + EIGEN_DEVICE_FUNC CommaInitializer& operator,(const Scalar& s) { if (m_col==m_xpr.cols()) @@ -74,6 +77,7 @@ struct CommaInitializer /* inserts a matrix expression in the target matrix */ template + EIGEN_DEVICE_FUNC CommaInitializer& operator,(const DenseBase& other) { if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows)) @@ -93,7 +97,11 @@ struct CommaInitializer return *this; } + EIGEN_DEVICE_FUNC inline ~CommaInitializer() +#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS + EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) +#endif { finished(); } @@ -105,6 +113,7 @@ struct CommaInitializer * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished()); * \endcode */ + EIGEN_DEVICE_FUNC inline XprType& finished() { eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0) && m_col == m_xpr.cols() @@ -112,7 +121,7 @@ struct CommaInitializer return m_xpr; } - XprType& m_xpr; // target expression + XprType& m_xpr; // target expression Index m_row; // current row id Index m_col; // current col id Index m_currentBlockRows; // current block height diff --git a/external/eigen3/Eigen/src/Core/ConditionEstimator.h b/external/eigen3/Eigen/src/Core/ConditionEstimator.h new file mode 100644 index 0000000..aa7efdc --- /dev/null +++ b/external/eigen3/Eigen/src/Core/ConditionEstimator.h @@ -0,0 +1,175 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com) +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CONDITIONESTIMATOR_H +#define EIGEN_CONDITIONESTIMATOR_H + +namespace Eigen { + +namespace internal { + +template +struct rcond_compute_sign { + static inline Vector run(const Vector& v) { + const RealVector v_abs = v.cwiseAbs(); + return (v_abs.array() == static_cast(0)) + .select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs)); + } +}; + +// Partial specialization to avoid elementwise division for real vectors. +template +struct rcond_compute_sign { + static inline Vector run(const Vector& v) { + return (v.array() < static_cast(0)) + .select(-Vector::Ones(v.size()), Vector::Ones(v.size())); + } +}; + +/** + * \returns an estimate of ||inv(matrix)||_1 given a decomposition of + * \a matrix that implements .solve() and .adjoint().solve() methods. + * + * This function implements Algorithms 4.1 and 5.1 from + * http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf + * which also forms the basis for the condition number estimators in + * LAPACK. Since at most 10 calls to the solve method of dec are + * performed, the total cost is O(dims^2), as opposed to O(dims^3) + * needed to compute the inverse matrix explicitly. + * + * The most common usage is in estimating the condition number + * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be + * computed directly in O(n^2) operations. + * + * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and + * LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. + */ +template +typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec) +{ + typedef typename Decomposition::MatrixType MatrixType; + typedef typename Decomposition::Scalar Scalar; + typedef typename Decomposition::RealScalar RealScalar; + typedef typename internal::plain_col_type::type Vector; + typedef typename internal::plain_col_type::type RealVector; + const bool is_complex = (NumTraits::IsComplex != 0); + + eigen_assert(dec.rows() == dec.cols()); + const Index n = dec.rows(); + if (n == 0) + return 0; + + // Disable Index to float conversion warning +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + Vector v = dec.solve(Vector::Ones(n) / Scalar(n)); +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif + + // lower_bound is a lower bound on + // ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1 + // and is the objective maximized by the ("super-") gradient ascent + // algorithm below. + RealScalar lower_bound = v.template lpNorm<1>(); + if (n == 1) + return lower_bound; + + // Gradient ascent algorithm follows: We know that the optimum is achieved at + // one of the simplices v = e_i, so in each iteration we follow a + // super-gradient to move towards the optimal one. + RealScalar old_lower_bound = lower_bound; + Vector sign_vector(n); + Vector old_sign_vector; + Index v_max_abs_index = -1; + Index old_v_max_abs_index = v_max_abs_index; + for (int k = 0; k < 4; ++k) + { + sign_vector = internal::rcond_compute_sign::run(v); + if (k > 0 && !is_complex && sign_vector == old_sign_vector) { + // Break if the solution stagnated. + break; + } + // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )| + v = dec.adjoint().solve(sign_vector); + v.real().cwiseAbs().maxCoeff(&v_max_abs_index); + if (v_max_abs_index == old_v_max_abs_index) { + // Break if the solution stagnated. + break; + } + // Move to the new simplex e_j, where j = v_max_abs_index. + v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j. + lower_bound = v.template lpNorm<1>(); + if (lower_bound <= old_lower_bound) { + // Break if the gradient step did not increase the lower_bound. + break; + } + if (!is_complex) { + old_sign_vector = sign_vector; + } + old_v_max_abs_index = v_max_abs_index; + old_lower_bound = lower_bound; + } + // The following calculates an independent estimate of ||matrix||_1 by + // multiplying matrix by a vector with entries of slowly increasing + // magnitude and alternating sign: + // v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1. + // This improvement to Hager's algorithm above is due to Higham. It was + // added to make the algorithm more robust in certain corner cases where + // large elements in the matrix might otherwise escape detection due to + // exact cancellation (especially when op and op_adjoint correspond to a + // sequence of backsubstitutions and permutations), which could cause + // Hager's algorithm to vastly underestimate ||matrix||_1. + Scalar alternating_sign(RealScalar(1)); + for (Index i = 0; i < n; ++i) { + // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates + v[i] = alternating_sign * static_cast(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1)))); + alternating_sign = -alternating_sign; + } + v = dec.solve(v); + const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n)); + return numext::maxi(lower_bound, alternate_lower_bound); +} + +/** \brief Reciprocal condition number estimator. + * + * Computing a decomposition of a dense matrix takes O(n^3) operations, while + * this method estimates the condition number quickly and reliably in O(n^2) + * operations. + * + * \returns an estimate of the reciprocal condition number + * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and + * its decomposition. Supports the following decompositions: FullPivLU, + * PartialPivLU, LDLT, and LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. + */ +template +typename Decomposition::RealScalar +rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, const Decomposition& dec) +{ + typedef typename Decomposition::RealScalar RealScalar; + eigen_assert(dec.rows() == dec.cols()); + if (dec.rows() == 0) return RealScalar(1); + if (matrix_norm == RealScalar(0)) return RealScalar(0); + if (dec.rows() == 1) return RealScalar(1); + const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); + return (inverse_matrix_norm == RealScalar(0) ? RealScalar(0) + : (RealScalar(1) / inverse_matrix_norm) / matrix_norm); +} + +} // namespace internal + +} // namespace Eigen + +#endif diff --git a/external/eigen3/Eigen/src/Core/CoreEvaluators.h b/external/eigen3/Eigen/src/Core/CoreEvaluators.h new file mode 100644 index 0000000..f7c1eff --- /dev/null +++ b/external/eigen3/Eigen/src/Core/CoreEvaluators.h @@ -0,0 +1,1671 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_COREEVALUATORS_H +#define EIGEN_COREEVALUATORS_H + +namespace Eigen { + +namespace internal { + +// This class returns the evaluator kind from the expression storage kind. +// Default assumes index based accessors +template +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; + +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. +template struct storage_kind_to_shape; + +template<> struct storage_kind_to_shape { typedef DenseShape Shape; }; +template<> struct storage_kind_to_shape { typedef SolverShape Shape; }; +template<> struct storage_kind_to_shape { typedef PermutationShape Shape; }; +template<> struct storage_kind_to_shape { typedef TranspositionsShape Shape; }; + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - ternary_evaluator for expression taking three arguments (CwiseTernaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires additional tags for dispatching. +// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template< typename T, + typename Arg1Kind = typename evaluator_traits::Kind, + typename Arg2Kind = typename evaluator_traits::Kind, + typename Arg3Kind = typename evaluator_traits::Kind, + typename Arg1Scalar = typename traits::Scalar, + typename Arg2Scalar = typename traits::Scalar, + typename Arg3Scalar = typename traits::Scalar> struct ternary_evaluator; + +template< typename T, + typename LhsKind = typename evaluator_traits::Kind, + typename RhsKind = typename evaluator_traits::Kind, + typename LhsScalar = typename traits::Scalar, + typename RhsScalar = typename traits::Scalar> struct binary_evaluator; + +template< typename T, + typename Kind = typename evaluator_traits::Kind, + typename Scalar = typename T::Scalar> struct unary_evaluator; + +// evaluator_traits contains traits for evaluator + +template +struct evaluator_traits_base +{ + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; +}; + +// Default evaluator traits +template +struct evaluator_traits : public evaluator_traits_base +{ +}; + +template::Shape > +struct evaluator_assume_aliasing { + static const bool value = false; +}; + +// By default, we assume a unary expression: +template +struct evaluator : public unary_evaluator +{ + typedef unary_evaluator Base; + EIGEN_DEVICE_FUNC explicit evaluator(const T& xpr) : Base(xpr) {} +}; + + +// TODO: Think about const-correctness +template +struct evaluator + : evaluator +{ + EIGEN_DEVICE_FUNC + explicit evaluator(const T& xpr) : evaluator(xpr) {} +}; + +// ---------- base class for all evaluators ---------- + +template +struct evaluator_base : public noncopyable +{ + // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. + typedef traits ExpressionTraits; + + enum { + Alignment = 0 + }; +}; + +// -------------------- Matrix and Array -------------------- +// +// evaluator is a common base class for the +// Matrix and Array evaluators. +// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. + +template +struct evaluator > + : evaluator_base +{ + typedef PlainObjectBase PlainObjectType; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = PlainObjectType::IsRowMajor, + IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, + RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits::ReadCost, + Flags = traits::EvaluatorFlags, + Alignment = traits::Alignment + }; + + EIGEN_DEVICE_FUNC evaluator() + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + if (IsRowMajor) + return m_data[row * m_outerStride.value() + col]; + else + return m_data[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_data[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + if (IsRowMajor) + return const_cast(m_data)[row * m_outerStride.value() + col]; + else + return const_cast(m_data)[row + col * m_outerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return const_cast(m_data)[index]; + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + if (IsRowMajor) + return ploadt(m_data + row * m_outerStride.value() + col); + else + return ploadt(m_data + row + col * m_outerStride.value()); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return ploadt(m_data + index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + if (IsRowMajor) + return pstoret + (const_cast(m_data) + row * m_outerStride.value() + col, x); + else + return pstoret + (const_cast(m_data) + row + col * m_outerStride.value(), x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + return pstoret(const_cast(m_data) + index, x); + } + +protected: + const Scalar *m_data; + + // We do not need to know the outer stride for vectors + variable_if_dynamic m_outerStride; +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Matrix XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +template +struct evaluator > + : evaluator > > +{ + typedef Array XprType; + + EIGEN_DEVICE_FUNC evaluator() {} + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& m) + : evaluator >(m) + { } +}; + +// -------------------- Transpose -------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Transpose XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags ^ RowMajorBit, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(col, row); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(col, row); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename XprType::Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(col, row); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template writePacket(col, row, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +// -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. + +template::value, + bool has_unary = has_unary_operator::value, + bool has_binary = has_binary_operator::value> +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp(); } +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp(i,j); } +}; + +// We need the following specialization for vector-only functors assigned to a runtime vector, +// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd. +// In this case, i==0 and j is used for the actual iteration. +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i==0 || j==0); + return op(i+j); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i==0 || j==0); + return op.template packetOp(i+j); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp(i); } +}; + +template +struct nullary_wrapper {}; + +#if 0 && EIGEN_COMP_MSVC>0 +// Disable this ugly workaround. This is now handled in traits::match, +// but this piece of code might still become handly if some other weird compilation +// erros pop up again. + +// MSVC exhibits a weird compilation error when +// compiling: +// Eigen::MatrixXf A = MatrixXf::Random(3,3); +// Ref R = 2.f*A; +// and that has_*ary_operator> have not been instantiated yet. +// The "problem" is that evaluator<2.f*A> is instantiated by traits::match<2.f*A> +// and at that time has_*ary_operator returns true regardless of T. +// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>. +// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(), +// and packet() are really instantiated as implemented below: + +// This is a simple wrapper around Index to enforce the re-instantiation of +// has_*ary_operator when needed. +template struct nullary_wrapper_workaround_msvc { + nullary_wrapper_workaround_msvc(const T&); + operator T()const; +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i); + } +}; +#endif // MSVC workaround + +template +struct evaluator > + : evaluator_base > +{ + typedef CwiseNullaryOp XprType; + typedef typename internal::remove_all::type PlainObjectTypeCleaned; + + enum { + CoeffReadCost = internal::functor_traits::Cost, + + Flags = (evaluator::Flags + & ( HereditaryBits + | (functor_has_linear_access::ret ? LinearAccessBit : 0) + | (functor_traits::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), + Alignment = AlignedMax + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) + : m_functor(n.functor()), m_wrapper() + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(IndexType row, IndexType col) const + { + return m_wrapper(m_functor, row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(IndexType index) const + { + return m_wrapper(m_functor,index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(IndexType row, IndexType col) const + { + return m_wrapper.template packetOp(m_functor, row, col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(IndexType index) const + { + return m_wrapper.template packetOp(m_functor, index); + } + +protected: + const NullaryOp m_functor; + const internal::nullary_wrapper m_wrapper; +}; + +// -------------------- CwiseUnaryOp -------------------- + +template +struct unary_evaluator, IndexBased > + : evaluator_base > +{ + typedef CwiseUnaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = evaluator::Flags + & (HereditaryBits | LinearAccessBit | (functor_traits::PacketAccess ? PacketAccessBit : 0)), + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_argImpl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + +protected: + const UnaryOp m_functor; + evaluator m_argImpl; +}; + +// -------------------- CwiseTernaryOp -------------------- + +// this is a ternary expression +template +struct evaluator > + : public ternary_evaluator > +{ + typedef CwiseTernaryOp XprType; + typedef ternary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct ternary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseTernaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + Arg1Flags = evaluator::Flags, + Arg2Flags = evaluator::Flags, + Arg3Flags = evaluator::Flags, + SameType = is_same::value && is_same::value, + StorageOrdersAgree = (int(Arg1Flags)&RowMajorBit)==(int(Arg2Flags)&RowMajorBit) && (int(Arg1Flags)&RowMajorBit)==(int(Arg3Flags)&RowMajorBit), + Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & ( + HereditaryBits + | (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN( + EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment), + evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_arg1Impl.template packet(row, col), + m_arg2Impl.template packet(row, col), + m_arg3Impl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + +protected: + const TernaryOp m_functor; + evaluator m_arg1Impl; + evaluator m_arg2Impl; + evaluator m_arg3Impl; +}; + +// -------------------- CwiseBinaryOp -------------------- + +// this is a binary expression +template +struct evaluator > + : public binary_evaluator > +{ + typedef CwiseBinaryOp XprType; + typedef binary_evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct binary_evaluator, IndexBased, IndexBased> + : evaluator_base > +{ + typedef CwiseBinaryOp XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment,evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_functor.packetOp(m_lhsImpl.template packet(row, col), + m_rhsImpl.template packet(row, col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_functor.packetOp(m_lhsImpl.template packet(index), + m_rhsImpl.template packet(index)); + } + +protected: + const BinaryOp m_functor; + evaluator m_lhsImpl; + evaluator m_rhsImpl; +}; + +// -------------------- CwiseUnaryView -------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef CwiseUnaryView XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), + + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_unaryOp(m_argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_unaryOp(m_argImpl.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_unaryOp(m_argImpl.coeffRef(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_unaryOp(m_argImpl.coeffRef(index)); + } + +protected: + const UnaryOp m_unaryOp; + evaluator m_argImpl; +}; + +// -------------------- Map -------------------- + +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? +// but that might complicate template specialization +template +struct mapbase_evaluator; + +template +struct mapbase_evaluator : evaluator_base +{ + typedef Derived XprType; + typedef typename XprType::PointerType PointerType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits::ReadCost + }; + + EIGEN_DEVICE_FUNC explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast(map.data())), + m_innerStride(map.innerStride()), + m_outerStride(map.outerStride()) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_data[index * m_innerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_data[index * m_innerStride.value()]; + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return internal::ploadt(ptr); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return internal::ploadt(m_data + index * m_innerStride.value()); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return internal::pstoret(ptr, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + internal::pstoret(m_data + index * m_innerStride.value(), x); + } +protected: + EIGEN_DEVICE_FUNC + inline Index rowStride() const { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } + EIGEN_DEVICE_FUNC + inline Index colStride() const { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } + + PointerType m_data; + const internal::variable_if_dynamic m_innerStride; + const internal::variable_if_dynamic m_outerStride; +}; + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Map XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit), + Flags = int( evaluator::Flags) & (LinearAccessMask&PacketAccessMask), + + Alignment = int(MapOptions)&int(AlignedMask) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) + : mapbase_evaluator(map) + { } +}; + +// -------------------- Ref -------------------- + +template +struct evaluator > + : public mapbase_evaluator, PlainObjectType> +{ + typedef Ref XprType; + + enum { + Flags = evaluator >::Flags, + Alignment = evaluator >::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& ref) + : mapbase_evaluator(ref) + { } +}; + +// -------------------- Block -------------------- + +template::ret> struct block_evaluator; + +template +struct evaluator > + : block_evaluator +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType + ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = evaluator::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, + + PacketAlignment = unpacket_traits::alignment, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) + }; + typedef block_evaluator block_evaluator_type; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& block) : block_evaluator_type(block) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } +}; + +// no direct-access => dispatch to a unary evaluator +template +struct block_evaluator + : unary_evaluator > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : unary_evaluator(block) + {} +}; + +template +struct unary_evaluator, IndexBased> + : evaluator_base > +{ + typedef Block XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) + : m_argImpl(block.nestedExpression()), + m_startRow(block.startRow()), + m_startCol(block.startCol()) + { } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + RowsAtCompileTime = XprType::RowsAtCompileTime + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); + } + +protected: + evaluator m_argImpl; + const variable_if_dynamic m_startRow; + const variable_if_dynamic m_startCol; +}; + +// TODO: This evaluator does not actually use the child evaluator; +// all action is via the data() as returned by the Block expression. + +template +struct block_evaluator + : mapbase_evaluator, + typename Block::PlainObject> +{ + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block) + : mapbase_evaluator(block) + { + // TODO: for the 3.3 release, this should be turned to an internal assertion, but let's keep it as is for the beta lifetime + eigen_assert(((internal::UIntPtr(block.data()) % EIGEN_PLAIN_ENUM_MAX(1,evaluator::Alignment)) == 0) && "data is not aligned"); + } +}; + + +// -------------------- Select -------------------- +// NOTE shall we introduce a ternary_evaluator? + +// TODO enable vectorization for Select +template +struct evaluator > + : evaluator_base > +{ + typedef Select XprType; + enum { + CoeffReadCost = evaluator::CoeffReadCost + + EIGEN_PLAIN_ENUM_MAX(evaluator::CoeffReadCost, + evaluator::CoeffReadCost), + + Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, + + Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select) + : m_conditionImpl(select.conditionMatrix()), + m_thenImpl(select.thenMatrix()), + m_elseImpl(select.elseMatrix()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + if (m_conditionImpl.coeff(row, col)) + return m_thenImpl.coeff(row, col); + else + return m_elseImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + if (m_conditionImpl.coeff(index)) + return m_thenImpl.coeff(index); + else + return m_elseImpl.coeff(index); + } + +protected: + evaluator m_conditionImpl; + evaluator m_thenImpl; + evaluator m_elseImpl; +}; + + +// -------------------- Replicate -------------------- + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Replicate XprType; + typedef typename XprType::CoeffReturnType CoeffReturnType; + enum { + Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor + }; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0, + Flags = (evaluator::Flags & (HereditaryBits|LinearAccessMask) & ~RowMajorBit) | (traits::Flags & RowMajorBit), + + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.coeff(actual_row, actual_col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? index : index%m_rows.value()); + + return m_argImpl.coeff(actual_index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows.value(); + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols.value(); + + return m_argImpl.template packet(actual_row, actual_col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + const Index actual_index = internal::traits::RowsAtCompileTime==1 + ? (ColFactor==1 ? index : index%m_cols.value()) + : (RowFactor==1 ? index : index%m_rows.value()); + + return m_argImpl.template packet(actual_index); + } + +protected: + const ArgTypeNested m_arg; + evaluator m_argImpl; + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- PartialReduxExpr -------------------- + +template< typename ArgType, typename MemberOp, int Direction> +struct evaluator > + : evaluator_base > +{ + typedef PartialReduxExpr XprType; + typedef typename internal::nested_eval::type ArgTypeNested; + typedef typename internal::remove_all::type ArgTypeNestedCleaned; + typedef typename ArgType::Scalar InputScalar; + typedef typename XprType::Scalar Scalar; + enum { + TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost CostOpType; + enum { + CoeffReadCost = TraversalSize==Dynamic ? HugeCost + : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), + + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit, + + Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) + : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : int(CostOpType::value)); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index i, Index j) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(j)); + else + return m_functor(m_arg.row(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Scalar coeff(Index index) const + { + if (Direction==Vertical) + return m_functor(m_arg.col(index)); + else + return m_functor(m_arg.row(index)); + } + +protected: + typename internal::add_const_on_value_type::type m_arg; + const MemberOp m_functor; +}; + + +// -------------------- MatrixWrapper and ArrayWrapper -------------------- +// +// evaluator_wrapper_base is a common base class for the +// MatrixWrapper and ArrayWrapper evaluators. + +template +struct evaluator_wrapper_base + : evaluator_base +{ + typedef typename remove_all::type ArgType; + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + + typedef typename ArgType::Scalar Scalar; + typedef typename ArgType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + return m_argImpl.template packet(row, col); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + m_argImpl.template writePacket(row, col, x); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + evaluator m_argImpl; +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef MatrixWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + +template +struct unary_evaluator > + : evaluator_wrapper_base > +{ + typedef ArrayWrapper XprType; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base >(wrapper.nestedExpression()) + { } +}; + + +// -------------------- Reverse -------------------- + +// defined in Reverse.h: +template struct reverse_packet_cond; + +template +struct unary_evaluator > + : evaluator_base > +{ + typedef Reverse XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::IsRowMajor, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + || ((ReverseRow && XprType::ColsAtCompileTime==1) || (ReverseCol && XprType::RowsAtCompileTime==1)) + ? LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), + + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& reverse) + : m_argImpl(reverse.nestedExpression()), + m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1), + m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, + ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index row, Index col) const + { + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + return reverse_packet::run(m_argImpl.template packet( + ReverseRow ? m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col)); + } + + template + EIGEN_STRONG_INLINE + PacketType packet(Index index) const + { + enum { PacketSize = unpacket_traits::size }; + return preverse(m_argImpl.template packet(m_rows.value() * m_cols.value() - index - PacketSize)); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index row, Index col, const PacketType& x) + { + // FIXME we could factorize some code with packet(i,j) + enum { + PacketSize = unpacket_traits::size, + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1 + }; + typedef internal::reverse_packet_cond reverse_packet; + m_argImpl.template writePacket( + ReverseRow ? m_rows.value() - row - OffsetRow : row, + ReverseCol ? m_cols.value() - col - OffsetCol : col, + reverse_packet::run(x)); + } + + template + EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketType& x) + { + enum { PacketSize = unpacket_traits::size }; + m_argImpl.template writePacket + (m_rows.value() * m_cols.value() - index - PacketSize, preverse(x)); + } + +protected: + evaluator m_argImpl; + + // If we do not reverse rows, then we do not need to know the number of rows; same for columns + // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors. + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + + +// -------------------- Diagonal -------------------- + +template +struct evaluator > + : evaluator_base > +{ + typedef Diagonal XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (unsigned int)(evaluator::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit, + + Alignment = 0 + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& diagonal) + : m_argImpl(diagonal.nestedExpression()), + m_index(diagonal.index()) + { } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index row, Index) const + { + return m_argImpl.coeff(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index + rowOffset(), index + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index row, Index) + { + return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); + } + +protected: + evaluator m_argImpl; + const internal::variable_if_dynamicindex m_index; + +private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } +}; + + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template class EvalToTemp; + +template +struct traits > + : public traits +{ }; + +template +class EvalToTemp + : public dense_xpr_base >::type +{ + public: + + typedef typename dense_xpr_base::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) + : m_arg(arg) + { } + + const ArgType& arg() const + { + return m_arg; + } + + Index rows() const + { + return m_arg.rows(); + } + + Index cols() const + { + return m_arg.cols(); + } + + private: + const ArgType& m_arg; +}; + +template +struct evaluator > + : public evaluator +{ + typedef EvalToTemp XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.arg()) + { + ::new (static_cast(this)) Base(m_result); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) + : m_result(arg) + { + ::new (static_cast(this)) Base(m_result); + } + +protected: + PlainObject m_result; +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREEVALUATORS_H diff --git a/external/eigen3/Eigen/src/Core/CoreIterators.h b/external/eigen3/Eigen/src/Core/CoreIterators.h index 6da4683..4eb42b9 100644 --- a/external/eigen3/Eigen/src/Core/CoreIterators.h +++ b/external/eigen3/Eigen/src/Core/CoreIterators.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,47 +15,113 @@ namespace Eigen { /* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core */ -/** \ingroup SparseCore_Module - * \class InnerIterator - * \brief An InnerIterator allows to loop over the element of a sparse (or dense) matrix or expression - * - * todo +namespace internal { + +template +class inner_iterator_selector; + +} + +/** \class InnerIterator + * \brief An InnerIterator allows to loop over the element of any matrix expression. + * + * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is constructed. + * + * TODO: add a usage example */ +template +class InnerIterator +{ +protected: + typedef internal::inner_iterator_selector::Kind> IteratorType; + typedef internal::evaluator EvaluatorType; + typedef typename internal::traits::Scalar Scalar; +public: + /** Construct an iterator over the \a outerId -th row or column of \a xpr */ + InnerIterator(const XprType &xpr, const Index &outerId) + : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize()) + {} + + /// \returns the value of the current coefficient. + EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); } + /** Increment the iterator \c *this to the next non-zero coefficient. + * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView + */ + EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } + /// \returns the column or row index of the current coefficient. + EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } + /// \returns the row index of the current coefficient. + EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); } + /// \returns the column index of the current coefficient. + EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); } + /// \returns \c true if the iterator \c *this still references a valid coefficient. + EIGEN_STRONG_INLINE operator bool() const { return m_iter; } + +protected: + EvaluatorType m_eval; + IteratorType m_iter; +private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix A; + // SparseMatrix::InnerIterator it(A,0); + template InnerIterator(const EigenBase&,Index outer); +}; + +namespace internal { -// generic version for dense matrix and expressions -template class DenseBase::InnerIterator +// Generic inner iterator implementation for dense objects +template +class inner_iterator_selector { - protected: - typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - - enum { IsRowMajor = (Derived::Flags&RowMajorBit)==RowMajorBit }; - public: - EIGEN_STRONG_INLINE InnerIterator(const Derived& expr, Index outer) - : m_expression(expr), m_inner(0), m_outer(outer), m_end(expr.innerSize()) - {} - - EIGEN_STRONG_INLINE Scalar value() const - { - return (IsRowMajor) ? m_expression.coeff(m_outer, m_inner) - : m_expression.coeff(m_inner, m_outer); - } - - EIGEN_STRONG_INLINE InnerIterator& operator++() { m_inner++; return *this; } - - EIGEN_STRONG_INLINE Index index() const { return m_inner; } - inline Index row() const { return IsRowMajor ? m_outer : index(); } - inline Index col() const { return IsRowMajor ? index() : m_outer; } - - EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } - - protected: - const Derived& m_expression; - Index m_inner; - const Index m_outer; - const Index m_end; +protected: + typedef evaluator EvaluatorType; + typedef typename traits::Scalar Scalar; + enum { IsRowMajor = (XprType::Flags&RowMajorBit)==RowMajorBit }; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize) + : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize) + {} + + EIGEN_STRONG_INLINE Scalar value() const + { + return (IsRowMajor) ? m_eval.coeff(m_outer, m_inner) + : m_eval.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE inner_iterator_selector& operator++() { m_inner++; return *this; } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner>=0; } + +protected: + const EvaluatorType& m_eval; + Index m_inner; + const Index m_outer; + const Index m_end; }; +// For iterator-based evaluator, inner-iterator is already implemented as +// evaluator<>::InnerIterator +template +class inner_iterator_selector + : public evaluator::InnerIterator +{ +protected: + typedef typename evaluator::InnerIterator Base; + typedef evaluator EvaluatorType; + +public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &/*innerSize*/) + : Base(eval, outerId) + {} +}; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_COREITERATORS_H diff --git a/external/eigen3/Eigen/src/Core/CwiseBinaryOp.h b/external/eigen3/Eigen/src/Core/CwiseBinaryOp.h index 519a866..a36765e 100644 --- a/external/eigen3/Eigen/src/Core/CwiseBinaryOp.h +++ b/external/eigen3/Eigen/src/Core/CwiseBinaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -13,26 +13,6 @@ namespace Eigen { -/** \class CwiseBinaryOp - * \ingroup Core_Module - * - * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions - * - * \param BinaryOp template functor implementing the operator - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * - * This class represents an expression where a coefficient-wise binary operator is applied to two expressions. - * It is the return type of binary operators, by which we mean only those binary operators where - * both the left-hand side and the right-hand side are Eigen expressions. - * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp. - * - * Most of the time, this is the only way that it is used, so you typically don't have to name - * CwiseBinaryOp types explicitly. - * - * \sa MatrixBase::binaryExpr(const MatrixBase &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp - */ - namespace internal { template struct traits > @@ -52,77 +32,75 @@ struct traits > // we still want to handle the case when the result type is different. typedef typename result_of< BinaryOp( - typename Lhs::Scalar, - typename Rhs::Scalar + const typename Lhs::Scalar&, + const typename Rhs::Scalar& ) >::type Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; + typedef typename cwise_promote_storage_type::StorageKind, + typename traits::StorageKind, + BinaryOp>::ret StorageKind; + typedef typename promote_index_type::StorageIndex, + typename traits::StorageIndex>::type StorageIndex; typedef typename Lhs::Nested LhsNested; typedef typename Rhs::Nested RhsNested; typedef typename remove_reference::type _LhsNested; typedef typename remove_reference::type _RhsNested; enum { - LhsCoeffReadCost = _LhsNested::CoeffReadCost, - RhsCoeffReadCost = _RhsNested::CoeffReadCost, - LhsFlags = _LhsNested::Flags, - RhsFlags = _RhsNested::Flags, - SameType = is_same::value, - StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit), - Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( - HereditaryBits - | (int(LhsFlags) & int(RhsFlags) & - ( AlignedBit - | (StorageOrdersAgree ? LinearAccessBit : 0) - | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) - ) - ) - ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), - Cost0 = EIGEN_ADD_COST(LhsCoeffReadCost,RhsCoeffReadCost), - CoeffReadCost = EIGEN_ADD_COST(Cost0,functor_traits::Cost) + Flags = cwise_promote_storage_order::StorageKind,typename traits::StorageKind,_LhsNested::Flags & RowMajorBit,_RhsNested::Flags & RowMajorBit>::value }; }; } // end namespace internal -// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor -// that would take two operands of different types. If there were such an example, then this check should be -// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as -// currently they take only one typename Scalar template parameter. -// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. -// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to -// add together a float matrix and a double matrix. -#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ - EIGEN_STATIC_ASSERT((internal::functor_is_product_like::ret \ - ? int(internal::scalar_product_traits::Defined) \ - : int(internal::is_same::value)), \ - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - template class CwiseBinaryOpImpl; -template -class CwiseBinaryOp : internal::no_assignment_operator, +/** \class CwiseBinaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions + * + * \tparam BinaryOp template functor implementing the operator + * \tparam LhsType the type of the left-hand side + * \tparam RhsType the type of the right-hand side + * + * This class represents an expression where a coefficient-wise binary operator is applied to two expressions. + * It is the return type of binary operators, by which we mean only those binary operators where + * both the left-hand side and the right-hand side are Eigen expressions. + * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseBinaryOp types explicitly. + * + * \sa MatrixBase::binaryExpr(const MatrixBase &,const CustomBinaryOp &) const, class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseBinaryOp : public CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type::StorageKind, - typename internal::traits::StorageKind>::ret> + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>, + internal::no_assignment_operator { public: + + typedef typename internal::remove_all::type Functor; + typedef typename internal::remove_all::type Lhs; + typedef typename internal::remove_all::type Rhs; typedef typename CwiseBinaryOpImpl< - BinaryOp, Lhs, Rhs, - typename internal::promote_storage_type::StorageKind, - typename internal::traits::StorageKind>::ret>::Base Base; + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + BinaryOp>::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) - typedef typename internal::nested::type LhsNested; - typedef typename internal::nested::type RhsNested; + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; typedef typename internal::remove_reference::type _LhsNested; typedef typename internal::remove_reference::type _RhsNested; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp()) : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) { @@ -132,6 +110,7 @@ class CwiseBinaryOp : internal::no_assignment_operator, eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { // return the fixed size type if available to enable compile time optimizations if (internal::traits::type>::RowsAtCompileTime==Dynamic) @@ -139,6 +118,7 @@ class CwiseBinaryOp : internal::no_assignment_operator, else return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { // return the fixed size type if available to enable compile time optimizations if (internal::traits::type>::ColsAtCompileTime==Dynamic) @@ -148,10 +128,13 @@ class CwiseBinaryOp : internal::no_assignment_operator, } /** \returns the left hand side nested expression */ + EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; } /** \returns the right hand side nested expression */ + EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; } /** \returns the functor representing the binary operation */ + EIGEN_DEVICE_FUNC const BinaryOp& functor() const { return m_functor; } protected: @@ -160,41 +143,13 @@ class CwiseBinaryOp : internal::no_assignment_operator, const BinaryOp m_functor; }; -template -class CwiseBinaryOpImpl - : public internal::dense_xpr_base >::type +// Generic API dispatcher +template +class CwiseBinaryOpImpl + : public internal::generic_xpr_base >::type { - typedef CwiseBinaryOp Derived; - public: - - typedef typename internal::dense_xpr_base >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE( Derived ) - - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().lhs().coeff(rowId, colId), - derived().rhs().coeff(rowId, colId)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().lhs().template packet(rowId, colId), - derived().rhs().template packet(rowId, colId)); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().lhs().coeff(index), - derived().rhs().coeff(index)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().lhs().template packet(index), - derived().rhs().template packet(index)); - } +public: + typedef typename internal::generic_xpr_base >::type Base; }; /** replaces \c *this by \c *this - \a other. @@ -206,8 +161,7 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator-=(const MatrixBase &other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -220,11 +174,11 @@ template EIGEN_STRONG_INLINE Derived & MatrixBase::operator+=(const MatrixBase& other) { - SelfCwiseBinaryOp, Derived, OtherDerived> tmp(derived()); - tmp = other.derived(); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } } // end namespace Eigen #endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/external/eigen3/Eigen/src/Core/CwiseNullaryOp.h b/external/eigen3/Eigen/src/Core/CwiseNullaryOp.h index a93bab2..ddd607e 100644 --- a/external/eigen3/Eigen/src/Core/CwiseNullaryOp.h +++ b/external/eigen3/Eigen/src/Core/CwiseNullaryOp.h @@ -12,13 +12,24 @@ namespace Eigen { +namespace internal { +template +struct traits > : traits +{ + enum { + Flags = traits::Flags & RowMajorBit + }; +}; + +} // namespace internal + /** \class CwiseNullaryOp * \ingroup Core_Module * * \brief Generic expression of a matrix where all coefficients are defined by a functor * - * \param NullaryOp template functor implementing the operator - * \param PlainObjectType the underlying plain matrix/array type + * \tparam NullaryOp template functor implementing the operator + * \tparam PlainObjectType the underlying plain matrix/array type * * This class represents an expression of a generic nullary operator. * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods, @@ -27,68 +38,49 @@ namespace Eigen { * However, if you want to write a function returning such an expression, you * will need to use this class. * - * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr() + * The functor NullaryOp must expose one of the following method: + + + + +
\c operator()() if the procedural generation does not depend on the coefficient entries (e.g., random numbers)
\c operator()(Index i)if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace)
\c operator()(Index i,Index j)if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)
+ * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors. + * + * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding + * C++11 random number generators. + * + * A nullary expression can also be used to implement custom sophisticated matrix manipulations + * that cannot be covered by the existing set of natively supported matrix manipulations. + * See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations + * on the behavior of CwiseNullaryOp. + * + * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr */ - -namespace internal { template -struct traits > : traits -{ - enum { - Flags = (traits::Flags - & ( HereditaryBits - | (functor_has_linear_access::ret ? LinearAccessBit : 0) - | (functor_traits::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), - CoeffReadCost = functor_traits::Cost - }; -}; -} - -template -class CwiseNullaryOp : internal::no_assignment_operator, - public internal::dense_xpr_base< CwiseNullaryOp >::type +class CwiseNullaryOp : public internal::dense_xpr_base< CwiseNullaryOp >::type, internal::no_assignment_operator { public: typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) - CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp()) - : m_rows(nbRows), m_cols(nbCols), m_functor(func) + EIGEN_DEVICE_FUNC + CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) + : m_rows(rows), m_cols(cols), m_functor(func) { - eigen_assert(nbRows >= 0 - && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows) - && nbCols >= 0 - && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols)); + eigen_assert(rows >= 0 + && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 + && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); } - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return m_functor(rowId, colId); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return m_functor.packetOp(rowId, colId); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return m_functor(index); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return m_functor.packetOp(index); - } - /** \returns the functor representing the nullary operation */ + EIGEN_DEVICE_FUNC const NullaryOp& functor() const { return m_functor; } protected: @@ -113,10 +105,10 @@ class CwiseNullaryOp : internal::no_assignment_operator, */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) { - return CwiseNullaryOp(rows, cols, func); + return CwiseNullaryOp(rows, cols, func); } /** \returns an expression of a matrix defined by a custom functor \a func @@ -132,16 +124,19 @@ DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f * * The template parameter \a CustomNullaryOp is the type of the functor. * + * Here is an example with C++11 random generators: \include random_cpp11.cpp + * Output: \verbinclude random_cpp11.out + * * \sa class CwiseNullaryOp */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); - else return CwiseNullaryOp(size, 1, func); + if(RowsAtCompileTime == 1) return CwiseNullaryOp(1, size, func); + else return CwiseNullaryOp(size, 1, func); } /** \returns an expression of a matrix defined by a custom functor \a func @@ -155,19 +150,19 @@ DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) */ template template -EIGEN_STRONG_INLINE const CwiseNullaryOp +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp::PlainObject> DenseBase::NullaryExpr(const CustomNullaryOp& func) { - return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); + return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); } /** \returns an expression of a constant matrix of value \a value * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this DenseBase type. * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, - * it is redundant to pass \a nbRows and \a nbCols as arguments, so Zero() should be used + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used * instead. * * The template parameter \a CustomNullaryOp is the type of the functor. @@ -176,9 +171,9 @@ DenseBase::NullaryExpr(const CustomNullaryOp& func) */ template EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Constant(Index nbRows, Index nbCols, const Scalar& value) +DenseBase::Constant(Index rows, Index cols, const Scalar& value) { - return DenseBase::NullaryExpr(nbRows, nbCols, internal::scalar_constant_op(value)); + return DenseBase::NullaryExpr(rows, cols, internal::scalar_constant_op(value)); } /** \returns an expression of a constant matrix of value \a value @@ -197,7 +192,7 @@ DenseBase::Constant(Index nbRows, Index nbCols, const Scalar& value) * \sa class CwiseNullaryOp */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Constant(Index size, const Scalar& value) { return DenseBase::NullaryExpr(size, internal::scalar_constant_op(value)); @@ -213,53 +208,40 @@ DenseBase::Constant(Index size, const Scalar& value) * \sa class CwiseNullaryOp */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Constant(const Scalar& value) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) return DenseBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_constant_op(value)); } -/** - * \brief Sets a linearly space vector. - * - * The function generates 'size' equally spaced values in the closed interval [low,high]. - * This particular version of LinSpaced() uses sequential access, i.e. vector access is - * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization - * and yields faster code than the random access version. - * - * When size is set to 1, a vector of length 1 containing 'high' is returned. - * - * \only_for_vectors - * - * Example: \include DenseBase_LinSpaced_seq.cpp - * Output: \verbinclude DenseBase_LinSpaced_seq.out +/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&) * - * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Index,Scalar,Scalar), CwiseNullaryOp + * \sa LinSpaced(Index,Scalar,Scalar), setLinSpaced(Index,const Scalar&,const Scalar&) */ template -EIGEN_STRONG_INLINE const typename DenseBase::SequentialLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); } -/** - * \copydoc DenseBase::LinSpaced(Sequential_t, Index, const Scalar&, const Scalar&) - * Special version for fixed size types which does not require the size parameter. +/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&) + * + * \sa LinSpaced(Scalar,Scalar) */ template -EIGEN_STRONG_INLINE const typename DenseBase::SequentialLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } /** - * \brief Sets a linearly space vector. + * \brief Sets a linearly spaced vector. * * The function generates 'size' equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. @@ -269,14 +251,24 @@ DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig * Example: \include DenseBase_LinSpaced.cpp * Output: \verbinclude DenseBase_LinSpaced.out * - * \sa setLinSpaced(Index,const Scalar&,const Scalar&), LinSpaced(Sequential_t,Index,const Scalar&,const Scalar&,Index), CwiseNullaryOp + * For integer scalar types, an even spacing is possible if and only if the length of the range, + * i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the + * number of values \c high-low+1 (meaning each value can be repeated the same number of time). + * If one of these two considions is not satisfied, then \c high is lowered to the largest value + * satisfying one of this constraint. + * Here are some examples: + * + * Example: \include DenseBase_LinSpacedInt.cpp + * Output: \verbinclude DenseBase_LinSpacedInt.out + * + * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp */ template -EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); } /** @@ -284,22 +276,23 @@ DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) * Special version for fixed size types which does not require the size parameter. */ template -EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ template -bool DenseBase::isApproxToConstant +EIGEN_DEVICE_FUNC bool DenseBase::isApproxToConstant (const Scalar& val, const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isApprox(this->coeff(i, j), val, prec)) + if(!internal::isApprox(self.coeff(i, j), val, prec)) return false; return true; } @@ -308,7 +301,7 @@ bool DenseBase::isApproxToConstant * * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ template -bool DenseBase::isConstant +EIGEN_DEVICE_FUNC bool DenseBase::isConstant (const Scalar& val, const RealScalar& prec) const { return isApproxToConstant(val, prec); @@ -319,22 +312,22 @@ bool DenseBase::isConstant * \sa setConstant(), Constant(), class CwiseNullaryOp */ template -EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar& val) { setConstant(val); } -/** Sets all coefficients in this expression to \a value. +/** Sets all coefficients in this expression to value \a val. * * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), Constant(), class CwiseNullaryOp, setZero(), setOnes() */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) { return derived() = Constant(rows(), cols(), val); } -/** Resizes to the given \a size, and sets all coefficients in this expression to the given \a value. +/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val. * * \only_for_vectors * @@ -344,17 +337,17 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) */ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setConstant(Index size, const Scalar& val) { resize(size); return setConstant(val); } -/** Resizes to the given size, and sets all coefficients in this expression to the given \a value. +/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val. * - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * \param val the value to which all coefficients are set * * Example: \include Matrix_setConstant_int_int.cpp @@ -363,15 +356,15 @@ PlainObjectBase::setConstant(Index size, const Scalar& val) * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, MatrixBase::Constant(const Scalar&) */ template -EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setConstant(Index nbRows, Index nbCols, const Scalar& val) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setConstant(Index rows, Index cols, const Scalar& val) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(val); } /** - * \brief Sets a linearly space vector. + * \brief Sets a linearly spaced vector. * * The function generates 'size' equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. @@ -381,27 +374,33 @@ PlainObjectBase::setConstant(Index nbRows, Index nbCols, const Scalar& * Example: \include DenseBase_setLinSpaced.cpp * Output: \verbinclude DenseBase_setLinSpaced.out * - * \sa CwiseNullaryOp + * For integer scalar types, do not miss the explanations on the definition + * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink. + * + * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); + return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); } /** - * \brief Sets a linearly space vector. + * \brief Sets a linearly spaced vector. * - * The function fill *this with equally spaced values in the closed interval [low,high]. + * The function fills \c *this with equally spaced values in the closed interval [low,high]. * When size is set to 1, a vector of length 1 containing 'high' is returned. * * \only_for_vectors * - * \sa setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp + * For integer scalar types, do not miss the explanations on the definition + * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink. + * + * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, const Scalar& high) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return setLinSpaced(size(), low, high); @@ -424,10 +423,10 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, * \sa Zero(), Zero(Index) */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Zero(Index nbRows, Index nbCols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Zero(Index rows, Index cols) { - return Constant(nbRows, nbCols, Scalar(0)); + return Constant(rows, cols, Scalar(0)); } /** \returns an expression of a zero vector. @@ -447,7 +446,7 @@ DenseBase::Zero(Index nbRows, Index nbCols) * \sa Zero(), Zero(Index,Index) */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero(Index size) { return Constant(size, Scalar(0)); @@ -464,7 +463,7 @@ DenseBase::Zero(Index size) * \sa Zero(Index), Zero(Index,Index) */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Zero() { return Constant(Scalar(0)); @@ -479,11 +478,12 @@ DenseBase::Zero() * \sa class CwiseNullaryOp, Zero() */ template -bool DenseBase::isZero(const RealScalar& prec) const +EIGEN_DEVICE_FUNC bool DenseBase::isZero(const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) for(Index i = 0; i < rows(); ++i) - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; return true; } @@ -496,7 +496,7 @@ bool DenseBase::isZero(const RealScalar& prec) const * \sa class CwiseNullaryOp, Zero() */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setZero() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setZero() { return setConstant(Scalar(0)); } @@ -511,7 +511,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setZero() * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() */ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index newSize) { resize(newSize); @@ -520,8 +520,8 @@ PlainObjectBase::setZero(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to zero. * - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setZero_int_int.cpp * Output: \verbinclude Matrix_setZero_int_int.out @@ -529,10 +529,10 @@ PlainObjectBase::setZero(Index newSize) * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() */ template -EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setZero(Index nbRows, Index nbCols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setZero(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(Scalar(0)); } @@ -540,7 +540,7 @@ PlainObjectBase::setZero(Index nbRows, Index nbCols) /** \returns an expression of a matrix where all coefficients equal one. * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, @@ -553,10 +553,10 @@ PlainObjectBase::setZero(Index nbRows, Index nbCols) * \sa Ones(), Ones(Index), isOnes(), class Ones */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType -DenseBase::Ones(Index nbRows, Index nbCols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Ones(Index rows, Index cols) { - return Constant(nbRows, nbCols, Scalar(1)); + return Constant(rows, cols, Scalar(1)); } /** \returns an expression of a vector where all coefficients equal one. @@ -576,7 +576,7 @@ DenseBase::Ones(Index nbRows, Index nbCols) * \sa Ones(), Ones(Index,Index), isOnes(), class Ones */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Ones(Index newSize) { return Constant(newSize, Scalar(1)); @@ -593,7 +593,7 @@ DenseBase::Ones(Index newSize) * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones */ template -EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Ones() { return Constant(Scalar(1)); @@ -608,7 +608,7 @@ DenseBase::Ones() * \sa class CwiseNullaryOp, Ones() */ template -bool DenseBase::isOnes +EIGEN_DEVICE_FUNC bool DenseBase::isOnes (const RealScalar& prec) const { return isApproxToConstant(Scalar(1), prec); @@ -622,7 +622,7 @@ bool DenseBase::isOnes * \sa class CwiseNullaryOp, Ones() */ template -EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() { return setConstant(Scalar(1)); } @@ -637,7 +637,7 @@ EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() */ template -EIGEN_STRONG_INLINE Derived& +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setOnes(Index newSize) { resize(newSize); @@ -646,8 +646,8 @@ PlainObjectBase::setOnes(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to one. * - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setOnes_int_int.cpp * Output: \verbinclude Matrix_setOnes_int_int.out @@ -655,10 +655,10 @@ PlainObjectBase::setOnes(Index newSize) * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() */ template -EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setOnes(Index nbRows, Index nbCols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& +PlainObjectBase::setOnes(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setConstant(Scalar(1)); } @@ -666,7 +666,7 @@ PlainObjectBase::setOnes(Index nbRows, Index nbCols) /** \returns an expression of the identity matrix (not necessarily square). * - * The parameters \a nbRows and \a nbCols are the number of rows and of columns of + * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, @@ -679,10 +679,10 @@ PlainObjectBase::setOnes(Index nbRows, Index nbCols) * \sa Identity(), setIdentity(), isIdentity() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType -MatrixBase::Identity(Index nbRows, Index nbCols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity(Index rows, Index cols) { - return DenseBase::NullaryExpr(nbRows, nbCols, internal::scalar_identity_op()); + return DenseBase::NullaryExpr(rows, cols, internal::scalar_identity_op()); } /** \returns an expression of the identity matrix (not necessarily square). @@ -696,7 +696,7 @@ MatrixBase::Identity(Index nbRows, Index nbCols) * \sa Identity(Index,Index), setIdentity(), isIdentity() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType MatrixBase::Identity() { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) @@ -716,18 +716,19 @@ template bool MatrixBase::isIdentity (const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); for(Index j = 0; j < cols(); ++j) { for(Index i = 0; i < rows(); ++i) { if(i == j) { - if(!internal::isApprox(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isApprox(self.coeff(i, j), static_cast(1), prec)) return false; } else { - if(!internal::isMuchSmallerThan(this->coeff(i, j), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; } } @@ -740,6 +741,7 @@ namespace internal { template=16)> struct setIdentity_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Derived& run(Derived& m) { return m = Derived::Identity(m.rows(), m.cols()); @@ -749,11 +751,11 @@ struct setIdentity_impl template struct setIdentity_impl { - typedef typename Derived::Index Index; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Derived& run(Derived& m) { m.setZero(); - const Index size = (std::min)(m.rows(), m.cols()); + const Index size = numext::mini(m.rows(), m.cols()); for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1); return m; } @@ -769,15 +771,15 @@ struct setIdentity_impl * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() */ template -EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() { return internal::setIdentity_impl::run(derived()); } /** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this. * - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setIdentity_int_int.cpp * Output: \verbinclude Matrix_setIdentity_int_int.out @@ -785,9 +787,9 @@ EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() */ template -EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index nbRows, Index nbCols) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index cols) { - derived().resize(nbRows, nbCols); + derived().resize(rows, cols); return setIdentity(); } @@ -798,7 +800,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index nbRows, Inde * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index newSize, Index i) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index newSize, Index i) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return BasisReturnType(SquareMatrixType::Identity(newSize,newSize), i); @@ -813,7 +815,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index i) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit(Index i) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return BasisReturnType(SquareMatrixType::Identity(),i); @@ -826,7 +828,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitX() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitX() { return Derived::Unit(0); } /** \returns an expression of the Y axis unit vector (0,1{,0}^*) @@ -836,7 +838,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitY() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitY() { return Derived::Unit(1); } /** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) @@ -846,7 +848,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitZ() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitZ() { return Derived::Unit(2); } /** \returns an expression of the W axis unit vector (0,0,0,1) @@ -856,7 +858,7 @@ EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBa * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() */ template -EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitW() +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitW() { return Derived::Unit(3); } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/CwiseTernaryOp.h b/external/eigen3/Eigen/src/Core/CwiseTernaryOp.h new file mode 100644 index 0000000..9f3576f --- /dev/null +++ b/external/eigen3/Eigen/src/Core/CwiseTernaryOp.h @@ -0,0 +1,197 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_TERNARY_OP_H +#define EIGEN_CWISE_TERNARY_OP_H + +namespace Eigen { + +namespace internal { +template +struct traits > { + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef typename remove_all::type Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Arg1, Arg2, and Arg3 to have the same scalar type + // (see CwiseTernaryOp constructor), + // we still want to handle the case when the result type is different. + typedef typename result_of::type Scalar; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + typedef typename Arg1::Nested Arg1Nested; + typedef typename Arg2::Nested Arg2Nested; + typedef typename Arg3::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + enum { Flags = _Arg1Nested::Flags & RowMajorBit }; +}; +} // end namespace internal + +template +class CwiseTernaryOpImpl; + +/** \class CwiseTernaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise ternary operator is + * applied to two expressions + * + * \tparam TernaryOp template functor implementing the operator + * \tparam Arg1Type the type of the first argument + * \tparam Arg2Type the type of the second argument + * \tparam Arg3Type the type of the third argument + * + * This class represents an expression where a coefficient-wise ternary + * operator is applied to three expressions. + * It is the return type of ternary operators, by which we mean only those + * ternary operators where + * all three arguments are Eigen expressions. + * For example, the return type of betainc(matrix1, matrix2, matrix3) is a + * CwiseTernaryOp. + * + * Most of the time, this is the only way that it is used, so you typically + * don't have to name + * CwiseTernaryOp types explicitly. + * + * \sa MatrixBase::ternaryExpr(const MatrixBase &, const + * MatrixBase &, const CustomTernaryOp &) const, class CwiseBinaryOp, + * class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseTernaryOp : public CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>, + internal::no_assignment_operator +{ + public: + typedef typename internal::remove_all::type Arg1; + typedef typename internal::remove_all::type Arg2; + typedef typename internal::remove_all::type Arg3; + + typedef typename CwiseTernaryOpImpl< + TernaryOp, Arg1Type, Arg2Type, Arg3Type, + typename internal::traits::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp) + + typedef typename internal::ref_selector::type Arg1Nested; + typedef typename internal::ref_selector::type Arg2Nested; + typedef typename internal::ref_selector::type Arg3Nested; + typedef typename internal::remove_reference::type _Arg1Nested; + typedef typename internal::remove_reference::type _Arg2Nested; + typedef typename internal::remove_reference::type _Arg3Nested; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2, + const Arg3& a3, + const TernaryOp& func = TernaryOp()) + : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) { + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + + // The index types should match + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same< + typename internal::traits::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + + eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && + a1.rows() == a3.rows() && a1.cols() == a3.cols()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rows() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg3.rows(); + else if (internal::traits::type>:: + RowsAtCompileTime == Dynamic && + internal::traits::type>:: + RowsAtCompileTime == Dynamic) + return m_arg2.rows(); + else + return m_arg1.rows(); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg3.cols(); + else if (internal::traits::type>:: + ColsAtCompileTime == Dynamic && + internal::traits::type>:: + ColsAtCompileTime == Dynamic) + return m_arg2.cols(); + else + return m_arg1.cols(); + } + + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg1Nested& arg1() const { return m_arg1; } + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg2Nested& arg2() const { return m_arg2; } + /** \returns the third argument nested expression */ + EIGEN_DEVICE_FUNC + const _Arg3Nested& arg3() const { return m_arg3; } + /** \returns the functor representing the ternary operation */ + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + protected: + Arg1Nested m_arg1; + Arg2Nested m_arg2; + Arg3Nested m_arg3; + const TernaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseTernaryOpImpl + : public internal::generic_xpr_base< + CwiseTernaryOp >::type { + public: + typedef typename internal::generic_xpr_base< + CwiseTernaryOp >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_TERNARY_OP_H diff --git a/external/eigen3/Eigen/src/Core/CwiseUnaryOp.h b/external/eigen3/Eigen/src/Core/CwiseUnaryOp.h index f7ee60e..1d2dd19 100644 --- a/external/eigen3/Eigen/src/Core/CwiseUnaryOp.h +++ b/external/eigen3/Eigen/src/Core/CwiseUnaryOp.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -13,41 +13,18 @@ namespace Eigen { -/** \class CwiseUnaryOp - * \ingroup Core_Module - * - * \brief Generic expression where a coefficient-wise unary operator is applied to an expression - * - * \param UnaryOp template functor implementing the operator - * \param XprType the type of the expression to which we are applying the unary operator - * - * This class represents an expression where a unary operator is applied to an expression. - * It is the return type of all operations taking exactly 1 input expression, regardless of the - * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix - * is considered unary, because only the right-hand side is an expression, and its - * return type is a specialization of CwiseUnaryOp. - * - * Most of the time, this is the only way that it is used, so you typically don't have to name - * CwiseUnaryOp types explicitly. - * - * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp - */ - namespace internal { template struct traits > : traits { typedef typename result_of< - UnaryOp(typename XprType::Scalar) + UnaryOp(const typename XprType::Scalar&) >::type Scalar; typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference::type _XprTypeNested; enum { - Flags = _XprTypeNested::Flags & ( - HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits::PacketAccess ? PacketAccessBit : 0)), - CoeffReadCost = EIGEN_ADD_COST(_XprTypeNested::CoeffReadCost, functor_traits::Cost) + Flags = _XprTypeNested::Flags & RowMajorBit }; }; } @@ -55,70 +32,70 @@ struct traits > template class CwiseUnaryOpImpl; +/** \class CwiseUnaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise unary operator is applied to an expression + * + * \tparam UnaryOp template functor implementing the operator + * \tparam XprType the type of the expression to which we are applying the unary operator + * + * This class represents an expression where a unary operator is applied to an expression. + * It is the return type of all operations taking exactly 1 input expression, regardless of the + * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix + * is considered unary, because only the right-hand side is an expression, and its + * return type is a specialization of CwiseUnaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseUnaryOp types explicitly. + * + * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp + */ template -class CwiseUnaryOp : internal::no_assignment_operator, - public CwiseUnaryOpImpl::StorageKind> +class CwiseUnaryOp : public CwiseUnaryOpImpl::StorageKind>, internal::no_assignment_operator { public: typedef typename CwiseUnaryOpImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::ref_selector::type XprTypeNested; + typedef typename internal::remove_all::type NestedExpression; - inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) : m_xpr(xpr), m_functor(func) {} - EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index cols() const { return m_xpr.cols(); } /** \returns the functor representing the unary operation */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryOp& functor() const { return m_functor; } /** \returns the nested expression */ - const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& nestedExpression() const { return m_xpr; } /** \returns the nested expression */ - typename internal::remove_all::type& - nestedExpression() { return m_xpr.const_cast_derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename internal::remove_all::type& + nestedExpression() { return m_xpr; } protected: - typename XprType::Nested m_xpr; + XprTypeNested m_xpr; const UnaryOp m_functor; }; -// This is the generic implementation for dense storage. -// It can be used for any expression types implementing the dense concept. -template -class CwiseUnaryOpImpl - : public internal::dense_xpr_base >::type +// Generic API dispatcher +template +class CwiseUnaryOpImpl + : public internal::generic_xpr_base >::type { - public: - - typedef CwiseUnaryOp Derived; - typedef typename internal::dense_xpr_base >::type Base; - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) - - EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const - { - return derived().functor()(derived().nestedExpression().coeff(rowId, colId)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet(rowId, colId)); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index index) const - { - return derived().functor().packetOp(derived().nestedExpression().template packet(index)); - } +public: + typedef typename internal::generic_xpr_base >::type Base; }; } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/CwiseUnaryView.h b/external/eigen3/Eigen/src/Core/CwiseUnaryView.h index f3b2ffe..2710330 100644 --- a/external/eigen3/Eigen/src/Core/CwiseUnaryView.h +++ b/external/eigen3/Eigen/src/Core/CwiseUnaryView.h @@ -12,33 +12,19 @@ namespace Eigen { -/** \class CwiseUnaryView - * \ingroup Core_Module - * - * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector - * - * \param ViewOp template functor implementing the view - * \param MatrixType the type of the matrix we are applying the unary operator - * - * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector. - * It is the return type of real() and imag(), and most of the time this is the only way it is used. - * - * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp - */ - namespace internal { template struct traits > : traits { typedef typename result_of< - ViewOp(typename traits::Scalar) + ViewOp(const typename traits::Scalar&) >::type Scalar; typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all::type _MatrixTypeNested; enum { - Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), - CoeffReadCost = EIGEN_ADD_COST(traits<_MatrixTypeNested>::CoeffReadCost, functor_traits::Cost), + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions MatrixTypeInnerStride = inner_stride_at_compile_time::ret, // need to cast the sizeof's from size_t to int explicitly, otherwise: // "error: no integral type can represent all of the enumerator values @@ -55,6 +41,19 @@ struct traits > template class CwiseUnaryViewImpl; +/** \class CwiseUnaryView + * \ingroup Core_Module + * + * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector + * + * \tparam ViewOp template functor implementing the view + * \tparam MatrixType the type of the matrix we are applying the unary operator + * + * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector. + * It is the return type of real() and imag(), and most of the time this is the only way it is used. + * + * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp + */ template class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind> { @@ -62,8 +61,10 @@ class CwiseUnaryView : public CwiseUnaryViewImpl::StorageKind>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::ref_selector::non_const_type MatrixTypeNested; + typedef typename internal::remove_all::type NestedExpression; - inline CwiseUnaryView(const MatrixType& mat, const ViewOp& func = ViewOp()) + explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) : m_matrix(mat), m_functor(func) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) @@ -75,19 +76,27 @@ class CwiseUnaryView : public CwiseUnaryViewImpl::type& + const typename internal::remove_all::type& nestedExpression() const { return m_matrix; } /** \returns the nested expression */ - typename internal::remove_all::type& + typename internal::remove_reference::type& nestedExpression() { return m_matrix.const_cast_derived(); } protected: - // FIXME changed from MatrixType::Nested because of a weird compilation error with sun CC - typename internal::nested::type m_matrix; + MatrixTypeNested m_matrix; ViewOp m_functor; }; +// Generic API dispatcher +template +class CwiseUnaryViewImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; +}; + template class CwiseUnaryViewImpl : public internal::dense_xpr_base< CwiseUnaryView >::type @@ -100,38 +109,18 @@ class CwiseUnaryViewImpl EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) - inline Scalar* data() { return &coeffRef(0); } - inline const Scalar* data() const { return &coeff(0); } + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeff(0)); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride() * sizeof(typename internal::traits::Scalar) / sizeof(Scalar); } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const - { - return derived().functor()(derived().nestedExpression().coeff(row, col)); - } - - EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - return derived().functor()(derived().nestedExpression().coeff(index)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(row, col)); - } - - EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) - { - return derived().functor()(const_cast_derived().nestedExpression().coeffRef(index)); - } }; } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/DenseBase.h b/external/eigen3/Eigen/src/Core/DenseBase.h index 4b371b0..90066ae 100644 --- a/external/eigen3/Eigen/src/Core/DenseBase.h +++ b/external/eigen3/Eigen/src/Core/DenseBase.h @@ -34,37 +34,45 @@ static inline void check_DenseIndex_is_signed() { * \tparam Derived is the derived type, e.g., a matrix type or an expression. * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template class DenseBase #ifndef EIGEN_PARSED_BY_DOXYGEN - : public internal::special_scalar_op_base::Scalar, - typename NumTraits::Scalar>::Real, - DenseCoeffsBase > -#else : public DenseCoeffsBase +#else + : public DenseCoeffsBase #endif // not EIGEN_PARSED_BY_DOXYGEN { public: - class InnerIterator; + /** Inner iterator type to iterate over the coefficients of a row or column. + * \sa class InnerIterator + */ + typedef Eigen::InnerIterator InnerIterator; typedef typename internal::traits::StorageKind StorageKind; - /** \brief The type of indices - * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. - * \sa \ref TopicPreprocessorDirectives. - */ - typedef typename internal::traits::Index Index; + /** + * \brief The type used to store indices + * \details This typedef is relevant for types that store multiple indices such as + * PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index + * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. + */ + typedef typename internal::traits::StorageIndex StorageIndex; + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex, etc. */ typedef typename internal::traits::Scalar Scalar; - typedef typename internal::packet_traits::type PacketScalar; + + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex, etc. + * + * It is an alias for the Scalar type */ + typedef Scalar value_type; + typedef typename NumTraits::Real RealScalar; - typedef internal::special_scalar_op_base > Base; + typedef DenseCoeffsBase Base; - using Base::operator*; using Base::derived; using Base::const_cast_derived; using Base::rows; @@ -74,16 +82,6 @@ template class DenseBase using Base::colIndexByOuterInner; using Base::coeff; using Base::coeffByOuterInner; - using Base::packet; - using Base::packetByOuterInner; - using Base::writePacket; - using Base::writePacketByOuterInner; - using Base::coeffRef; - using Base::coeffRefByOuterInner; - using Base::copyCoeff; - using Base::copyCoeffByOuterInner; - using Base::copyPacket; - using Base::copyPacketByOuterInner; using Base::operator(); using Base::operator[]; using Base::x; @@ -169,19 +167,46 @@ template class DenseBase InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) : int(IsRowMajor) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), - CoeffReadCost = internal::traits::CoeffReadCost, - /**< This is a rough measure of how expensive it is to read one coefficient from - * this expression. - */ - InnerStrideAtCompileTime = internal::inner_stride_at_compile_time::ret, OuterStrideAtCompileTime = internal::outer_stride_at_compile_time::ret }; + + typedef typename internal::find_best_packet::type PacketScalar; - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 0 }; + + /** The plain matrix type corresponding to this expression. + * \sa PlainObject */ + typedef Matrix::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainMatrix; + + /** The plain array type corresponding to this expression. + * \sa PlainObject */ + typedef Array::Scalar, + internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime + > PlainArray; + + /** \brief The plain matrix or array type corresponding to this expression. + * + * This is not necessarily exactly the return type of eval(). In the case of plain matrices, + * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed + * that the return type of eval() is either PlainObject or const PlainObject&. + */ + typedef typename internal::conditional::XprKind,MatrixXpr >::value, + PlainMatrix, PlainArray>::type PlainObject; /** \returns the number of nonzero coefficients which is in practice the number * of stored coefficients. */ + EIGEN_DEVICE_FUNC inline Index nonZeros() const { return size(); } /** \returns the outer size. @@ -189,6 +214,7 @@ template class DenseBase * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a * column-major matrix, and the number of rows for a row-major matrix. */ + EIGEN_DEVICE_FUNC Index outerSize() const { return IsVectorAtCompileTime ? 1 @@ -200,6 +226,7 @@ template class DenseBase * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a * column-major matrix, and the number of columns for a row-major matrix. */ + EIGEN_DEVICE_FUNC Index innerSize() const { return IsVectorAtCompileTime ? this->size() @@ -210,6 +237,7 @@ template class DenseBase * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does * nothing else. */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { EIGEN_ONLY_USED_FOR_DEBUG(newSize); @@ -220,22 +248,22 @@ template class DenseBase * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does * nothing else. */ - void resize(Index nbRows, Index nbCols) + EIGEN_DEVICE_FUNC + void resize(Index rows, Index cols) { - EIGEN_ONLY_USED_FOR_DEBUG(nbRows); - EIGEN_ONLY_USED_FOR_DEBUG(nbCols); - eigen_assert(nbRows == this->rows() && nbCols == this->cols() + EIGEN_ONLY_USED_FOR_DEBUG(rows); + EIGEN_ONLY_USED_FOR_DEBUG(cols); + eigen_assert(rows == this->rows() && cols == this->cols() && "DenseBase::resize() does not actually allow to resize."); } #ifndef EIGEN_PARSED_BY_DOXYGEN - /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; - /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */ - typedef CwiseNullaryOp,Derived> SequentialLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; + /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */ + typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ - typedef CwiseNullaryOp,Derived> RandomAccessLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; @@ -243,120 +271,133 @@ template class DenseBase /** Copies \a other into *this. \returns a reference to *this. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator+=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator-=(const EigenBase &other); template + EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& func); - /** \internal Copies \a other into *this without evaluating other. \returns a reference to *this. */ + /** \internal + * Copies \a other into *this without evaluating other. \returns a reference to *this. + * \deprecated */ template + EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase& other); - /** \internal Evaluates \a other into *this. \returns a reference to *this. */ - template - Derived& lazyAssign(const ReturnByValue& other); - + EIGEN_DEVICE_FUNC CommaInitializer operator<< (const Scalar& s); + /** \deprecated it now returns \c *this */ template - const Flagged flagged() const; + EIGEN_DEPRECATED + const Derived& flagged() const + { return derived(); } template + EIGEN_DEVICE_FUNC CommaInitializer operator<< (const DenseBase& other); - Eigen::Transpose transpose(); - typedef typename internal::add_const >::type ConstTransposeReturnType; + typedef Transpose TransposeReturnType; + EIGEN_DEVICE_FUNC + TransposeReturnType transpose(); + typedef typename internal::add_const >::type ConstTransposeReturnType; + EIGEN_DEVICE_FUNC ConstTransposeReturnType transpose() const; + EIGEN_DEVICE_FUNC void transposeInPlace(); -#ifndef EIGEN_NO_DEBUG - protected: - template - void checkTransposeAliasing(const OtherDerived& other) const; - public: -#endif - - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index rows, Index cols, const Scalar& value); - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index size, const Scalar& value); - static const ConstantReturnType + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(const Scalar& value); - static const SequentialLinSpacedReturnType + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high); - static const RandomAccessLinSpacedReturnType + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Index size, const Scalar& low, const Scalar& high); - static const SequentialLinSpacedReturnType + EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType LinSpaced(Sequential_t, const Scalar& low, const Scalar& high); - static const RandomAccessLinSpacedReturnType + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(const Scalar& low, const Scalar& high); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(Index size, const CustomNullaryOp& func); - template - static const CwiseNullaryOp + template EIGEN_DEVICE_FUNC + static const CwiseNullaryOp NullaryExpr(const CustomNullaryOp& func); - static const ConstantReturnType Zero(Index rows, Index cols); - static const ConstantReturnType Zero(Index size); - static const ConstantReturnType Zero(); - static const ConstantReturnType Ones(Index rows, Index cols); - static const ConstantReturnType Ones(Index size); - static const ConstantReturnType Ones(); - - void fill(const Scalar& value); - Derived& setConstant(const Scalar& value); - Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); - Derived& setLinSpaced(const Scalar& low, const Scalar& high); - Derived& setZero(); - Derived& setOnes(); - Derived& setRandom(); - - template + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(); + + EIGEN_DEVICE_FUNC void fill(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setZero(); + EIGEN_DEVICE_FUNC Derived& setOnes(); + EIGEN_DEVICE_FUNC Derived& setRandom(); + + template EIGEN_DEVICE_FUNC bool isApprox(const DenseBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const RealScalar& other, const RealScalar& prec = NumTraits::dummy_precision()) const; - template + template EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const DenseBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; - bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; inline bool hasNaN() const; inline bool allFinite() const; - inline Derived& operator*=(const Scalar& other); - inline Derived& operator/=(const Scalar& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const Scalar& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const Scalar& other); typedef typename internal::add_const_on_value_type::type>::type EvalReturnType; /** \returns the matrix or vector obtained by evaluating this expression. * * Notice that in the case of a plain matrix or vector (not an expression) this function just returns * a const reference, in order to avoid a useless copy. + * + * \warning Be carefull with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page \endlink. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvalReturnType eval() const { // Even though MSVC does not honor strong inlining when the return type @@ -364,61 +405,78 @@ template class DenseBase // size types on MSVC. return typename internal::eval::type(derived()); } - + /** swaps *this with the expression \a other. * */ template - void swap(const DenseBase& other, - int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase) + EIGEN_DEVICE_FUNC + void swap(const DenseBase& other) { - SwapWrapper(derived()).lazyAssign(other.derived()); + EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); } /** swaps *this with the matrix or array \a other. * */ template + EIGEN_DEVICE_FUNC void swap(PlainObjectBase& other) { - SwapWrapper(derived()).lazyAssign(other.derived()); + eigen_assert(rows()==other.rows() && cols()==other.cols()); + call_assignment(derived(), other.derived(), internal::swap_assign_op()); } + EIGEN_DEVICE_FUNC inline const NestByValue nestByValue() const; + EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; + EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); + template EIGEN_DEVICE_FUNC + inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; + template EIGEN_DEVICE_FUNC + inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); - inline const NestByValue nestByValue() const; - inline const ForceAlignedAccess forceAlignedAccess() const; - inline ForceAlignedAccess forceAlignedAccess(); - template inline const typename internal::conditional,Derived&>::type forceAlignedAccessIf() const; - template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); - - Scalar sum() const; - Scalar mean() const; - Scalar trace() const; + EIGEN_DEVICE_FUNC Scalar sum() const; + EIGEN_DEVICE_FUNC Scalar mean() const; + EIGEN_DEVICE_FUNC Scalar trace() const; - Scalar prod() const; + EIGEN_DEVICE_FUNC Scalar prod() const; - typename internal::traits::Scalar minCoeff() const; - typename internal::traits::Scalar maxCoeff() const; + EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff() const; + EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff() const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* index) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* index) const; template - typename internal::result_of::Scalar)>::type - redux(const BinaryOp& func) const; + EIGEN_DEVICE_FUNC + Scalar redux(const BinaryOp& func) const; template + EIGEN_DEVICE_FUNC void visit(Visitor& func) const; - inline const WithFormat format(const IOFormat& fmt) const; + /** \returns a WithFormat proxy object allowing to print a matrix the with given + * format \a fmt. + * + * See class IOFormat for some examples. + * + * \sa class IOFormat, class WithFormat + */ + inline const WithFormat format(const IOFormat& fmt) const + { + return WithFormat(derived(), fmt); + } /** \returns the unique coefficient of a 1x1 expression */ + EIGEN_DEVICE_FUNC CoeffReturnType value() const { EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) @@ -426,23 +484,44 @@ template class DenseBase return derived().coeff(0,0); } - bool all(void) const; - bool any(void) const; - Index count() const; + EIGEN_DEVICE_FUNC bool all() const; + EIGEN_DEVICE_FUNC bool any() const; + EIGEN_DEVICE_FUNC Index count() const; typedef VectorwiseOp RowwiseReturnType; typedef const VectorwiseOp ConstRowwiseReturnType; typedef VectorwiseOp ColwiseReturnType; typedef const VectorwiseOp ConstColwiseReturnType; - ConstRowwiseReturnType rowwise() const; - RowwiseReturnType rowwise(); - ConstColwiseReturnType colwise() const; - ColwiseReturnType colwise(); + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_rowwise.cpp + * Output: \verbinclude MatrixBase_rowwise.out + * + * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const { + return ConstRowwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC RowwiseReturnType rowwise(); + + /** \returns a VectorwiseOp wrapper of *this providing additional partial reduction operations + * + * Example: \include MatrixBase_colwise.cpp + * Output: \verbinclude MatrixBase_colwise.out + * + * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const { + return ConstColwiseReturnType(derived()); + } + EIGEN_DEVICE_FUNC ColwiseReturnType colwise(); - static const CwiseNullaryOp,Derived> Random(Index rows, Index cols); - static const CwiseNullaryOp,Derived> Random(Index size); - static const CwiseNullaryOp,Derived> Random(); + typedef CwiseNullaryOp,PlainObject> RandomReturnType; + static const RandomReturnType Random(Index rows, Index cols); + static const RandomReturnType Random(Index size); + static const RandomReturnType Random(); template const Select @@ -460,45 +539,56 @@ template class DenseBase template RealScalar lpNorm() const; template - inline const Replicate replicate() const; - - typedef Replicate ReplicateReturnType; - inline const ReplicateReturnType replicate(Index rowFacor,Index colFactor) const; + EIGEN_DEVICE_FUNC + const Replicate replicate() const; + /** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate_int_int.cpp + * Output: \verbinclude MatrixBase_replicate_int_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC + const Replicate replicate(Index rowFactor, Index colFactor) const + { + return Replicate(derived(), rowFactor, colFactor); + } typedef Reverse ReverseReturnType; typedef const Reverse ConstReverseReturnType; - ReverseReturnType reverse(); - ConstReverseReturnType reverse() const; - void reverseInPlace(); + EIGEN_DEVICE_FUNC ReverseReturnType reverse(); + /** This is the const version of reverse(). */ + //Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const + { + return ConstReverseReturnType(derived()); + } + EIGEN_DEVICE_FUNC void reverseInPlace(); #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) # include "../plugins/BlockMethods.h" # ifdef EIGEN_DENSEBASE_PLUGIN # include EIGEN_DENSEBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS - -#ifdef EIGEN2_SUPPORT - - Block corner(CornerType type, Index cRows, Index cCols); - const Block corner(CornerType type, Index cRows, Index cCols) const; - template - Block corner(CornerType type); - template - const Block corner(CornerType type) const; - -#endif // EIGEN2_SUPPORT - +#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF // disable the use of evalTo for dense objects with a nice compilation error - template inline void evalTo(Dest& ) const + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& ) const { EIGEN_STATIC_ASSERT((internal::is_same::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); } protected: /** Default constructor. Do nothing. */ - DenseBase() + EIGEN_DEVICE_FUNC DenseBase() { /* Just checks for self-consistency of the flags. * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down @@ -511,9 +601,9 @@ template class DenseBase } private: - explicit DenseBase(int); - DenseBase(int,int); - template explicit DenseBase(const DenseBase&); + EIGEN_DEVICE_FUNC explicit DenseBase(int); + EIGEN_DEVICE_FUNC DenseBase(int,int); + template EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase&); }; } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/DenseCoeffsBase.h b/external/eigen3/Eigen/src/Core/DenseCoeffsBase.h index 3c890f2..c4af48a 100644 --- a/external/eigen3/Eigen/src/Core/DenseCoeffsBase.h +++ b/external/eigen3/Eigen/src/Core/DenseCoeffsBase.h @@ -35,7 +35,6 @@ class DenseCoeffsBase : public EigenBase { public: typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; @@ -61,6 +60,7 @@ class DenseCoeffsBase : public EigenBase using Base::size; using Base::derived; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const { return int(Derived::RowsAtCompileTime) == 1 ? 0 @@ -69,6 +69,7 @@ class DenseCoeffsBase : public EigenBase : inner; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const { return int(Derived::ColsAtCompileTime) == 1 ? 0 @@ -91,13 +92,15 @@ class DenseCoeffsBase : public EigenBase * * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeff(row, col); + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeff(row,col); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { return coeff(rowIndexByOuterInner(outer, inner), @@ -108,11 +111,12 @@ class DenseCoeffsBase : public EigenBase * * \sa operator()(Index,Index), operator[](Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeff(row, col); + return coeff(row, col); } /** Short version: don't use this function, use @@ -130,11 +134,14 @@ class DenseCoeffsBase : public EigenBase * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); - return derived().coeff(index); + return internal::evaluator(derived()).coeff(index); } @@ -146,15 +153,14 @@ class DenseCoeffsBase : public EigenBase * z() const, w() const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator[](Index index) const { - #ifndef EIGEN2_SUPPORT EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) - #endif eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** \returns the coefficient at given index. @@ -167,32 +173,49 @@ class DenseCoeffsBase : public EigenBase * z() const, w() const */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType operator()(Index index) const { eigen_assert(index >= 0 && index < size()); - return derived().coeff(index); + return coeff(index); } /** equivalent to operator[](0). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType x() const { return (*this)[0]; } /** equivalent to operator[](1). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType - y() const { return (*this)[1]; } + y() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } /** equivalent to operator[](2). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType - z() const { return (*this)[2]; } + z() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } /** equivalent to operator[](3). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType - w() const { return (*this)[3]; } + w() const + { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } /** \internal * \returns the packet of coefficients starting at the given row and column. It is your responsibility @@ -207,9 +230,9 @@ class DenseCoeffsBase : public EigenBase template EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().template packet(row,col); + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return internal::evaluator(derived()).template packet(row,col); } @@ -234,8 +257,11 @@ class DenseCoeffsBase : public EigenBase template EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + typedef typename internal::packet_traits::type DefaultPacketType; eigen_internal_assert(index >= 0 && index < size()); - return derived().template packet(index); + return internal::evaluator(derived()).template packet(index); } protected: @@ -278,7 +304,6 @@ class DenseCoeffsBase : public DenseCoeffsBase Base; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -311,13 +336,15 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() - && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeffRef(row,col); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRefByOuterInner(Index outer, Index inner) { @@ -330,12 +357,13 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); - return derived().coeffRef(row, col); + return coeffRef(row, col); } @@ -354,11 +382,14 @@ class DenseCoeffsBase : public DenseCoeffsBase::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); - return derived().coeffRef(index); + return internal::evaluator(derived()).coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -368,15 +399,14 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** \returns a reference to the coefficient at given index. @@ -388,167 +418,49 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); - return derived().coeffRef(index); + return coeffRef(index); } /** equivalent to operator[](0). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& x() { return (*this)[0]; } /** equivalent to operator[](1). */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& - y() { return (*this)[1]; } - - /** equivalent to operator[](2). */ - - EIGEN_STRONG_INLINE Scalar& - z() { return (*this)[2]; } - - /** equivalent to operator[](3). */ - - EIGEN_STRONG_INLINE Scalar& - w() { return (*this)[3]; } - - /** \internal - * Stores the given packet of coefficients, at the given row and column of this expression. It is your responsibility - * to ensure that a packet really starts there. This method is only available on expressions having the - * PacketAccessBit. - * - * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. - */ - - template - EIGEN_STRONG_INLINE void writePacket - (Index row, Index col, const typename internal::packet_traits::type& val) + y() { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket(row,col,val); + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; } + /** equivalent to operator[](2). */ - /** \internal */ - template - EIGEN_STRONG_INLINE void writePacketByOuterInner - (Index outer, Index inner, const typename internal::packet_traits::type& val) - { - writePacket(rowIndexByOuterInner(outer, inner), - colIndexByOuterInner(outer, inner), - val); - } - - /** \internal - * Stores the given packet of coefficients, at the given index in this expression. It is your responsibility - * to ensure that a packet really starts there. This method is only available on expressions having the - * PacketAccessBit and the LinearAccessBit. - * - * The \a LoadMode parameter may have the value \a Aligned or \a Unaligned. Its effect is to select - * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets - * starting at an address which is a multiple of the packet size. - */ - template - EIGEN_STRONG_INLINE void writePacket - (Index index, const typename internal::packet_traits::type& val) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket(index,val); - } - -#ifndef EIGEN_PARSED_BY_DOXYGEN - - /** \internal Copies the coefficient at position (row,col) of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase& other) - { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().coeffRef(row, col) = other.derived().coeff(row, col); - } - - /** \internal Copies the coefficient at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().coeffRef(index) = other.derived().coeff(index); - } - - - template - EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase& other) - { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().copyCoeff(row, col, other); - } - - /** \internal Copies the packet at position (row,col) of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyPacket(Index row, Index col, const DenseBase& other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + z() { - eigen_internal_assert(row >= 0 && row < rows() - && col >= 0 && col < cols()); - derived().template writePacket(row, col, - other.derived().template packet(row, col)); + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; } - /** \internal Copies the packet at the given index of other into *this. - * - * This method is overridden in SwapWrapper, allowing swap() assignments to share 99% of their code - * with usual assignments. - * - * Outside of this internal usage, this method has probably no usefulness. It is hidden in the public API dox. - */ - - template - EIGEN_STRONG_INLINE void copyPacket(Index index, const DenseBase& other) - { - eigen_internal_assert(index >= 0 && index < size()); - derived().template writePacket(index, - other.derived().template packet(index)); - } + /** equivalent to operator[](3). */ - /** \internal */ - template - EIGEN_STRONG_INLINE void copyPacketByOuterInner(Index outer, Index inner, const DenseBase& other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& + w() { - const Index row = rowIndexByOuterInner(outer,inner); - const Index col = colIndexByOuterInner(outer,inner); - // derived() is important here: copyCoeff() may be reimplemented in Derived! - derived().template copyPacket< OtherDerived, StoreMode, LoadMode>(row, col, other); + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime==-1 || Derived::SizeAtCompileTime>=4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; } -#endif - }; /** \brief Base class providing direct read-only coefficient access to matrices and arrays. @@ -560,7 +472,7 @@ class DenseCoeffsBase : public DenseCoeffsBase which defines functions to access entries read-only using * \c operator() . * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template class DenseCoeffsBase : public DenseCoeffsBase @@ -568,7 +480,6 @@ class DenseCoeffsBase : public DenseCoeffsBase Base; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename NumTraits::Real RealScalar; @@ -581,6 +492,7 @@ class DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase : public DenseCoeffsBase which defines functions to access entries read/write using * \c operator(). * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template class DenseCoeffsBase @@ -639,7 +554,6 @@ class DenseCoeffsBase public: typedef DenseCoeffsBase Base; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename NumTraits::Real RealScalar; @@ -652,6 +566,7 @@ class DenseCoeffsBase * * \sa outerStride(), rowStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().innerStride(); @@ -662,6 +577,7 @@ class DenseCoeffsBase * * \sa innerStride(), rowStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().outerStride(); @@ -677,6 +593,7 @@ class DenseCoeffsBase * * \sa innerStride(), outerStride(), colStride() */ + EIGEN_DEVICE_FUNC inline Index rowStride() const { return Derived::IsRowMajor ? outerStride() : innerStride(); @@ -686,6 +603,7 @@ class DenseCoeffsBase * * \sa innerStride(), outerStride(), rowStride() */ + EIGEN_DEVICE_FUNC inline Index colStride() const { return Derived::IsRowMajor ? innerStride() : outerStride(); @@ -694,33 +612,42 @@ class DenseCoeffsBase namespace internal { -template +template struct first_aligned_impl { - static inline typename Derived::Index run(const Derived&) + static inline Index run(const Derived&) { return 0; } }; -template -struct first_aligned_impl +template +struct first_aligned_impl { - static inline typename Derived::Index run(const Derived& m) + static inline Index run(const Derived& m) { - return internal::first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size()); + return internal::first_aligned(m.data(), m.size()); } }; -/** \internal \returns the index of the first element of the array that is well aligned for vectorization. +/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect to \a Alignment for vectorization. + * + * \tparam Alignment requested alignment in Bytes. * * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more * documentation. */ +template +static inline Index first_aligned(const DenseBase& m) +{ + enum { ReturnZero = (int(evaluator::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) }; + return first_aligned_impl::run(m.derived()); +} + template -static inline typename Derived::Index first_aligned(const Derived& m) +static inline Index first_default_aligned(const DenseBase& m) { - return first_aligned_impl - - ::run(m); + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type DefaultPacketType; + return internal::first_aligned::alignment),Derived>(m); } template::ret> diff --git a/external/eigen3/Eigen/src/Core/DenseStorage.h b/external/eigen3/Eigen/src/Core/DenseStorage.h index 568493c..7958fee 100644 --- a/external/eigen3/Eigen/src/Core/DenseStorage.h +++ b/external/eigen3/Eigen/src/Core/DenseStorage.h @@ -3,7 +3,7 @@ // // Copyright (C) 2008 Gael Guennebaud // Copyright (C) 2006-2009 Benoit Jacob -// Copyright (C) 2010 Hauke Heibel +// Copyright (C) 2010-2013 Hauke Heibel // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -13,9 +13,9 @@ #define EIGEN_MATRIXSTORAGE_H #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN EIGEN_DENSE_STORAGE_CTOR_PLUGIN; + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) X; EIGEN_DENSE_STORAGE_CTOR_PLUGIN; #else - #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + #define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) #endif namespace Eigen { @@ -24,7 +24,9 @@ namespace internal { struct constructor_without_unaligned_array_assert {}; -template void check_static_allocation_size() +template +EIGEN_DEVICE_FUNC +void check_static_allocation_size() { // if EIGEN_STACK_ALLOCATION_LIMIT is defined to 0, then no limit #if EIGEN_STACK_ALLOCATION_LIMIT @@ -38,18 +40,19 @@ template void check_static_allocation_size() */ template + : compute_default_alignment::value > struct plain_array { T array[Size]; - plain_array() + EIGEN_DEVICE_FUNC + plain_array() { check_static_allocation_size(); } - plain_array(constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) { check_static_allocation_size(); } @@ -64,29 +67,88 @@ struct plain_array template EIGEN_ALWAYS_INLINE PtrType eigen_unaligned_array_assert_workaround_gcc47(PtrType array) { return array; } #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((reinterpret_cast(eigen_unaligned_array_assert_workaround_gcc47(array)) & sizemask) == 0 \ + eigen_assert((internal::UIntPtr(eigen_unaligned_array_assert_workaround_gcc47(array)) & (sizemask)) == 0 \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! ****"); #else #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \ - eigen_assert((reinterpret_cast(array) & sizemask) == 0 \ + eigen_assert((internal::UIntPtr(array) & (sizemask)) == 0 \ && "this assertion is explained here: " \ "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ " **** READ THIS WEB PAGE !!! ****"); #endif +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + template struct plain_array { - EIGEN_USER_ALIGN16 T array[Size]; + EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size]; + EIGEN_DEVICE_FUNC plain_array() { - EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf); + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15); check_static_allocation_size(); } + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC + plain_array(constructor_without_unaligned_array_assert) + { + check_static_allocation_size(); + } +}; + +template +struct plain_array +{ + EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size]; + + EIGEN_DEVICE_FUNC + plain_array() + { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63); + check_static_allocation_size(); + } + + EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) { check_static_allocation_size(); @@ -96,9 +158,9 @@ struct plain_array template struct plain_array { - EIGEN_USER_ALIGN16 T array[1]; - plain_array() {} - plain_array(constructor_without_unaligned_array_assert) {} + T array[1]; + EIGEN_DEVICE_FUNC plain_array() {} + EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {} }; } // end namespace internal @@ -122,41 +184,54 @@ template class DenseSt { internal::plain_array m_data; public: - DenseStorage() {} - DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } + EIGEN_DEVICE_FUNC + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()) {} - DenseStorage(const DenseStorage& other) : m_data(other.m_data) {} + EIGEN_DEVICE_FUNC + DenseStorage(const DenseStorage& other) : m_data(other.m_data) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) - { + { if (this != &other) m_data = other.m_data; - return *this; + return *this; } - DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} - void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } - static DenseIndex rows(void) {return _Rows;} - static DenseIndex cols(void) {return _Cols;} - void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {} - void resize(DenseIndex,DenseIndex,DenseIndex) {} - const T *data() const { return m_data.array; } - T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + eigen_internal_assert(size==rows*cols && rows==_Rows && cols==_Cols); + EIGEN_UNUSED_VARIABLE(size); + EIGEN_UNUSED_VARIABLE(rows); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // null matrix template class DenseStorage { public: - DenseStorage() {} - DenseStorage(internal::constructor_without_unaligned_array_assert) {} - DenseStorage(const DenseStorage&) {} - DenseStorage& operator=(const DenseStorage&) { return *this; } - DenseStorage(DenseIndex,DenseIndex,DenseIndex) {} - void swap(DenseStorage& ) {} - static DenseIndex rows(void) {return _Rows;} - static DenseIndex cols(void) {return _Cols;} - void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {} - void resize(DenseIndex,DenseIndex,DenseIndex) {} - const T *data() const { return 0; } - T *data() { return 0; } + EIGEN_DEVICE_FUNC DenseStorage() {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) {} + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage&) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage&) { return *this; } + EIGEN_DEVICE_FUNC DenseStorage(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& ) {} + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC void resize(Index,Index,Index) {} + EIGEN_DEVICE_FUNC const T *data() const { return 0; } + EIGEN_DEVICE_FUNC T *data() { return 0; } }; // more specializations for null matrices; these are necessary to resolve ambiguities @@ -173,74 +248,74 @@ template class DenseStorage class DenseStorage { internal::plain_array m_data; - DenseIndex m_rows; - DenseIndex m_cols; + Index m_rows; + Index m_cols; public: - DenseStorage() : m_rows(0), m_cols(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0), m_cols(0) {} - DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {} - DenseStorage& operator=(const DenseStorage& other) - { + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { if (this != &other) { m_data = other.m_data; m_rows = other.m_rows; m_cols = other.m_cols; } - return *this; + return *this; } - DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) : m_rows(nbRows), m_cols(nbCols) {} - void swap(DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index cols) : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } - DenseIndex rows() const {return m_rows;} - DenseIndex cols() const {return m_cols;} - void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; } - void resize(DenseIndex, DenseIndex nbRows, DenseIndex nbCols) { m_rows = nbRows; m_cols = nbCols; } - const T *data() const { return m_data.array; } - T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC Index rows() const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols() const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index cols) { m_rows = rows; m_cols = cols; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // dynamic-size matrix with fixed-size storage and fixed width template class DenseStorage { internal::plain_array m_data; - DenseIndex m_rows; + Index m_rows; public: - DenseStorage() : m_rows(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_rows(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_rows(0) {} - DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {} - DenseStorage& operator=(const DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_rows(other.m_rows) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { if (this != &other) { m_data = other.m_data; m_rows = other.m_rows; } - return *this; + return *this; } - DenseStorage(DenseIndex, DenseIndex nbRows, DenseIndex) : m_rows(nbRows) {} - void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } - DenseIndex rows(void) const {return m_rows;} - DenseIndex cols(void) const {return _Cols;} - void conservativeResize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; } - void resize(DenseIndex, DenseIndex nbRows, DenseIndex) { m_rows = nbRows; } - const T *data() const { return m_data.array; } - T *data() { return m_data.array; } + EIGEN_DEVICE_FUNC DenseStorage(Index, Index rows, Index) : m_rows(rows) {} + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return _Cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC void resize(Index, Index rows, Index) { m_rows = rows; } + EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; } + EIGEN_DEVICE_FUNC T *data() { return m_data.array; } }; // dynamic-size matrix with fixed-size storage and fixed height template class DenseStorage { internal::plain_array m_data; - DenseIndex m_cols; + Index m_cols; public: - DenseStorage() : m_cols(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(internal::constructor_without_unaligned_array_assert()), m_cols(0) {} - DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {} - DenseStorage& operator=(const DenseStorage& other) + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) : m_data(other.m_data), m_cols(other.m_cols) {} + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) { if (this != &other) { @@ -249,38 +324,62 @@ template class DenseStorage class DenseStorage { T *m_data; - DenseIndex m_rows; - DenseIndex m_cols; + Index m_rows; + Index m_cols; public: - DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0), m_cols(0) {} + EIGEN_DEVICE_FUNC explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0), m_cols(0) {} - DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) - : m_data(internal::conditional_aligned_new_auto(size)), m_rows(nbRows), m_cols(nbCols) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } -#ifdef EIGEN_HAVE_RVALUE_REFERENCES - DenseStorage(DenseStorage&& other) + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) + : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + eigen_internal_assert(size==rows*cols && rows>=0 && cols >=0); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*other.m_cols)) + , m_rows(other.m_rows) + , m_cols(other.m_cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*m_cols) + internal::smart_copy(other.m_data, other.m_data+other.m_rows*other.m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)) , m_rows(std::move(other.m_rows)) , m_cols(std::move(other.m_cols)) { other.m_data = nullptr; + other.m_rows = 0; + other.m_cols = 0; } - DenseStorage& operator=(DenseStorage&& other) + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { using std::swap; swap(m_data, other.m_data); @@ -289,18 +388,18 @@ template class DenseStorage(m_data, m_rows*m_cols); } - void swap(DenseStorage& other) + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, m_rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); std::swap(m_cols,other.m_cols); } - DenseIndex rows(void) const {return m_rows;} - DenseIndex cols(void) const {return m_cols;} - void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + void conservativeResize(Index size, Index rows, Index cols) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*m_cols); - m_rows = nbRows; - m_cols = nbCols; + m_rows = rows; + m_cols = cols; } - void resize(DenseIndex size, DenseIndex nbRows, DenseIndex nbCols) + EIGEN_DEVICE_FUNC void resize(Index size, Index rows, Index cols) { if(size != m_rows*m_cols) { @@ -309,36 +408,56 @@ template class DenseStorage(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } - m_rows = nbRows; - m_cols = nbCols; + m_rows = rows; + m_cols = cols; } - const T *data() const { return m_data; } - T *data() { return m_data; } - private: - DenseStorage(const DenseStorage&); - DenseStorage& operator=(const DenseStorage&); + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } }; // matrix with dynamic width and fixed height (so that matrix has dynamic size). template class DenseStorage { T *m_data; - DenseIndex m_cols; + Index m_cols; public: - DenseStorage() : m_data(0), m_cols(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} - DenseStorage(DenseIndex size, DenseIndex, DenseIndex nbCols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(nbCols) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } -#ifdef EIGEN_HAVE_RVALUE_REFERENCES - DenseStorage(DenseStorage&& other) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_cols(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_cols(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_cols(cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + eigen_internal_assert(size==rows*cols && rows==_Rows && cols >=0); + EIGEN_UNUSED_VARIABLE(rows); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(_Rows*other.m_cols)) + , m_cols(other.m_cols) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_cols*_Rows) + internal::smart_copy(other.m_data, other.m_data+_Rows*m_cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)) , m_cols(std::move(other.m_cols)) { other.m_data = nullptr; + other.m_cols = 0; } - DenseStorage& operator=(DenseStorage&& other) + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { using std::swap; swap(m_data, other.m_data); @@ -346,16 +465,16 @@ template class DenseStorage(m_data, _Rows*m_cols); } - void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } - static DenseIndex rows(void) {return _Rows;} - DenseIndex cols(void) const {return m_cols;} - void conservativeResize(DenseIndex size, DenseIndex, DenseIndex nbCols) + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Rows*m_cols); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_cols,other.m_cols); } + EIGEN_DEVICE_FUNC static Index rows(void) {return _Rows;} + EIGEN_DEVICE_FUNC Index cols(void) const {return m_cols;} + EIGEN_DEVICE_FUNC void conservativeResize(Index size, Index, Index cols) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, _Rows*m_cols); - m_cols = nbCols; + m_cols = cols; } - EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex, DenseIndex nbCols) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index, Index cols) { if(size != _Rows*m_cols) { @@ -364,35 +483,55 @@ template class DenseStorage(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } - m_cols = nbCols; + m_cols = cols; } - const T *data() const { return m_data; } - T *data() { return m_data; } - private: - DenseStorage(const DenseStorage&); - DenseStorage& operator=(const DenseStorage&); + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } }; // matrix with dynamic height and fixed width (so that matrix has dynamic size). template class DenseStorage { T *m_data; - DenseIndex m_rows; + Index m_rows; public: - DenseStorage() : m_data(0), m_rows(0) {} - DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} - DenseStorage(DenseIndex size, DenseIndex nbRows, DenseIndex) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(nbRows) - { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN } -#ifdef EIGEN_HAVE_RVALUE_REFERENCES - DenseStorage(DenseStorage&& other) + EIGEN_DEVICE_FUNC DenseStorage() : m_data(0), m_rows(0) {} + explicit DenseStorage(internal::constructor_without_unaligned_array_assert) : m_data(0), m_rows(0) {} + EIGEN_DEVICE_FUNC DenseStorage(Index size, Index rows, Index cols) : m_data(internal::conditional_aligned_new_auto(size)), m_rows(rows) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + eigen_internal_assert(size==rows*cols && rows>=0 && cols == _Cols); + EIGEN_UNUSED_VARIABLE(cols); + } + EIGEN_DEVICE_FUNC DenseStorage(const DenseStorage& other) + : m_data(internal::conditional_aligned_new_auto(other.m_rows*_Cols)) + , m_rows(other.m_rows) + { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = m_rows*_Cols) + internal::smart_copy(other.m_data, other.m_data+other.m_rows*_Cols, m_data); + } + EIGEN_DEVICE_FUNC DenseStorage& operator=(const DenseStorage& other) + { + if (this != &other) + { + DenseStorage tmp(other); + this->swap(tmp); + } + return *this; + } +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + DenseStorage(DenseStorage&& other) EIGEN_NOEXCEPT : m_data(std::move(other.m_data)) , m_rows(std::move(other.m_rows)) { other.m_data = nullptr; + other.m_rows = 0; } - DenseStorage& operator=(DenseStorage&& other) + EIGEN_DEVICE_FUNC + DenseStorage& operator=(DenseStorage&& other) EIGEN_NOEXCEPT { using std::swap; swap(m_data, other.m_data); @@ -400,16 +539,16 @@ template class DenseStorage(m_data, _Cols*m_rows); } - void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } - DenseIndex rows(void) const {return m_rows;} - static DenseIndex cols(void) {return _Cols;} - void conservativeResize(DenseIndex size, DenseIndex nbRows, DenseIndex) + EIGEN_DEVICE_FUNC ~DenseStorage() { internal::conditional_aligned_delete_auto(m_data, _Cols*m_rows); } + EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); std::swap(m_rows,other.m_rows); } + EIGEN_DEVICE_FUNC Index rows(void) const {return m_rows;} + EIGEN_DEVICE_FUNC static Index cols(void) {return _Cols;} + void conservativeResize(Index size, Index rows, Index) { m_data = internal::conditional_aligned_realloc_new_auto(m_data, size, m_rows*_Cols); - m_rows = nbRows; + m_rows = rows; } - EIGEN_STRONG_INLINE void resize(DenseIndex size, DenseIndex nbRows, DenseIndex) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index size, Index rows, Index) { if(size != m_rows*_Cols) { @@ -418,15 +557,12 @@ template class DenseStorage(size); else m_data = 0; - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } - m_rows = nbRows; + m_rows = rows; } - const T *data() const { return m_data; } - T *data() { return m_data; } - private: - DenseStorage(const DenseStorage&); - DenseStorage& operator=(const DenseStorage&); + EIGEN_DEVICE_FUNC const T *data() const { return m_data; } + EIGEN_DEVICE_FUNC T *data() { return m_data; } }; } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/Diagonal.h b/external/eigen3/Eigen/src/Core/Diagonal.h index 68cf6d4..49e7112 100644 --- a/external/eigen3/Eigen/src/Core/Diagonal.h +++ b/external/eigen3/Eigen/src/Core/Diagonal.h @@ -21,7 +21,7 @@ namespace Eigen { * \param MatrixType the type of the object in which we are taking a sub/main/super diagonal * \param DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal. * A positive value means a superdiagonal, a negative value means a subdiagonal. - * You can also use Dynamic so the index can be set at runtime. + * You can also use DynamicIndex so the index can be set at runtime. * * The matrix is not required to be square. * @@ -37,7 +37,7 @@ template struct traits > : traits { - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; typedef typename MatrixType::StorageKind StorageKind; enum { @@ -52,8 +52,7 @@ struct traits > MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), MaxColsAtCompileTime = 1, MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, - Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, - CoeffReadCost = _MatrixTypeNested::CoeffReadCost, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions MatrixTypeOuterStride = outer_stride_at_compile_time::ret, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1, OuterStrideAtCompileTime = 0 @@ -70,20 +69,28 @@ template class Diagonal typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) - inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + EIGEN_DEVICE_FUNC + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) + EIGEN_DEVICE_FUNC inline Index rows() const - { return m_index.value()<0 ? (std::min)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min)(m_matrix.rows(),m_matrix.cols()-m_index.value()); } + { + return m_index.value()<0 ? numext::mini(m_matrix.cols(),m_matrix.rows()+m_index.value()) + : numext::mini(m_matrix.rows(),m_matrix.cols()-m_index.value()); + } + EIGEN_DEVICE_FUNC inline Index cols() const { return 1; } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.outerStride() + 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return 0; @@ -95,62 +102,75 @@ template class Diagonal const Scalar >::type ScalarWithConstIfNotLvalue; - inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } - inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC + inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC + inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index) { EIGEN_STATIC_ASSERT_LVALUE(MatrixType) - return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); + return m_matrix.coeffRef(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index) const { - return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); + return m_matrix.coeffRef(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index) const { return m_matrix.coeff(row+rowOffset(), row+colOffset()); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index idx) { EIGEN_STATIC_ASSERT_LVALUE(MatrixType) - return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); + return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset()); } + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index idx) const { - return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); + return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset()); } + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index idx) const { return m_matrix.coeff(idx+rowOffset(), idx+colOffset()); } - const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC + inline const typename internal::remove_all::type& nestedExpression() const { return m_matrix; } - int index() const + EIGEN_DEVICE_FUNC + inline Index index() const { return m_index.value(); } protected: - typename MatrixType::Nested m_matrix; + typename internal::ref_selector::non_const_type m_matrix; const internal::variable_if_dynamicindex m_index; private: // some compilers may fail to optimize std::max etc in case of compile-time constants... + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; } - // triger a compile time error is someone try to call packet + // trigger a compile-time error if someone try to call packet template typename MatrixType::PacketReturnType packet(Index) const; template typename MatrixType::PacketReturnType packet(Index,Index) const; }; @@ -167,7 +187,7 @@ template inline typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { - return derived(); + return DiagonalReturnType(derived()); } /** This is the const version of diagonal(). */ @@ -216,20 +236,20 @@ MatrixBase::diagonal(Index index) const * * \sa MatrixBase::diagonal(), class Diagonal */ template -template -inline typename MatrixBase::template DiagonalIndexReturnType::Type +template +inline typename MatrixBase::template DiagonalIndexReturnType::Type MatrixBase::diagonal() { - return derived(); + return typename DiagonalIndexReturnType::Type(derived()); } /** This is the const version of diagonal(). */ template -template -inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type +template +inline typename MatrixBase::template ConstDiagonalIndexReturnType::Type MatrixBase::diagonal() const { - return derived(); + return typename ConstDiagonalIndexReturnType::Type(derived()); } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/DiagonalMatrix.h b/external/eigen3/Eigen/src/Core/DiagonalMatrix.h index 53c757b..ecfdce8 100644 --- a/external/eigen3/Eigen/src/Core/DiagonalMatrix.h +++ b/external/eigen3/Eigen/src/Core/DiagonalMatrix.h @@ -22,7 +22,7 @@ class DiagonalBase : public EigenBase typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::RealScalar RealScalar; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, @@ -30,79 +30,61 @@ class DiagonalBase : public EigenBase MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, IsVectorAtCompileTime = 0, - Flags = 0 + Flags = NoPreferredStorageOrderBit }; typedef Matrix DenseMatrixType; typedef DenseMatrixType DenseType; typedef DiagonalMatrix PlainObject; + EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - template - void evalTo(MatrixBase &other) const; - template - inline void addTo(MatrixBase &other) const - { other.diagonal() += diagonal(); } - template - inline void subTo(MatrixBase &other) const - { other.diagonal() -= diagonal(); } - + + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } + EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return derived().diagonal(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return diagonal().size(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return diagonal().size(); } - /** \returns the diagonal matrix product of \c *this by the matrix \a matrix. - */ template - const DiagonalProduct + EIGEN_DEVICE_FUNC + const Product operator*(const MatrixBase &matrix) const { - return DiagonalProduct(matrix.derived(), derived()); + return Product(derived(),matrix.derived()); } - inline const DiagonalWrapper, const DiagonalVectorType> > + typedef DiagonalWrapper, const DiagonalVectorType> > InverseReturnType; + EIGEN_DEVICE_FUNC + inline const InverseReturnType inverse() const { - return diagonal().cwiseInverse(); + return InverseReturnType(diagonal().cwiseInverse()); } - inline const DiagonalWrapper, const DiagonalVectorType> > + EIGEN_DEVICE_FUNC + inline const DiagonalWrapper operator*(const Scalar& scalar) const { - return diagonal() * scalar; + return DiagonalWrapper(diagonal() * scalar); } - friend inline const DiagonalWrapper, const DiagonalVectorType> > + EIGEN_DEVICE_FUNC + friend inline const DiagonalWrapper operator*(const Scalar& scalar, const DiagonalBase& other) { - return other.diagonal() * scalar; - } - - #ifdef EIGEN2_SUPPORT - template - bool isApprox(const DiagonalBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const - { - return diagonal().isApprox(other.diagonal(), precision); + return DiagonalWrapper(scalar * other.diagonal()); } - template - bool isApprox(const MatrixBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const - { - return toDenseMatrix().isApprox(other, precision); - } - #endif }; -template -template -inline void DiagonalBase::evalTo(MatrixBase &other) const -{ - other.setZero(); - other.diagonal() = diagonal(); -} #endif /** \class DiagonalMatrix @@ -124,10 +106,9 @@ struct traits > : traits > { typedef Matrix<_Scalar,SizeAtCompileTime,1,0,MaxSizeAtCompileTime,1> DiagonalVectorType; - typedef Dense StorageKind; - typedef DenseIndex Index; + typedef DiagonalShape StorageKind; enum { - Flags = LvalueBit + Flags = LvalueBit | NoPreferredStorageOrderBit }; }; } @@ -141,7 +122,7 @@ class DiagonalMatrix typedef const DiagonalMatrix& Nested; typedef _Scalar Scalar; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; #endif protected: @@ -151,24 +132,31 @@ class DiagonalMatrix public: /** const version of diagonal(). */ + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return m_diagonal; } /** \returns a reference to the stored vector of diagonal coefficients. */ + EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return m_diagonal; } /** Default constructor without initialization */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix() {} /** Constructs a diagonal matrix with given dimension */ - inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + EIGEN_DEVICE_FUNC + explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} /** 2D constructor. */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {} /** 3D constructor. */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {} /** Copy constructor. */ template + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const DiagonalBase& other) : m_diagonal(other.diagonal()) {} #ifndef EIGEN_PARSED_BY_DOXYGEN @@ -178,11 +166,13 @@ class DiagonalMatrix /** generic constructor from expression of the diagonal coefficients */ template + EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(const MatrixBase& other) : m_diagonal(other) {} /** Copy operator. */ template + EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalBase& other) { m_diagonal = other.diagonal(); @@ -193,6 +183,7 @@ class DiagonalMatrix /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalMatrix& other) { m_diagonal = other.diagonal(); @@ -201,14 +192,19 @@ class DiagonalMatrix #endif /** Resizes to given size. */ + EIGEN_DEVICE_FUNC inline void resize(Index size) { m_diagonal.resize(size); } /** Sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC inline void setZero() { m_diagonal.setZero(); } /** Resizes and sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC inline void setZero(Index size) { m_diagonal.setZero(size); } /** Sets this matrix to be the identity matrix of the current size. */ + EIGEN_DEVICE_FUNC inline void setIdentity() { m_diagonal.setOnes(); } /** Sets this matrix to be the identity matrix of the given size. */ + EIGEN_DEVICE_FUNC inline void setIdentity(Index size) { m_diagonal.setOnes(size); } }; @@ -232,14 +228,15 @@ struct traits > { typedef _DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; - typedef typename DiagonalVectorType::Index Index; - typedef typename DiagonalVectorType::StorageKind StorageKind; + typedef typename DiagonalVectorType::StorageIndex StorageIndex; + typedef DiagonalShape StorageKind; + typedef typename traits::XprKind XprKind; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxRowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - MaxColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, - Flags = traits::Flags & LvalueBit + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit }; }; } @@ -255,9 +252,11 @@ class DiagonalWrapper #endif /** Constructor from expression of diagonal coefficients to wrap. */ - inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + EIGEN_DEVICE_FUNC + explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} /** \returns a const reference to the wrapped expression of diagonal coefficients. */ + EIGEN_DEVICE_FUNC const DiagonalVectorType& diagonal() const { return m_diagonal; } protected: @@ -277,7 +276,7 @@ template inline const DiagonalWrapper MatrixBase::asDiagonal() const { - return derived(); + return DiagonalWrapper(derived()); } /** \returns true if *this is approximately equal to a diagonal matrix, @@ -291,12 +290,11 @@ MatrixBase::asDiagonal() const template bool MatrixBase::isDiagonal(const RealScalar& prec) const { - using std::abs; if(cols() != rows()) return false; RealScalar maxAbsOnDiagonal = static_cast(-1); for(Index j = 0; j < cols(); ++j) { - RealScalar absOnDiagonal = abs(coeff(j,j)); + RealScalar absOnDiagonal = numext::abs(coeff(j,j)); if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal; } for(Index j = 0; j < cols(); ++j) @@ -308,6 +306,38 @@ bool MatrixBase::isDiagonal(const RealScalar& prec) const return true; } +namespace internal { + +template<> struct storage_kind_to_shape { typedef DiagonalShape Shape; }; + +struct Diagonal2Dense {}; + +template<> struct AssignmentKind { typedef Diagonal2Dense Kind; }; + +// Diagonal matrix to Dense assignment +template< typename DstXprType, typename SrcXprType, typename Functor> +struct Assignment +{ + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &/*func*/) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + dst.setZero(); + dst.diagonal() = src.diagonal(); + } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &/*func*/) + { dst.diagonal() += src.diagonal(); } + + static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &/*func*/) + { dst.diagonal() -= src.diagonal(); } +}; + +} // namespace internal + } // end namespace Eigen #endif // EIGEN_DIAGONALMATRIX_H diff --git a/external/eigen3/Eigen/src/Core/DiagonalProduct.h b/external/eigen3/Eigen/src/Core/DiagonalProduct.h index cc6b536..d372b93 100644 --- a/external/eigen3/Eigen/src/Core/DiagonalProduct.h +++ b/external/eigen3/Eigen/src/Core/DiagonalProduct.h @@ -13,117 +13,14 @@ namespace Eigen { -namespace internal { -template -struct traits > - : traits -{ - typedef typename scalar_product_traits::ReturnType Scalar; - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor, - _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) - ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), - _SameTypes = is_same::value, - // FIXME currently we need same types, but in the future the next rule should be the one - //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), - _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, - - Flags = ((HereditaryBits|_LinearAccessMask|AlignedBit) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0),//(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), - Cost0 = EIGEN_ADD_COST(NumTraits::MulCost, MatrixType::CoeffReadCost), - CoeffReadCost = EIGEN_ADD_COST(Cost0,DiagonalType::DiagonalVectorType::CoeffReadCost) - }; -}; -} - -template -class DiagonalProduct : internal::no_assignment_operator, - public MatrixBase > -{ - public: - - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(DiagonalProduct) - - inline DiagonalProduct(const MatrixType& matrix, const DiagonalType& diagonal) - : m_matrix(matrix), m_diagonal(diagonal) - { - eigen_assert(diagonal.diagonal().size() == (ProductOrder == OnTheLeft ? matrix.rows() : matrix.cols())); - } - - EIGEN_STRONG_INLINE Index rows() const { return m_matrix.rows(); } - EIGEN_STRONG_INLINE Index cols() const { return m_matrix.cols(); } - - EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const - { - return m_diagonal.diagonal().coeff(ProductOrder == OnTheLeft ? row : col) * m_matrix.coeff(row, col); - } - - EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor - }; - return coeff(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index row, Index col) const - { - enum { - StorageOrder = Flags & RowMajorBit ? RowMajor : ColMajor - }; - const Index indexInDiagonalVector = ProductOrder == OnTheLeft ? row : col; - return packet_impl(row,col,indexInDiagonalVector,typename internal::conditional< - ((int(StorageOrder) == RowMajor && int(ProductOrder) == OnTheLeft) - ||(int(StorageOrder) == ColMajor && int(ProductOrder) == OnTheRight)), internal::true_type, internal::false_type>::type()); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet(Index idx) const - { - enum { - StorageOrder = int(MatrixType::Flags) & RowMajorBit ? RowMajor : ColMajor - }; - return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); - } - - protected: - template - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::true_type) const - { - return internal::pmul(m_matrix.template packet(row, col), - internal::pset1(m_diagonal.diagonal().coeff(id))); - } - - template - EIGEN_STRONG_INLINE PacketScalar packet_impl(Index row, Index col, Index id, internal::false_type) const - { - enum { - InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalVectorPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) - }; - return internal::pmul(m_matrix.template packet(row, col), - m_diagonal.diagonal().template packet(id)); - } - - typename MatrixType::Nested m_matrix; - typename DiagonalType::Nested m_diagonal; -}; - /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. */ template template -inline const DiagonalProduct +inline const Product MatrixBase::operator*(const DiagonalBase &a_diagonal) const { - return DiagonalProduct(derived(), a_diagonal.derived()); + return Product(derived(),a_diagonal.derived()); } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/Dot.h b/external/eigen3/Eigen/src/Core/Dot.h index 23aab83..06ef18b 100644 --- a/external/eigen3/Eigen/src/Core/Dot.h +++ b/external/eigen3/Eigen/src/Core/Dot.h @@ -28,26 +28,31 @@ template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; + EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.template binaryExpr(b).sum(); } }; template struct dot_nocheck { - typedef typename scalar_product_traits::Scalar,typename traits::Scalar>::ReturnType ResScalar; + typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; + typedef typename conj_prod::result_type ResScalar; + EIGEN_DEVICE_FUNC static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) { - return a.transpose().template binaryExpr::Scalar,typename traits::Scalar> >(b).sum(); + return a.transpose().template binaryExpr(b).sum(); } }; } // end namespace internal -/** \returns the dot product of *this with other. +/** \fn MatrixBase::dot + * \returns the dot product of *this with other. * * \only_for_vectors * @@ -59,55 +64,30 @@ struct dot_nocheck */ template template -inline typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType +EIGEN_DEVICE_FUNC +typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) +#if !(defined(EIGEN_NO_STATIC_ASSERT) && defined(EIGEN_NO_DEBUG)) typedef internal::scalar_conj_product_op func; EIGEN_CHECK_BINARY_COMPATIBILIY(func,Scalar,typename OtherDerived::Scalar); - +#endif + eigen_assert(size() == other.size()); return internal::dot_nocheck::run(*this, other); } -#ifdef EIGEN2_SUPPORT -/** \returns the dot product of *this with other, with the Eigen2 convention that the dot product is linear in the first variable - * (conjugating the second variable). Of course this only makes a difference in the complex case. - * - * This method is only available in EIGEN2_SUPPORT mode. - * - * \only_for_vectors - * - * \sa dot() - */ -template -template -typename internal::traits::Scalar -MatrixBase::eigen2_dot(const MatrixBase& other) const -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived) - EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived,OtherDerived) - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - eigen_assert(size() == other.size()); - - return internal::dot_nocheck::run(other,*this); -} -#endif - - //---------- implementation of L2 norm and related functions ---------- /** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the Frobenius norm. * In both cases, it consists in the sum of the square of all the matrix entries. * For vectors, this is also equals to the dot product of \c *this with itself. * - * \sa dot(), norm() + * \sa dot(), norm(), lpNorm() */ template EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const @@ -119,16 +99,18 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala * In both cases, it consists in the square root of the sum of the square of all the matrix entries. * For vectors, this is also equals to the square root of the dot product of \c *this with itself. * - * \sa dot(), squaredNorm() + * \sa lpNorm(), dot(), squaredNorm() */ template inline typename NumTraits::Scalar>::Real MatrixBase::norm() const { - using std::sqrt; - return sqrt(squaredNorm()); + return numext::sqrt(squaredNorm()); } -/** \returns an expression of the quotient of *this by its own norm. +/** \returns an expression of the quotient of \c *this by its own norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. * * \only_for_vectors * @@ -138,22 +120,77 @@ template inline const typename MatrixBase::PlainObject MatrixBase::normalized() const { - typedef typename internal::nested::type Nested; - typedef typename internal::remove_reference::type _Nested; + typedef typename internal::nested_eval::type _Nested; _Nested n(derived()); - return n / n.norm(); + RealScalar z = n.squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if(z>RealScalar(0)) + return n / numext::sqrt(z); + else + return n; } /** Normalizes the vector, i.e. divides it by its own norm. * * \only_for_vectors * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * * \sa norm(), normalized() */ template inline void MatrixBase::normalize() { - *this /= norm(); + RealScalar z = squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if(z>RealScalar(0)) + derived() /= numext::sqrt(z); +} + +/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow. + * + * \only_for_vectors + * + * This method is analogue to the normalized() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. + * + * \sa stableNorm(), stableNormalize(), normalized() + */ +template +inline const typename MatrixBase::PlainObject +MatrixBase::stableNormalized() const +{ + typedef typename internal::nested_eval::type _Nested; + _Nested n(derived()); + RealScalar w = n.cwiseAbs().maxCoeff(); + RealScalar z = (n/w).squaredNorm(); + if(z>RealScalar(0)) + return n / (numext::sqrt(z)*w); + else + return n; +} + +/** Normalizes the vector while avoid underflow and overflow + * + * \only_for_vectors + * + * This method is analogue to the normalize() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * + * \sa stableNorm(), stableNormalized(), normalize() + */ +template +inline void MatrixBase::stableNormalize() +{ + RealScalar w = cwiseAbs().maxCoeff(); + RealScalar z = (derived()/w).squaredNorm(); + if(z>RealScalar(0)) + derived() /= numext::sqrt(z)*w; } //---------- implementation of other norms ---------- @@ -164,9 +201,10 @@ template struct lpNorm_selector { typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const MatrixBase& m) { - using std::pow; + EIGEN_USING_STD_MATH(pow) return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1)/p); } }; @@ -174,6 +212,7 @@ struct lpNorm_selector template struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) { return m.cwiseAbs().sum(); @@ -183,6 +222,7 @@ struct lpNorm_selector template struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) { return m.norm(); @@ -192,23 +232,35 @@ struct lpNorm_selector template struct lpNorm_selector { - static inline typename NumTraits::Scalar>::Real run(const MatrixBase& m) + typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const MatrixBase& m) { + if(Derived::SizeAtCompileTime==0 || (Derived::SizeAtCompileTime==Dynamic && m.size()==0)) + return RealScalar(0); return m.cwiseAbs().maxCoeff(); } }; } // end namespace internal -/** \returns the \f$ \ell^p \f$ norm of *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values - * of the coefficients of *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ - * norm, that is the maximum of the absolute values of the coefficients of *this. +/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the p-th powers of the absolute values + * of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, this function returns the \f$ \ell^\infty \f$ + * norm, that is the maximum of the absolute values of the coefficients of \c *this. + * + * In all cases, if \c *this is empty, then the value 0 is returned. + * + * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. * * \sa norm() */ template template +#ifndef EIGEN_PARSED_BY_DOXYGEN inline typename NumTraits::Scalar>::Real +#else +MatrixBase::RealScalar +#endif MatrixBase::lpNorm() const { return internal::lpNorm_selector::run(*this); @@ -227,8 +279,8 @@ template bool MatrixBase::isOrthogonal (const MatrixBase& other, const RealScalar& prec) const { - typename internal::nested::type nested(derived()); - typename internal::nested::type otherNested(other.derived()); + typename internal::nested_eval::type nested(derived()); + typename internal::nested_eval::type otherNested(other.derived()); return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); } @@ -246,13 +298,13 @@ bool MatrixBase::isOrthogonal template bool MatrixBase::isUnitary(const RealScalar& prec) const { - typename Derived::Nested nested(derived()); + typename internal::nested_eval::type self(derived()); for(Index i = 0; i < cols(); ++i) { - if(!internal::isApprox(nested.col(i).squaredNorm(), static_cast(1), prec)) + if(!internal::isApprox(self.col(i).squaredNorm(), static_cast(1), prec)) return false; for(Index j = 0; j < i; ++j) - if(!internal::isMuchSmallerThan(nested.col(i).dot(nested.col(j)), static_cast(1), prec)) + if(!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast(1), prec)) return false; } return true; diff --git a/external/eigen3/Eigen/src/Core/EigenBase.h b/external/eigen3/Eigen/src/Core/EigenBase.h index fadb458..b195506 100644 --- a/external/eigen3/Eigen/src/Core/EigenBase.h +++ b/external/eigen3/Eigen/src/Core/EigenBase.h @@ -13,7 +13,10 @@ namespace Eigen { -/** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). +/** \class EigenBase + * \ingroup Core_Module + * + * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * * In other words, an EigenBase object is an object that can be copied into a MatrixBase. * @@ -21,39 +24,57 @@ namespace Eigen { * * Notice that this class is trivial, it is only used to disambiguate overloaded functions. * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template struct EigenBase { // typedef typename internal::plain_matrix_type::type PlainObject; - + + /** \brief The interface type of indices + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \deprecated Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead. + * \sa StorageIndex, \ref TopicPreprocessorDirectives. + */ + typedef Eigen::Index Index; + + // FIXME is it needed? typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; /** \returns a reference to the derived object */ + EIGEN_DEVICE_FUNC Derived& derived() { return *static_cast(this); } /** \returns a const reference to the derived object */ + EIGEN_DEVICE_FUNC const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } + EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast(this); } /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ + EIGEN_DEVICE_FUNC inline Index rows() const { return derived().rows(); } /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ + EIGEN_DEVICE_FUNC inline Index cols() const { return derived().cols(); } /** \returns the number of coefficients, which is rows()*cols(). * \sa rows(), cols(), SizeAtCompileTime. */ + EIGEN_DEVICE_FUNC inline Index size() const { return rows() * cols(); } /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ - template inline void evalTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void evalTo(Dest& dst) const { derived().evalTo(dst); } /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */ - template inline void addTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void addTo(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -63,7 +84,9 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */ - template inline void subTo(Dest& dst) const + template + EIGEN_DEVICE_FUNC + inline void subTo(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -73,7 +96,8 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */ - template inline void applyThisOnTheRight(Dest& dst) const + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -81,7 +105,8 @@ template struct EigenBase } /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */ - template inline void applyThisOnTheLeft(Dest& dst) const + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. @@ -104,25 +129,28 @@ template struct EigenBase */ template template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator=(const EigenBase &other) { - other.derived().evalTo(derived()); + call_assignment(derived(), other.derived()); return derived(); } template template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator+=(const EigenBase &other) { - other.derived().addTo(derived()); + call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } template template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator-=(const EigenBase &other) { - other.derived().subTo(derived()); + call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } diff --git a/external/eigen3/Eigen/src/Core/Flagged.h b/external/eigen3/Eigen/src/Core/Flagged.h deleted file mode 100644 index 1f2955f..0000000 --- a/external/eigen3/Eigen/src/Core/Flagged.h +++ /dev/null @@ -1,140 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_FLAGGED_H -#define EIGEN_FLAGGED_H - -namespace Eigen { - -/** \class Flagged - * \ingroup Core_Module - * - * \brief Expression with modified flags - * - * \param ExpressionType the type of the object of which we are modifying the flags - * \param Added the flags added to the expression - * \param Removed the flags removed from the expression (has priority over Added). - * - * This class represents an expression whose flags have been modified. - * It is the return type of MatrixBase::flagged() - * and most of the time this is the only way it is used. - * - * \sa MatrixBase::flagged() - */ - -namespace internal { -template -struct traits > : traits -{ - enum { Flags = (ExpressionType::Flags | Added) & ~Removed }; -}; -} - -template class Flagged - : public MatrixBase > -{ - public: - - typedef MatrixBase Base; - - EIGEN_DENSE_PUBLIC_INTERFACE(Flagged) - typedef typename internal::conditional::ret, - ExpressionType, const ExpressionType&>::type ExpressionTypeNested; - typedef typename ExpressionType::InnerIterator InnerIterator; - - inline Flagged(const ExpressionType& matrix) : m_matrix(matrix) {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - inline Index outerStride() const { return m_matrix.outerStride(); } - inline Index innerStride() const { return m_matrix.innerStride(); } - - inline CoeffReturnType coeff(Index row, Index col) const - { - return m_matrix.coeff(row, col); - } - - inline CoeffReturnType coeff(Index index) const - { - return m_matrix.coeff(index); - } - - inline const Scalar& coeffRef(Index row, Index col) const - { - return m_matrix.const_cast_derived().coeffRef(row, col); - } - - inline const Scalar& coeffRef(Index index) const - { - return m_matrix.const_cast_derived().coeffRef(index); - } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(row, col); - } - - inline Scalar& coeffRef(Index index) - { - return m_matrix.const_cast_derived().coeffRef(index); - } - - template - inline const PacketScalar packet(Index row, Index col) const - { - return m_matrix.template packet(row, col); - } - - template - inline void writePacket(Index row, Index col, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(row, col, x); - } - - template - inline const PacketScalar packet(Index index) const - { - return m_matrix.template packet(index); - } - - template - inline void writePacket(Index index, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(index, x); - } - - const ExpressionType& _expression() const { return m_matrix; } - - template - typename ExpressionType::PlainObject solveTriangular(const MatrixBase& other) const; - - template - void solveTriangularInPlace(const MatrixBase& other) const; - - protected: - ExpressionTypeNested m_matrix; -}; - -/** \returns an expression of *this with added and removed flags - * - * This is mostly for internal use. - * - * \sa class Flagged - */ -template -template -inline const Flagged -DenseBase::flagged() const -{ - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_FLAGGED_H diff --git a/external/eigen3/Eigen/src/Core/ForceAlignedAccess.h b/external/eigen3/Eigen/src/Core/ForceAlignedAccess.h index 807c7a2..7b08b45 100644 --- a/external/eigen3/Eigen/src/Core/ForceAlignedAccess.h +++ b/external/eigen3/Eigen/src/Core/ForceAlignedAccess.h @@ -39,29 +39,29 @@ template class ForceAlignedAccess typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) - inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -90,7 +90,7 @@ template class ForceAlignedAccess m_expression.const_cast_derived().template writePacket(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType& m_expression; @@ -127,7 +127,7 @@ template inline typename internal::add_const_on_value_type,Derived&>::type>::type MatrixBase::forceAlignedAccessIf() const { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } /** \returns an expression of *this with forced aligned access if \a Enable is true. @@ -138,7 +138,7 @@ template inline typename internal::conditional,Derived&>::type MatrixBase::forceAlignedAccessIf() { - return derived(); + return derived(); // FIXME This should not work but apparently is never used } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/Functors.h b/external/eigen3/Eigen/src/Core/Functors.h deleted file mode 100644 index 5a1b2f2..0000000 --- a/external/eigen3/Eigen/src/Core/Functors.h +++ /dev/null @@ -1,1029 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_FUNCTORS_H -#define EIGEN_FUNCTORS_H - -namespace Eigen { - -namespace internal { - -// associative functors: - -/** \internal - * \brief Template functor to compute the sum of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, MatrixBase::sum() - */ -template struct scalar_sum_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::padd(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAdd - }; -}; - -/** \internal - * \brief Template functor to compute the product of two scalars - * - * \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux() - */ -template struct scalar_product_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasMul && packet_traits::HasMul - }; - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmul(a,b); } - template - EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const - { return internal::predux_mul(a); } -}; -template -struct functor_traits > { - enum { - Cost = (NumTraits::MulCost + NumTraits::MulCost)/2, // rough estimate! - PacketAccess = scalar_product_op::Vectorizable - }; -}; - -/** \internal - * \brief Template functor to compute the conjugate product of two scalars - * - * This is a short cut for conj(x) * y which is needed for optimization purpose; in Eigen2 support mode, this becomes x * conj(y) - */ -template struct scalar_conj_product_op { - - enum { - Conj = NumTraits::IsComplex - }; - - typedef typename scalar_product_traits::ReturnType result_type; - - EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const - { return conj_helper().pmul(a,b); } - - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return conj_helper().pmul(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::MulCost, - PacketAccess = internal::is_same::value && packet_traits::HasMul - }; -}; - -/** \internal - * \brief Template functor to compute the min of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff() - */ -template struct scalar_min_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmin(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux_min(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMin - }; -}; - -/** \internal - * \brief Template functor to compute the max of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff() - */ -template struct scalar_max_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pmax(a,b); } - template - EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const - { return internal::predux_max(a); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasMax - }; -}; - -/** \internal - * \brief Template functor to compute the hypot of two scalars - * - * \sa MatrixBase::stableNorm(), class Redux - */ -template struct scalar_hypot_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) -// typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const - { - using std::max; - using std::min; - using std::sqrt; - Scalar p = (max)(_x, _y); - Scalar q = (min)(_x, _y); - Scalar qp = q/p; - return p * sqrt(Scalar(1) + qp*qp); - } -}; -template -struct functor_traits > { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess=0 }; -}; - -/** \internal - * \brief Template functor to compute the pow of two scalars - */ -template struct scalar_binary_pow_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op) - inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); } -}; -template -struct functor_traits > { - enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; -}; - -// other binary functors: - -/** \internal - * \brief Template functor to compute the difference of two scalars - * - * \sa class CwiseBinaryOp, MatrixBase::operator- - */ -template struct scalar_difference_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::psub(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasSub - }; -}; - -/** \internal - * \brief Template functor to compute the quotient of two scalars - * - * \sa class CwiseBinaryOp, Cwise::operator/() - */ -template struct scalar_quotient_op { - enum { - // TODO vectorize mixed product - Vectorizable = is_same::value && packet_traits::HasDiv && packet_traits::HasDiv - }; - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op) - EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const - { return internal::pdiv(a,b); } -}; -template -struct functor_traits > { - enum { - Cost = (NumTraits::MulCost + NumTraits::MulCost), // rough estimate! - PacketAccess = scalar_quotient_op::Vectorizable - }; -}; - - - -/** \internal - * \brief Template functor to compute the and of two booleans - * - * \sa class CwiseBinaryOp, ArrayBase::operator&& - */ -struct scalar_boolean_and_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op) - EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; } -}; -template<> struct functor_traits { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -/** \internal - * \brief Template functor to compute the or of two booleans - * - * \sa class CwiseBinaryOp, ArrayBase::operator|| - */ -struct scalar_boolean_or_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op) - EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; } -}; -template<> struct functor_traits { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -/** \internal - * \brief Template functors for comparison of two scalars - * \todo Implement packet-comparisons - */ -template struct scalar_cmp_op; - -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost, - PacketAccess = false - }; -}; - -template -struct result_of(Scalar,Scalar)> { - typedef bool type; -}; - - -template struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;} -}; -template struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;} -}; -template struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);} -}; -template struct scalar_cmp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op) - EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;} -}; - -// unary functors: - -/** \internal - * \brief Template functor to compute the opposite of a scalar - * - * \sa class CwiseUnaryOp, MatrixBase::operator- - */ -template struct scalar_opposite_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pnegate(a); } -}; -template -struct functor_traits > -{ enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasNegate }; -}; - -/** \internal - * \brief Template functor to compute the absolute value of a scalar - * - * \sa class CwiseUnaryOp, Cwise::abs - */ -template struct scalar_abs_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pabs(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = NumTraits::AddCost, - PacketAccess = packet_traits::HasAbs - }; -}; - -/** \internal - * \brief Template functor to compute the squared absolute value of a scalar - * - * \sa class CwiseUnaryOp, Cwise::abs2 - */ -template struct scalar_abs2_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a,a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasAbs2 }; }; - -/** \internal - * \brief Template functor to compute the conjugate of a complex value - * - * \sa class CwiseUnaryOp, MatrixBase::conjugate() - */ -template struct scalar_conjugate_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op) - EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); } - template - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = NumTraits::IsComplex ? NumTraits::AddCost : 0, - PacketAccess = packet_traits::HasConj - }; -}; - -/** \internal - * \brief Template functor to cast a scalar to another type - * - * \sa class CwiseUnaryOp, MatrixBase::cast() - */ -template -struct scalar_cast_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) - typedef NewType result_type; - EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast(a); } -}; -template -struct functor_traits > -{ enum { Cost = is_same::value ? 0 : NumTraits::AddCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the real part of a complex - * - * \sa class CwiseUnaryOp, MatrixBase::real() - */ -template -struct scalar_real_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the imaginary part of a complex - * - * \sa class CwiseUnaryOp, MatrixBase::imag() - */ -template -struct scalar_imag_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the real part of a complex as a reference - * - * \sa class CwiseUnaryOp, MatrixBase::real() - */ -template -struct scalar_real_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast(&a)); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to extract the imaginary part of a complex as a reference - * - * \sa class CwiseUnaryOp, MatrixBase::imag() - */ -template -struct scalar_imag_ref_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op) - typedef typename NumTraits::Real result_type; - EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast(&a)); } -}; -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -/** \internal - * - * \brief Template functor to compute the exponential of a scalar - * - * \sa class CwiseUnaryOp, Cwise::exp() - */ -template struct scalar_exp_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op) - inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pexp(a); } -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasExp }; }; - -/** \internal - * - * \brief Template functor to compute the logarithm of a scalar - * - * \sa class CwiseUnaryOp, Cwise::log() - */ -template struct scalar_log_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op) - inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::plog(a); } -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = packet_traits::HasLog }; }; - -/** \internal - * \brief Template functor to multiply a scalar by a fixed other one - * - * \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/ - */ -/* NOTE why doing the pset1() in packetOp *is* an optimization ? - * indeed it seems better to declare m_other as a Packet and do the pset1() once - * in the constructor. However, in practice: - * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() outside the loop :) - * - simpler code ;) - * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) - */ -template -struct scalar_multiple_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { } - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -template -struct scalar_multiple2_op { - typedef typename scalar_product_traits::ReturnType result_type; - EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { } - EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to divide a scalar by a fixed other one - * - * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not necessarily a floating point type. - * - * \sa class CwiseUnaryOp, MatrixBase::operator/ - */ -template -struct scalar_quotient1_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pdiv(a, pset1(m_other)); } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - -// nullary functors - -template -struct scalar_constant_op { - typedef typename packet_traits::type Packet; - EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { } - template - EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; } - template - EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1(m_other); } - const Scalar m_other; -}; -template -struct functor_traits > -// FIXME replace this packet test by a safe one -{ enum { Cost = 1, PacketAccess = packet_traits::Vectorizable, IsRepeatable = true }; }; - -template struct scalar_identity_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op) - template - EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false, IsRepeatable = true }; }; - -template struct linspaced_op_impl; - -// linear access for packet ops: -// 1) initialization -// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0]) -// 2) each step (where size is 1 for coeff access or PacketSize for packet access) -// base += [size*step, ..., size*step] -// -// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp) -// in order to avoid the padd() in operator() ? -template -struct linspaced_op_impl -{ - typedef typename packet_traits::type Packet; - - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_packetStep(pset1(packet_traits::size*step)), - m_base(padd(pset1(low), pmul(pset1(step),plset(-packet_traits::size)))) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const - { - m_base = padd(m_base, pset1(m_step)); - return m_low+Scalar(i)*m_step; - } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); } - - const Scalar m_low; - const Scalar m_step; - const Packet m_packetStep; - mutable Packet m_base; -}; - -// random access for packet ops: -// 1) each step -// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) -template -struct linspaced_op_impl -{ - typedef typename packet_traits::type Packet; - - linspaced_op_impl(const Scalar& low, const Scalar& step) : - m_low(low), m_step(step), - m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const - { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1(Scalar(i)),m_interPacket))); } - - const Scalar m_low; - const Scalar m_step; - const Packet m_lowPacket; - const Packet m_stepPacket; - const Packet m_interPacket; -}; - -// ----- Linspace functor ---------------------------------------------------------------- - -// Forward declaration (we default to random access which does not really give -// us a speed gain when using packet access but it allows to use the functor in -// nested expressions). -template struct linspaced_op; -template struct functor_traits< linspaced_op > -{ enum { Cost = 1, PacketAccess = packet_traits::HasSetLinear, IsRepeatable = true }; }; -template struct linspaced_op -{ - typedef typename packet_traits::type Packet; - linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} - - template - EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl(col + row); - } - - template - EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); } - - // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since - // there row==0 and col is used for the actual iteration. - template - EIGEN_STRONG_INLINE const Packet packetOp(Index row, Index col) const - { - eigen_assert(col==0 || row==0); - return impl.packetOp(col + row); - } - - // This proxy object handles the actual required temporaries, the different - // implementations (random vs. sequential access) as well as the - // correct piping to size 2/4 packet operations. - const linspaced_op_impl impl; -}; - -// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta -// to indicate whether a functor allows linear access, just always answering 'yes' except for -// scalar_identity_op. -// FIXME move this to functor_traits adding a functor_default -template struct functor_has_linear_access { enum { ret = 1 }; }; -template struct functor_has_linear_access > { enum { ret = 0 }; }; - -// In Eigen, any binary op (Product, CwiseBinaryOp) require the Lhs and Rhs to have the same scalar type, except for multiplication -// where the mixing of different types is handled by scalar_product_traits -// In particular, real * complex is allowed. -// FIXME move this to functor_traits adding a functor_default -template struct functor_is_product_like { enum { ret = 0 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; -template struct functor_is_product_like > { enum { ret = 1 }; }; - - -/** \internal - * \brief Template functor to add a scalar to a fixed other one - * \sa class CwiseUnaryOp, Array::operator+ - */ -/* If you wonder why doing the pset1() in packetOp() is an optimization check scalar_multiple_op */ -template -struct scalar_add_op { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { } - inline scalar_add_op(const Scalar& other) : m_other(other) { } - inline Scalar operator() (const Scalar& a) const { return a + m_other; } - inline const Packet packetOp(const Packet& a) const - { return internal::padd(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = packet_traits::HasAdd }; }; - -/** \internal - * \brief Template functor to compute the square root of a scalar - * \sa class CwiseUnaryOp, Cwise::sqrt() - */ -template struct scalar_sqrt_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op) - inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); } -}; -template -struct functor_traits > -{ enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasSqrt - }; -}; - -/** \internal - * \brief Template functor to compute the cosine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::cos() - */ -template struct scalar_cos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op) - inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pcos(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasCos - }; -}; - -/** \internal - * \brief Template functor to compute the sine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::sin() - */ -template struct scalar_sin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op) - inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::psin(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasSin - }; -}; - - -/** \internal - * \brief Template functor to compute the tan of a scalar - * \sa class CwiseUnaryOp, ArrayBase::tan() - */ -template struct scalar_tan_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op) - inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::ptan(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasTan - }; -}; - -/** \internal - * \brief Template functor to compute the arc cosine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::acos() - */ -template struct scalar_acos_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op) - inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pacos(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasACos - }; -}; - -/** \internal - * \brief Template functor to compute the arc sine of a scalar - * \sa class CwiseUnaryOp, ArrayBase::asin() - */ -template struct scalar_asin_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op) - inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); } - typedef typename packet_traits::type Packet; - inline Packet packetOp(const Packet& a) const { return internal::pasin(a); } -}; -template -struct functor_traits > -{ - enum { - Cost = 5 * NumTraits::MulCost, - PacketAccess = packet_traits::HasASin - }; -}; - -/** \internal - * \brief Template functor to raise a scalar to a power - * \sa class CwiseUnaryOp, Cwise::pow - */ -template -struct scalar_pow_op { - // FIXME default copy constructors seems bugged with std::complex<> - inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { } - inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {} - inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); } - const Scalar m_exponent; -}; -template -struct functor_traits > -{ enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false }; }; - -/** \internal - * \brief Template functor to compute the quotient between a scalar and array entries. - * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_mult_op { - scalar_inverse_mult_op(const Scalar& other) : m_other(other) {} - inline Scalar operator() (const Scalar& a) const { return m_other / a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(m_other),a); } - Scalar m_other; -}; - -/** \internal - * \brief Template functor to compute the inverse of a scalar - * \sa class CwiseUnaryOp, Cwise::inverse() - */ -template -struct scalar_inverse_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op) - inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pdiv(pset1(Scalar(1)),a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; - -/** \internal - * \brief Template functor to compute the square of a scalar - * \sa class CwiseUnaryOp, Cwise::square() - */ -template -struct scalar_square_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op) - inline Scalar operator() (const Scalar& a) const { return a*a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pmul(a,a); } -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -/** \internal - * \brief Template functor to compute the cube of a scalar - * \sa class CwiseUnaryOp, Cwise::cube() - */ -template -struct scalar_cube_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op) - inline Scalar operator() (const Scalar& a) const { return a*a*a; } - template - inline const Packet packetOp(const Packet& a) const - { return internal::pmul(a,pmul(a,a)); } -}; -template -struct functor_traits > -{ enum { Cost = 2*NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -// default functor traits for STL functors: - -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = NumTraits::AddCost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1, PacketAccess = false }; }; - -#if(__cplusplus < 201103L) -// std::binder* are deprecated since c++11 and will be removed in c++17 -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost, PacketAccess = false }; }; -#endif - -template -struct functor_traits > -{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; - -#ifdef EIGEN_STDEXT_SUPPORT - -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > > -{ enum { Cost = 0, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; - -template -struct functor_traits > -{ enum { Cost = functor_traits::Cost + functor_traits::Cost + functor_traits::Cost, PacketAccess = false }; }; - -#endif // EIGEN_STDEXT_SUPPORT - -// allow to add new functors and specializations of functor_traits from outside Eigen. -// this macro is really needed because functor_traits must be specialized after it is declared but before it is used... -#ifdef EIGEN_FUNCTORS_PLUGIN -#include EIGEN_FUNCTORS_PLUGIN -#endif - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_FUNCTORS_H diff --git a/external/eigen3/Eigen/src/Core/Fuzzy.h b/external/eigen3/Eigen/src/Core/Fuzzy.h index fe63bd2..3e403a0 100644 --- a/external/eigen3/Eigen/src/Core/Fuzzy.h +++ b/external/eigen3/Eigen/src/Core/Fuzzy.h @@ -19,18 +19,19 @@ namespace internal template::IsInteger> struct isApprox_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) { - using std::min; - typename internal::nested::type nested(x); - typename internal::nested::type otherNested(y); - return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + typename internal::nested_eval::type nested(x); + typename internal::nested_eval::type otherNested(y); + return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); } }; template struct isApprox_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&) { return x.matrix() == y.matrix(); @@ -40,6 +41,7 @@ struct isApprox_selector template::IsInteger> struct isMuchSmallerThan_object_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) { return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum(); @@ -49,6 +51,7 @@ struct isMuchSmallerThan_object_selector template struct isMuchSmallerThan_object_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&) { return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); @@ -58,6 +61,7 @@ struct isMuchSmallerThan_object_selector template::IsInteger> struct isMuchSmallerThan_scalar_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec) { return x.cwiseAbs2().sum() <= numext::abs2(prec * y); @@ -67,6 +71,7 @@ struct isMuchSmallerThan_scalar_selector template struct isMuchSmallerThan_scalar_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&) { return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); diff --git a/external/eigen3/Eigen/src/Core/GeneralProduct.h b/external/eigen3/Eigen/src/Core/GeneralProduct.h index 5744eb7..0f16cd8 100644 --- a/external/eigen3/Eigen/src/Core/GeneralProduct.h +++ b/external/eigen3/Eigen/src/Core/GeneralProduct.h @@ -11,29 +11,7 @@ #ifndef EIGEN_GENERAL_PRODUCT_H #define EIGEN_GENERAL_PRODUCT_H -namespace Eigen { - -/** \class GeneralProduct - * \ingroup Core_Module - * - * \brief Expression of the product of two general matrices or vectors - * - * \param LhsNested the type used to store the left-hand side - * \param RhsNested the type used to store the right-hand side - * \param ProductMode the type of the product - * - * This class represents an expression of the product of two general matrices. - * We call a general matrix, a dense matrix with full storage. For instance, - * This excludes triangular, selfadjoint, and sparse matrices. - * It is the return type of the operator* between general matrices. Its template - * arguments are determined automatically by ProductReturnType. Therefore, - * GeneralProduct should never be used direclty. To determine the result type of a - * function which involves a matrix product, use ProductReturnType::Type. - * - * \sa ProductReturnType, MatrixBase::operator*(const MatrixBase&) - */ -template::value> -class GeneralProduct; +namespace Eigen { enum { Large = 2, @@ -47,7 +25,8 @@ template struct product_type_selector; template struct product_size_category { enum { is_large = MaxSize == Dynamic || - Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD, + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), value = is_large ? Large : Size == 1 ? 1 : Small @@ -59,15 +38,14 @@ template struct product_type typedef typename remove_all::type _Lhs; typedef typename remove_all::type _Rhs; enum { - MaxRows = _Lhs::MaxRowsAtCompileTime, - Rows = _Lhs::RowsAtCompileTime, - MaxCols = _Rhs::MaxColsAtCompileTime, - Cols = _Rhs::ColsAtCompileTime, - MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::MaxColsAtCompileTime, - _Rhs::MaxRowsAtCompileTime), - Depth = EIGEN_SIZE_MIN_PREFER_FIXED(_Lhs::ColsAtCompileTime, - _Rhs::RowsAtCompileTime), - LargeThreshold = EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD + MaxRows = traits<_Lhs>::MaxRowsAtCompileTime, + Rows = traits<_Lhs>::RowsAtCompileTime, + MaxCols = traits<_Rhs>::MaxColsAtCompileTime, + Cols = traits<_Rhs>::ColsAtCompileTime, + MaxDepth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::MaxColsAtCompileTime, + traits<_Rhs>::MaxRowsAtCompileTime), + Depth = EIGEN_SIZE_MIN_PREFER_FIXED(traits<_Lhs>::ColsAtCompileTime, + traits<_Rhs>::RowsAtCompileTime) }; // the splitting into different lines of code here, introducing the _select enums and the typedef below, @@ -82,7 +60,8 @@ private: public: enum { - value = selector::ret + value = selector::ret, + ret = selector::ret }; #ifdef EIGEN_DEBUG_PRODUCT static void debug() @@ -98,12 +77,13 @@ public: #endif }; - /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ // FIXME I'm not sure the current mapping is the ideal one. template struct product_type_selector { enum { ret = OuterProduct }; }; +template struct product_type_selector { enum { ret = LazyCoeffBasedProductMode }; }; +template struct product_type_selector<1, N, 1> { enum { ret = LazyCoeffBasedProductMode }; }; template struct product_type_selector<1, 1, Depth> { enum { ret = InnerProduct }; }; template<> struct product_type_selector<1, 1, 1> { enum { ret = InnerProduct }; }; template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; @@ -122,60 +102,12 @@ template<> struct product_type_selector { enum template<> struct product_type_selector { enum { ret = GemmProduct }; }; template<> struct product_type_selector { enum { ret = GemmProduct }; }; template<> struct product_type_selector { enum { ret = GemmProduct }; }; -template<> struct product_type_selector { enum { ret = GemmProduct }; }; -template<> struct product_type_selector { enum { ret = GemmProduct }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; +template<> struct product_type_selector { enum { ret = CoeffBasedProductMode }; }; template<> struct product_type_selector { enum { ret = GemmProduct }; }; } // end namespace internal -/** \class ProductReturnType - * \ingroup Core_Module - * - * \brief Helper class to get the correct and optimized returned type of operator* - * - * \param Lhs the type of the left-hand side - * \param Rhs the type of the right-hand side - * \param ProductMode the type of the product (determined automatically by internal::product_mode) - * - * This class defines the typename Type representing the optimized product expression - * between two matrix expressions. In practice, using ProductReturnType::Type - * is the recommended way to define the result type of a function returning an expression - * which involve a matrix product. The class Product should never be - * used directly. - * - * \sa class Product, MatrixBase::operator*(const MatrixBase&) - */ -template -struct ProductReturnType -{ - // TODO use the nested type to reduce instanciations ???? -// typedef typename internal::nested::type LhsNested; -// typedef typename internal::nested::type RhsNested; - - typedef GeneralProduct Type; -}; - -template -struct ProductReturnType -{ - typedef typename internal::nested::type >::type LhsNested; - typedef typename internal::nested::type >::type RhsNested; - typedef CoeffBasedProduct Type; -}; - -template -struct ProductReturnType -{ - typedef typename internal::nested::type >::type LhsNested; - typedef typename internal::nested::type >::type RhsNested; - typedef CoeffBasedProduct Type; -}; - -// this is a workaround for sun CC -template -struct LazyProductReturnType : public ProductReturnType -{}; - /*********************************************************************** * Implementation of Inner Vector Vector Product ***********************************************************************/ @@ -187,114 +119,10 @@ struct LazyProductReturnType : public ProductReturnType with: operator=(Scalar x); -namespace internal { - -template -struct traits > - : traits::ReturnType,1,1> > -{}; - -} - -template -class GeneralProduct - : internal::no_assignment_operator, - public Matrix::ReturnType,1,1> -{ - typedef Matrix::ReturnType,1,1> Base; - public: - GeneralProduct(const Lhs& lhs, const Rhs& rhs) - { - Base::coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); - } - - /** Convertion to scalar */ - operator const typename Base::Scalar() const { - return Base::coeff(0,0); - } -}; - /*********************************************************************** * Implementation of Outer Vector Vector Product ***********************************************************************/ -namespace internal { - -// Column major -template -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&) -{ - typedef typename Dest::Index Index; - // FIXME make sure lhs is sequentially stored - // FIXME not very good if rhs is real and lhs complex while alpha is real too - const Index cols = dest.cols(); - for (Index j=0; j -EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) { - typedef typename Dest::Index Index; - // FIXME make sure rhs is sequentially stored - // FIXME not very good if lhs is real and rhs complex while alpha is real too - const Index rows = dest.rows(); - for (Index i=0; i -struct traits > - : traits, Lhs, Rhs> > -{}; - -} - -template -class GeneralProduct - : public ProductBase, Lhs, Rhs> -{ - template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; - - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - GeneralProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) - { - } - - struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; - struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; - struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; - struct adds { - Scalar m_scale; - adds(const Scalar& s) : m_scale(s) {} - template void operator()(const Dst& dst, const Src& src) const { - dst.const_cast_derived() += m_scale * src; - } - }; - - template - inline void evalTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, set(), is_row_major()); - } - - template - inline void addTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, add(), is_row_major()); - } - - template - inline void subTo(Dest& dest) const { - internal::outer_product_selector_run(*this, dest, sub(), is_row_major()); - } - - template void scaleAndAddTo(Dest& dest, const Scalar& alpha) const - { - internal::outer_product_selector_run(*this, dest, adds(alpha), is_row_major()); - } -}; - /*********************************************************************** * Implementation of General Matrix Vector Product ***********************************************************************/ @@ -308,60 +136,13 @@ class GeneralProduct */ namespace internal { -template -struct traits > - : traits, Lhs, Rhs> > -{}; - template -struct gemv_selector; +struct gemv_dense_selector; } // end namespace internal -template -class GeneralProduct - : public ProductBase, Lhs, Rhs> -{ - public: - EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct) - - typedef typename Lhs::Scalar LhsScalar; - typedef typename Rhs::Scalar RhsScalar; - - GeneralProduct(const Lhs& a_lhs, const Rhs& a_rhs) : Base(a_lhs,a_rhs) - { -// EIGEN_STATIC_ASSERT((internal::is_same::value), -// YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - } - - enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; - typedef typename internal::conditional::type MatrixType; - - template void scaleAndAddTo(Dest& dst, const Scalar& alpha) const - { - eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols()); - internal::gemv_selector::HasUsableDirectAccess)>::run(*this, dst, alpha); - } -}; - namespace internal { -// The vector is on the left => transposition -template -struct gemv_selector -{ - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) - { - Transpose destT(dest); - enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; - gemv_selector - ::run(GeneralProduct,Transpose, GemvProduct> - (prod.rhs().transpose(), prod.lhs().transpose()), destT, alpha); - } -}; - template struct gemv_static_vector_if; template @@ -379,46 +160,61 @@ struct gemv_static_vector_if template struct gemv_static_vector_if { - #if EIGEN_ALIGN_STATICALLY - internal::plain_array m_data; - EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } - #else - // Some architectures cannot align on the stack, - // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. enum { ForceAlignment = internal::packet_traits::Vectorizable, PacketSize = internal::packet_traits::size }; - internal::plain_array m_data; + #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0 + internal::plain_array m_data; + EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; } + #else + // Some architectures cannot align on the stack, + // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. + internal::plain_array m_data; EIGEN_STRONG_INLINE Scalar* data() { return ForceAlignment - ? reinterpret_cast((reinterpret_cast(m_data.array) & ~(size_t(15))) + 16) + ? reinterpret_cast((internal::UIntPtr(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES) : m_data.array; } #endif }; -template<> struct gemv_selector +// The vector is on the left => transposition +template +struct gemv_dense_selector +{ + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) + { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor }; + gemv_dense_selector + ::run(rhs.transpose(), lhs.transpose(), destT, alpha); + } +}; + +template<> struct gemv_dense_selector { - template - static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static inline void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::Index Index; - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::RealScalar RealScalar; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - typedef Map, Aligned> MappedDest; - - ActualLhsType actualLhs = LhsBlasTraits::extract(prod.lhs()); - ActualRhsType actualRhs = RhsBlasTraits::extract(prod.rhs()); - - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + typedef typename Dest::RealScalar RealScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + + typedef Map, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits::size)> MappedDest; + + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); // make sure Dest is a compile-time vector type (bug 1166) typedef typename conditional::type ActualDest; @@ -428,80 +224,97 @@ template<> struct gemv_selector // on, the other hand it is good for the cache to pack the vector anyways... EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime==1), ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), - MightCannotUseDest = (ActualDest::InnerStrideAtCompileTime!=1) || ComplexByReal + MightCannotUseDest = (!EvalToDestAtCompileTime) || ComplexByReal }; - gemv_static_vector_if static_dest; - - bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); - bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; - + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; RhsScalar compatibleAlpha = get_factor::run(actualAlpha); - ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), - evalToDest ? dest.data() : static_dest.data()); - - if(!evalToDest) + if(!MightCannotUseDest) { - #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = dest.size(); - EIGEN_DENSE_STORAGE_CTOR_PLUGIN - #endif - if(!alphaIsCompatible) + // shortcut if we are sure to be able to use dest directly, + // this ease the compiler to generate cleaner and more optimzized code for most common cases + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + dest.data(), 1, + compatibleAlpha); + } + else + { + gemv_static_vector_if static_dest; + + const bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0)); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(), + evalToDest ? dest.data() : static_dest.data()); + + if(!evalToDest) { - MappedDest(actualDestPtr, dest.size()).setZero(); - compatibleAlpha = RhsScalar(1); + #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN + #endif + if(!alphaIsCompatible) + { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } + else + MappedDest(actualDestPtr, dest.size()) = dest; } - else - MappedDest(actualDestPtr, dest.size()) = dest; - } - general_matrix_vector_product - ::run( - actualLhs.rows(), actualLhs.cols(), - actualLhs.data(), actualLhs.outerStride(), - actualRhs.data(), actualRhs.innerStride(), - actualDestPtr, 1, - compatibleAlpha); + general_matrix_vector_product + ::run( + actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhs.data(), actualRhs.innerStride()), + actualDestPtr, 1, + compatibleAlpha); - if (!evalToDest) - { - if(!alphaIsCompatible) - dest += actualAlpha * MappedDest(actualDestPtr, dest.size()); - else - dest = MappedDest(actualDestPtr, dest.size()); + if (!evalToDest) + { + if(!alphaIsCompatible) + dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } } } }; -template<> struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename ProductType::LhsScalar LhsScalar; - typedef typename ProductType::RhsScalar RhsScalar; - typedef typename ProductType::Scalar ResScalar; - typedef typename ProductType::Index Index; - typedef typename ProductType::ActualLhsType ActualLhsType; - typedef typename ProductType::ActualRhsType ActualRhsType; - typedef typename ProductType::_ActualRhsType _ActualRhsType; - typedef typename ProductType::LhsBlasTraits LhsBlasTraits; - typedef typename ProductType::RhsBlasTraits RhsBlasTraits; - - typename add_const::type actualLhs = LhsBlasTraits::extract(prod.lhs()); - typename add_const::type actualRhs = RhsBlasTraits::extract(prod.rhs()); - - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs()) - * RhsBlasTraits::extractScalarFactor(prod.rhs()); + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef typename internal::remove_all::type ActualRhsTypeCleaned; + + typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); + typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) + * RhsBlasTraits::extractScalarFactor(rhs); enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... - DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1 + DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 }; - gemv_static_vector_if static_rhs; + gemv_static_vector_if static_rhs; ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(), DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); @@ -509,45 +322,48 @@ template<> struct gemv_selector if(!DirectlyUseRhs) { #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - int size = actualRhs.size(); + Index size = actualRhs.size(); EIGEN_DENSE_STORAGE_CTOR_PLUGIN #endif - Map(actualRhsPtr, actualRhs.size()) = actualRhs; + Map(actualRhsPtr, actualRhs.size()) = actualRhs; } + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; general_matrix_vector_product - ::run( + ::run( actualLhs.rows(), actualLhs.cols(), - actualLhs.data(), actualLhs.outerStride(), - actualRhsPtr, 1, + LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhsPtr, 1), dest.data(), dest.col(0).innerStride(), //NOTE if dest is not a vector at compile-time, then dest.innerStride() might be wrong. (bug 1166) actualAlpha); } }; -template<> struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename Dest::Index Index; - // TODO makes sure dest is sequentially stored in memory, otherwise use a temp - const Index size = prod.rhs().rows(); + EIGEN_STATIC_ASSERT((!nested_eval::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); + // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, otherwise use a temp + typename nested_eval::type actual_rhs(rhs); + const Index size = rhs.rows(); for(Index k=0; k struct gemv_selector +template<> struct gemv_dense_selector { - template - static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha) + template + static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha) { - typedef typename Dest::Index Index; - // TODO makes sure rhs is sequentially stored in memory, otherwise use a temp - const Index rows = prod.rows(); + EIGEN_STATIC_ASSERT((!nested_eval::Evaluate),EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); + typename nested_eval::type actual_rhs(rhs); + const Index rows = dest.rows(); for(Index i=0; i struct gemv_selector * * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ +#ifndef __CUDACC__ + template template -inline const typename ProductReturnType::Type +inline const Product MatrixBase::operator*(const MatrixBase &other) const { // A note regarding the function declaration: In MSVC, this function will sometimes @@ -590,9 +408,12 @@ MatrixBase::operator*(const MatrixBase &other) const #ifdef EIGEN_DEBUG_PRODUCT internal::product_type::debug(); #endif - return typename ProductReturnType::Type(derived(), other.derived()); + + return Product(derived(), other.derived()); } +#endif // __CUDACC__ + /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be @@ -606,7 +427,7 @@ MatrixBase::operator*(const MatrixBase &other) const */ template template -const typename LazyProductReturnType::Type +const Product MatrixBase::lazyProduct(const MatrixBase &other) const { enum { @@ -625,7 +446,7 @@ MatrixBase::lazyProduct(const MatrixBase &other) const INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) - return typename LazyProductReturnType::Type(derived(), other.derived()); + return Product(derived(), other.derived()); } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/GenericPacketMath.h b/external/eigen3/Eigen/src/Core/GenericPacketMath.h index c6e93bb..029f8ac 100644 --- a/external/eigen3/Eigen/src/Core/GenericPacketMath.h +++ b/external/eigen3/Eigen/src/Core/GenericPacketMath.h @@ -42,21 +42,28 @@ namespace internal { struct default_packet_traits { enum { + HasHalfPacket = 0, + HasAdd = 1, HasSub = 1, HasMul = 1, HasNegate = 1, HasAbs = 1, + HasArg = 0, HasAbs2 = 1, HasMin = 1, HasMax = 1, HasConj = 1, HasSetLinear = 1, + HasBlend = 0, HasDiv = 0, HasSqrt = 0, + HasRsqrt = 0, HasExp = 0, HasLog = 0, + HasLog1p = 0, + HasLog10 = 0, HasPow = 0, HasSin = 0, @@ -64,17 +71,37 @@ struct default_packet_traits HasTan = 0, HasASin = 0, HasACos = 0, - HasATan = 0 + HasATan = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, + HasLGamma = 0, + HasDiGamma = 0, + HasZeta = 0, + HasPolygamma = 0, + HasErf = 0, + HasErfc = 0, + HasIGamma = 0, + HasIGammac = 0, + HasBetaInc = 0, + + HasRound = 0, + HasFloor = 0, + HasCeil = 0, + + HasSign = 0 }; }; template struct packet_traits : default_packet_traits { typedef T type; + typedef T half; enum { Vectorizable = 0, size = 1, - AlignedOnScalar = 0 + AlignedOnScalar = 0, + HasHalfPacket = 0 }; enum { HasAdd = 0, @@ -90,135 +117,239 @@ template struct packet_traits : default_packet_traits }; }; +template struct packet_traits : packet_traits { }; + +template struct type_casting_traits { + enum { + VectorizedCast = 0, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + + +/** \internal \returns static_cast(a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a) { + return static_cast(a); +} +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/) { + return static_cast(a); +} + +template +EIGEN_DEVICE_FUNC inline TgtPacket +pcast(const SrcPacket& a, const SrcPacket& /*b*/, const SrcPacket& /*c*/, const SrcPacket& /*d*/) { + return static_cast(a); +} + /** \internal \returns a + b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) { return a+b; } /** \internal \returns a - b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) { return a-b; } /** \internal \returns -a (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) { return -a; } /** \internal \returns conj(a) (coeff-wise) */ -template inline Packet + +template EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) { return numext::conj(a); } /** \internal \returns a * b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) { return a*b; } /** \internal \returns a / b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) { return a/b; } /** \internal \returns the min of \a a and \a b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, - const Packet& b) { using std::min; return (min)(a, b); } + const Packet& b) { return numext::mini(a, b); } /** \internal \returns the max of \a a and \a b (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, - const Packet& b) { using std::max; return (max)(a, b); } + const Packet& b) { return numext::maxi(a, b); } /** \internal \returns the absolute value of \a a */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) { using std::abs; return abs(a); } +/** \internal \returns the phase angle of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +parg(const Packet& a) { using numext::arg; return arg(a); } + /** \internal \returns the bitwise and of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) { return a & b; } /** \internal \returns the bitwise or of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) { return a | b; } /** \internal \returns the bitwise xor of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) { return a ^ b; } /** \internal \returns the bitwise andnot of \a a and \a b */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { return a & (!b); } /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits::type* from) { return *from; } /** \internal \returns a packet version of \a *from, (un-aligned load) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits::type* from) { return *from; } +/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +template EIGEN_DEVICE_FUNC inline Packet +pset1(const typename unpacket_traits::type& a) { return a; } + +/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ +template EIGEN_DEVICE_FUNC inline Packet +pload1(const typename unpacket_traits::type *a) { return pset1(*a); } + /** \internal \returns a packet with elements of \a *from duplicated. - * For instance, for a packet of 8 elements, 4 scalar will be read from \a *from and - * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]} + * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and + * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} * Currently, this function is only used for scalar * complex products. - */ -template inline Packet + */ +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits::type* from) { return *from; } -/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ -template inline Packet -pset1(const typename unpacket_traits::type& a) { return a; } +/** \internal \returns a packet with elements of \a *from quadrupled. + * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and + * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]} + * Currently, this function is only used in matrix products. + * For packet-size smaller or equal to 4, this function is equivalent to pload1 + */ +template EIGEN_DEVICE_FUNC inline Packet +ploadquad(const typename unpacket_traits::type* from) +{ return pload1(from); } + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * a2 = pload1(a+2); + * a3 = pload1(a+3); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast2 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast4(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1, Packet& a2, Packet& a3) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); + a2 = pload1(a+2); + a3 = pload1(a+3); +} + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast4 + */ +template EIGEN_DEVICE_FUNC +inline void pbroadcast2(const typename unpacket_traits::type *a, + Packet& a0, Packet& a1) +{ + a0 = pload1(a+0); + a1 = pload1(a+1); +} /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ -template inline typename packet_traits::type -plset(const Scalar& a) { return a; } +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet +plset(const typename unpacket_traits::type& a) { return a; } /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ -template inline void pstore(Scalar* to, const Packet& from) +template EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) { (*to) = from; } /** \internal copy the packet \a from to \a *to, (un-aligned store) */ -template inline void pstoreu(Scalar* to, const Packet& from) -{ (*to) = from; } +template EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) +{ (*to) = from; } + + template EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) + { return ploadu(from); } + + template EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) + { pstore(to, from); } /** \internal tries to do cache prefetching of \a addr */ -template inline void prefetch(const Scalar* addr) +template EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) { -#if (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC) +#ifdef __CUDA_ARCH__ +#if defined(__LP64__) + // 64-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr)); +#else + // 32-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); +#endif +#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC) __builtin_prefetch(addr); #endif } /** \internal \returns the first element of a packet */ -template inline typename unpacket_traits::type pfirst(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type pfirst(const Packet& a) { return a; } /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet preduxp(const Packet* vecs) { return vecs[0]; } /** \internal \returns the sum of the elements of \a a*/ -template inline typename unpacket_traits::type predux(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux(const Packet& a) +{ return a; } + +/** \internal \returns the sum of the elements of \a a by block of 4 elements. + * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} + * For packet-size smaller or equal to 4, this boils down to a noop. + */ +template EIGEN_DEVICE_FUNC inline +typename conditional<(unpacket_traits::size%8)==0,typename unpacket_traits::half,Packet>::type +predux_downto4(const Packet& a) { return a; } /** \internal \returns the product of the elements of \a a*/ -template inline typename unpacket_traits::type predux_mul(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul(const Packet& a) { return a; } /** \internal \returns the min of the elements of \a a*/ -template inline typename unpacket_traits::type predux_min(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) { return a; } /** \internal \returns the max of the elements of \a a*/ -template inline typename unpacket_traits::type predux_max(const Packet& a) +template EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) { return a; } /** \internal \returns the reversed elements of \a a*/ -template inline Packet preverse(const Packet& a) +template EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) { return a; } - /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ -template inline Packet pcplxflip(const Packet& a) +template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) { // FIXME: uncomment the following in case we drop the internal imag and real functions. // using std::imag; @@ -250,6 +381,22 @@ Packet pasin(const Packet& a) { using std::asin; return asin(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) { using std::acos; return acos(a); } +/** \internal \returns the arc tangent of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet patan(const Packet& a) { using std::atan; return atan(a); } + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet psinh(const Packet& a) { using std::sinh; return sinh(a); } + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pcosh(const Packet& a) { using std::cosh; return cosh(a); } + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } + /** \internal \returns the exp of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } @@ -258,10 +405,36 @@ Packet pexp(const Packet& a) { using std::exp; return exp(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } +/** \internal \returns the log1p of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog1p(const Packet& a) { return numext::log1p(a); } + +/** \internal \returns the log10 of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet plog10(const Packet& a) { using std::log10; return log10(a); } + /** \internal \returns the square-root of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); } +/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet prsqrt(const Packet& a) { + return pdiv(pset1(1), psqrt(a)); +} + +/** \internal \returns the rounded value of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pround(const Packet& a) { using numext::round; return round(a); } + +/** \internal \returns the floor of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pfloor(const Packet& a) { using numext::floor; return floor(a); } + +/** \internal \returns the ceil of \a a (coeff-wise) */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); } + /*************************************************************************** * The following functions might not have to be overwritten for vectorized types ***************************************************************************/ @@ -275,34 +448,45 @@ inline void pstore1(typename unpacket_traits::type* to, const typename u } /** \internal \returns a * b + c (coeff-wise) */ -template inline Packet +template EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) { return padd(pmul(a, b),c); } /** \internal \returns a packet version of \a *from. - * If LoadMode equals #Aligned, \a from must be 16 bytes aligned */ -template -inline Packet ploadt(const typename unpacket_traits::type* from) + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits::alignment) return pload(from); else return ploadu(from); } /** \internal copy the packet \a from to \a *to. - * If StoreMode equals #Aligned, \a to must be 16 bytes aligned */ -template -inline void pstoret(Scalar* to, const Packet& from) + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) { - if(LoadMode == Aligned) + if(Alignment >= unpacket_traits::alignment) pstore(to, from); else pstoreu(to, from); } +/** \internal \returns a packet version of \a *from. + * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the + * hardware if available to speedup the loading of data that won't be modified + * by the current computation. + */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits::type* from) +{ + return ploadt(from); +} + /** \internal default implementation of palign() allowing partial specialization */ template struct palign_impl @@ -336,15 +520,74 @@ inline void palign(PacketType& first, const PacketType& second) * Fast complex products (GCC generates a function call which is very slow) ***************************************************************************/ +// Eigen+CUDA does not support complexes. +#ifndef __CUDACC__ + template<> inline std::complex pmul(const std::complex& a, const std::complex& b) { return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } template<> inline std::complex pmul(const std::complex& a, const std::complex& b) { return std::complex(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); } +#endif + + +/*************************************************************************** + * PacketBlock, that is a collection of N packets where the number of words + * in the packet is a multiple of N. +***************************************************************************/ +template ::size> struct PacketBlock { + Packet packet[N]; +}; + +template EIGEN_DEVICE_FUNC inline void +ptranspose(PacketBlock& /*kernel*/) { + // Nothing to do in the scalar case, i.e. a 1x1 matrix. +} + +/*************************************************************************** + * Selector, i.e. vector of N boolean values used to select (i.e. blend) + * words from 2 packets. +***************************************************************************/ +template struct Selector { + bool select[N]; +}; + +template EIGEN_DEVICE_FUNC inline Packet +pblend(const Selector::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) { + return ifPacket.select[0] ? thenPacket : elsePacket; +} + +/** \internal \returns \a a with the first coefficient replaced by the scalar b */ +template EIGEN_DEVICE_FUNC inline Packet +pinsertfirst(const Packet& a, typename unpacket_traits::type b) +{ + // Default implementation based on pblend. + // It must be specialized for higher performance. + Selector::size> mask; + mask.select[0] = true; + // This for loop should be optimized away by the compiler. + for(Index i=1; i::size; ++i) + mask.select[i] = false; + return pblend(mask, pset1(b), a); +} + +/** \internal \returns \a a with the last coefficient replaced by the scalar b */ +template EIGEN_DEVICE_FUNC inline Packet +pinsertlast(const Packet& a, typename unpacket_traits::type b) +{ + // Default implementation based on pblend. + // It must be specialized for higher performance. + Selector::size> mask; + // This for loop should be optimized away by the compiler. + for(Index i=0; i::size-1; ++i) + mask.select[i] = false; + mask.select[unpacket_traits::size-1] = true; + return pblend(mask, pset1(b), a); +} + } // end namespace internal } // end namespace Eigen #endif // EIGEN_GENERIC_PACKET_MATH_H - diff --git a/external/eigen3/Eigen/src/Core/GlobalFunctions.h b/external/eigen3/Eigen/src/Core/GlobalFunctions.h index 2acf977..769dc25 100644 --- a/external/eigen3/Eigen/src/Core/GlobalFunctions.h +++ b/external/eigen3/Eigen/src/Core/GlobalFunctions.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2010-2012 Gael Guennebaud +// Copyright (C) 2010-2016 Gael Guennebaud // Copyright (C) 2010 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -11,13 +11,30 @@ #ifndef EIGEN_GLOBAL_FUNCTIONS_H #define EIGEN_GLOBAL_FUNCTIONS_H -#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR) \ +#ifdef EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \ + /** \returns an expression of the coefficient-wise DOC_OP of \a x + + DOC_DETAILS + + \sa Math functions, class CwiseUnaryOp + */ \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived> \ + NAME(const Eigen::ArrayBase& x); + +#else + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME,FUNCTOR,DOC_OP,DOC_DETAILS) \ template \ inline const Eigen::CwiseUnaryOp, const Derived> \ - NAME(const Eigen::ArrayBase& x) { \ - return x.derived(); \ + (NAME)(const Eigen::ArrayBase& x) { \ + return Eigen::CwiseUnaryOp, const Derived>(x.derived()); \ } +#endif // EIGEN_PARSED_BY_DOXYGEN + #define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME,FUNCTOR) \ \ template \ @@ -30,55 +47,133 @@ { \ static inline typename NAME##_retval >::type run(const Eigen::ArrayBase& x) \ { \ - return x.derived(); \ + return typename NAME##_retval >::type(x.derived()); \ } \ }; - namespace Eigen { - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real,scalar_real_op,real part,\sa ArrayBase::real) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag,scalar_imag_op,imaginary part,\sa ArrayBase::imag) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj,scalar_conjugate_op,complex conjugate,\sa ArrayBase::conjugate) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse,scalar_inverse_op,inverse,\sa ArrayBase::inverse) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin,scalar_sin_op,sine,\sa ArrayBase::sin) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos,scalar_cos_op,cosine,\sa ArrayBase::cos) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan,scalar_tan_op,tangent,\sa ArrayBase::tan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan,scalar_atan_op,arc-tangent,\sa ArrayBase::atan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin,scalar_asin_op,arc-sine,\sa ArrayBase::asin) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos,scalar_acos_op,arc-consine,\sa ArrayBase::acos) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh,scalar_sinh_op,hyperbolic sine,\sa ArrayBase::sinh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op,hyperbolic cosine,\sa ArrayBase::cosh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op,hyperbolic tangent,\sa ArrayBase::tanh) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op,natural logarithm of the gamma function,\sa ArrayBase::lgamma) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs,scalar_abs_op,absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2,scalar_abs2_op,squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg,scalar_arg_op,complex argument,\sa ArrayBase::arg) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt,scalar_sqrt_op,square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt,scalar_rsqrt_op,reciprocal square root,\sa ArrayBase::rsqrt) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square,scalar_square_op,square (power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube,scalar_cube_op,cube (power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round,scalar_round_op,nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(floor,scalar_floor_op,nearest integer not greater than the giben value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ceil,scalar_ceil_op,nearest integer not less than the giben value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isnan,scalar_isnan_op,not-a-number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isinf,scalar_isinf_op,infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite,scalar_isfinite_op,finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite) + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign) + /** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. + * + * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar). + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Derived,Constant > + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,typename Derived::Scalar,ScalarExponent), + const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type + pow(const Eigen::ArrayBase& x, const ScalarExponent& exponent) { + return x.derived().pow(exponent); + } + template - inline const Eigen::CwiseUnaryOp, const Derived> + inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow) pow(const Eigen::ArrayBase& x, const typename Derived::Scalar& exponent) { return x.derived().pow(exponent); } +#endif - template - inline const Eigen::CwiseBinaryOp, const Derived, const Derived> - pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) + /** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power. + * + * Example: \include Cwise_array_power_array.cpp + * Output: \verbinclude Cwise_array_power_array.out + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ + template + inline const Eigen::CwiseBinaryOp, const Derived, const ExponentDerived> + pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { - return Eigen::CwiseBinaryOp, const Derived, const Derived>( + return Eigen::CwiseBinaryOp, const Derived, const ExponentDerived>( x.derived(), exponents.derived() ); } - /** - * \brief Component-wise division of a scalar by array elements. - **/ - template - inline const Eigen::CwiseUnaryOp, const Derived> - operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase& a) + /** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power between a scalar and an array of exponents. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar). + * + * Example: \include Cwise_scalar_power_array.cpp + * Output: \verbinclude Cwise_scalar_power_array.out + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN + template + inline const CwiseBinaryOp,Constant,Derived> + pow(const Scalar& x,const Eigen::ArrayBase& x); +#else + template + inline typename internal::enable_if< !(internal::is_same::value) && EIGEN_SCALAR_BINARY_SUPPORTED(pow,Scalar,typename Derived::Scalar), + const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type + pow(const Scalar& x, const Eigen::ArrayBase& exponents) { - return Eigen::CwiseUnaryOp, const Derived>( - a.derived(), - Eigen::internal::scalar_inverse_mult_op(s) - ); + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); } + template + inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow) + pow(const typename Derived::Scalar& x, const Eigen::ArrayBase& exponents) + { + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)( + typename internal::plain_constant_type::type(exponents.rows(), exponents.cols(), x), exponents.derived() ); + } +#endif + + namespace internal { EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real,scalar_real_op) diff --git a/external/eigen3/Eigen/src/Core/IO.h b/external/eigen3/Eigen/src/Core/IO.h index 8d4bc59..da7fd6c 100644 --- a/external/eigen3/Eigen/src/Core/IO.h +++ b/external/eigen3/Eigen/src/Core/IO.h @@ -49,7 +49,7 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& */ struct IOFormat { - /** Default contructor, see class IOFormat for the meaning of the parameters */ + /** Default constructor, see class IOFormat for the meaning of the parameters */ IOFormat(int _precision = StreamPrecision, int _flags = 0, const std::string& _coeffSeparator = " ", const std::string& _rowSeparator = "\n", const std::string& _rowPrefix="", const std::string& _rowSuffix="", @@ -57,6 +57,10 @@ struct IOFormat : matPrefix(_matPrefix), matSuffix(_matSuffix), rowPrefix(_rowPrefix), rowSuffix(_rowSuffix), rowSeparator(_rowSeparator), rowSpacer(""), coeffSeparator(_coeffSeparator), precision(_precision), flags(_flags) { + // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline + // don't add rowSpacer if columns are not to be aligned + if((flags & DontAlignCols)) + return; int i = int(matSuffix.length())-1; while (i>=0 && matSuffix[i]!='\n') { @@ -76,7 +80,7 @@ struct IOFormat * * \brief Pseudo expression providing matrix output with given format * - * \param ExpressionType the type of the object on which IO stream operations are performed + * \tparam ExpressionType the type of the object on which IO stream operations are performed * * This class represents an expression with stream operators controlled by a given IOFormat. * It is the return type of DenseBase::format() @@ -101,52 +105,24 @@ class WithFormat } protected: - const typename ExpressionType::Nested m_matrix; + typename ExpressionType::Nested m_matrix; IOFormat m_format; }; -/** \returns a WithFormat proxy object allowing to print a matrix the with given - * format \a fmt. - * - * See class IOFormat for some examples. - * - * \sa class IOFormat, class WithFormat - */ -template -inline const WithFormat -DenseBase::format(const IOFormat& fmt) const -{ - return WithFormat(derived(), fmt); -} - namespace internal { -template -struct significant_decimals_default_impl -{ - typedef typename NumTraits::Real RealScalar; - static inline int run() - { - using std::ceil; - using std::log; - return cast(ceil(-log(NumTraits::epsilon())/log(RealScalar(10)))); - } -}; - +// NOTE: This helper is kept for backward compatibility with previous code specializing +// this internal::significant_decimals_impl structure. In the future we should directly +// call digits10() which has been introduced in July 2016 in 3.3. template -struct significant_decimals_default_impl +struct significant_decimals_impl { static inline int run() { - return 0; + return NumTraits::digits10(); } }; -template -struct significant_decimals_impl - : significant_decimals_default_impl::IsInteger> -{}; - /** \internal * print the matrix \a _m to the output stream \a s using the output format \a fmt */ template @@ -160,7 +136,6 @@ std::ostream & print_matrix(std::ostream & s, const Derived& _m, const IOFormat& typename Derived::Nested m = _m; typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; Index width = 0; diff --git a/external/eigen3/Eigen/src/Core/Inverse.h b/external/eigen3/Eigen/src/Core/Inverse.h new file mode 100644 index 0000000..b76f043 --- /dev/null +++ b/external/eigen3/Eigen/src/Core/Inverse.h @@ -0,0 +1,118 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INVERSE_H +#define EIGEN_INVERSE_H + +namespace Eigen { + +template class InverseImpl; + +namespace internal { + +template +struct traits > + : traits +{ + typedef typename XprType::PlainObject PlainObject; + typedef traits BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit + }; +}; + +} // end namespace internal + +/** \class Inverse + * + * \brief Expression of the inverse of another expression + * + * \tparam XprType the type of the expression we are taking the inverse + * + * This class represents an abstract expression of A.inverse() + * and most of the time this is the only way it is used. + * + */ +template +class Inverse : public InverseImpl::StorageKind> +{ +public: + typedef typename XprType::StorageIndex StorageIndex; + typedef typename XprType::PlainObject PlainObject; + typedef typename XprType::Scalar Scalar; + typedef typename internal::ref_selector::type XprTypeNested; + typedef typename internal::remove_all::type XprTypeNestedCleaned; + typedef typename internal::ref_selector::type Nested; + typedef typename internal::remove_all::type NestedExpression; + + explicit EIGEN_DEVICE_FUNC Inverse(const XprType &xpr) + : m_xpr(xpr) + {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + + EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + +protected: + XprTypeNested m_xpr; +}; + +// Generic API dispatcher +template +class InverseImpl + : public internal::generic_xpr_base >::type +{ +public: + typedef typename internal::generic_xpr_base >::type Base; + typedef typename XprType::Scalar Scalar; +private: + + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +/** \internal + * \brief Default evaluator for Inverse expression. + * + * This default evaluator for Inverse expression simply evaluate the inverse into a temporary + * by a call to internal::call_assignment_no_alias. + * Therefore, inverse implementers only have to specialize Assignment, ...> for + * there own nested expression. + * + * \sa class Inverse + */ +template +struct unary_evaluator > + : public evaluator::PlainObject> +{ + typedef Inverse InverseType; + typedef typename InverseType::PlainObject PlainObject; + typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + unary_evaluator(const InverseType& inv_xpr) + : m_result(inv_xpr.rows(), inv_xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + internal::call_assignment_no_alias(m_result, inv_xpr); + } + +protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_H diff --git a/external/eigen3/Eigen/src/Core/Map.h b/external/eigen3/Eigen/src/Core/Map.h index f804c89..06d1967 100644 --- a/external/eigen3/Eigen/src/Core/Map.h +++ b/external/eigen3/Eigen/src/Core/Map.h @@ -13,13 +13,35 @@ namespace Eigen { +namespace internal { +template +struct traits > + : public traits +{ + typedef traits TraitsBase; + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + Alignment = int(MapOptions)&int(AlignedMask), + Flags0 = TraitsBase::Flags & (~NestByRefBit), + Flags = is_lvalue::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) + }; +private: + enum { Options }; // Expressions don't have Options +}; +} + /** \class Map * \ingroup Core_Module * * \brief A matrix or vector expression mapping an existing array of data. * * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam MapOptions specifies whether the pointer is \c #Aligned, or \c #Unaligned. + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. * The default is \c #Unaligned. * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout * of an ordinary, contiguous array. This can be overridden by specifying strides. @@ -63,44 +85,6 @@ namespace Eigen { * * \sa PlainObjectBase::Map(), \ref TopicStorageOrders */ - -namespace internal { -template -struct traits > - : public traits -{ - typedef traits TraitsBase; - typedef typename PlainObjectType::Index Index; - typedef typename PlainObjectType::Scalar Scalar; - enum { - InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 - ? int(PlainObjectType::InnerStrideAtCompileTime) - : int(StrideType::InnerStrideAtCompileTime), - OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 - ? int(PlainObjectType::OuterStrideAtCompileTime) - : int(StrideType::OuterStrideAtCompileTime), - HasNoInnerStride = InnerStrideAtCompileTime == 1, - HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, - HasNoStride = HasNoInnerStride && HasNoOuterStride, - IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), - IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, - KeepsPacketAccess = bool(HasNoInnerStride) - && ( bool(IsDynamicSize) - || HasNoOuterStride - || ( OuterStrideAtCompileTime!=Dynamic - && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), - Flags0 = TraitsBase::Flags & (~NestByRefBit), - Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), - Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) - ? int(Flags1) : int(Flags1 & ~LinearAccessBit), - Flags3 = is_lvalue::value ? int(Flags2) : (int(Flags2) & ~LvalueBit), - Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit) - }; -private: - enum { Options }; // Expressions don't have Options -}; -} - template class Map : public MapBase > { @@ -110,19 +94,17 @@ template class Ma EIGEN_DENSE_PUBLIC_INTERFACE(Map) typedef typename Base::PointerType PointerType; -#if EIGEN2_SUPPORT_STAGE <= STAGE30_FULL_EIGEN3_API - typedef const Scalar* PointerArgType; - inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast(ptr); } -#else typedef PointerType PointerArgType; + EIGEN_DEVICE_FUNC inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } -#endif + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() @@ -134,10 +116,11 @@ template class Ma /** Constructor in the fixed-size case. * * \param dataPtr pointer to the array to map - * \param a_stride optional Stride object, passing the strides. + * \param stride optional Stride object, passing the strides. */ - inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride) + EIGEN_DEVICE_FUNC + explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -145,11 +128,12 @@ template class Ma /** Constructor in the dynamic-size vector case. * * \param dataPtr pointer to the array to map - * \param a_size the size of the vector expression - * \param a_stride optional Stride object, passing the strides. + * \param size the size of the vector expression + * \param stride optional Stride object, passing the strides. */ - inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride) + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -157,12 +141,13 @@ template class Ma /** Constructor in the dynamic-size matrix case. * * \param dataPtr pointer to the array to map - * \param nbRows the number of rows of the matrix expression - * \param nbCols the number of columns of the matrix expression - * \param a_stride optional Stride object, passing the strides. + * \param rows the number of rows of the matrix expression + * \param cols the number of columns of the matrix expression + * \param stride optional Stride object, passing the strides. */ - inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType()) - : Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride) + EIGEN_DEVICE_FUNC + inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) { PlainObjectType::Base::_check_template_params(); } @@ -173,19 +158,6 @@ template class Ma StrideType m_stride; }; -template -inline Array<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> - ::Array(const Scalar *data) -{ - this->_set_noalias(Eigen::Map(data)); -} - -template -inline Matrix<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols> - ::Matrix(const Scalar *data) -{ - this->_set_noalias(Eigen::Map(data)); -} } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/MapBase.h b/external/eigen3/Eigen/src/Core/MapBase.h index 81efc4a..020f939 100644 --- a/external/eigen3/Eigen/src/Core/MapBase.h +++ b/external/eigen3/Eigen/src/Core/MapBase.h @@ -12,15 +12,25 @@ #define EIGEN_MAPBASE_H #define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ - EIGEN_STATIC_ASSERT((int(internal::traits::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + EIGEN_STATIC_ASSERT((int(internal::evaluator::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) namespace Eigen { -/** \class MapBase - * \ingroup Core_Module +/** \ingroup Core_Module * - * \brief Base class for Map and Block expression with direct access + * \brief Base class for dense Map and Block expression with direct access + * + * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense + * Map and Block objects with direct access. + * Typical users do not have to directly deal with this class. + * + * This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN. + * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details. + * + * The \c Derived class has to provide the following two methods describing the memory layout: + * \code Index innerStride() const; \endcode + * \code Index outerStride() const; \endcode * * \sa class Map, class Block */ @@ -37,7 +47,6 @@ template class MapBase }; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -76,8 +85,10 @@ template class MapBase typedef typename Base::CoeffReturnType CoeffReturnType; - inline Index rows() const { return m_rows.value(); } - inline Index cols() const { return m_cols.value(); } + /** \copydoc DenseBase::rows() */ + EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); } + /** \copydoc DenseBase::cols() */ + EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); } /** Returns a pointer to the first coefficient of the matrix or vector. * @@ -85,30 +96,39 @@ template class MapBase * * \sa innerStride(), outerStride() */ - inline const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_data; } + /** \copydoc PlainObjectBase::coeff(Index,Index) const */ + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const { return m_data[colId * colStride() + rowId * rowStride()]; } + /** \copydoc PlainObjectBase::coeff(Index) const */ + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index index) const { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) return m_data[index * innerStride()]; } + /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */ + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { return this->m_data[colId * colStride() + rowId * rowStride()]; } + /** \copydoc PlainObjectBase::coeffRef(Index) const */ + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) return this->m_data[index * innerStride()]; } + /** \internal */ template inline PacketScalar packet(Index rowId, Index colId) const { @@ -116,6 +136,7 @@ template class MapBase (m_data + (colId * colStride() + rowId * rowStride())); } + /** \internal */ template inline PacketScalar packet(Index index) const { @@ -123,12 +144,16 @@ template class MapBase return internal::ploadt(m_data + index * innerStride()); } + /** \internal Constructor for fixed size matrices or vectors */ + EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - checkSanity(); + checkSanity(); } + /** \internal Constructor for dynamically sized vectors */ + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : m_data(dataPtr), m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)), @@ -137,16 +162,18 @@ template class MapBase EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) eigen_assert(vecSize >= 0); eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize); - checkSanity(); + checkSanity(); } - inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) - : m_data(dataPtr), m_rows(nbRows), m_cols(nbCols) + /** \internal Constructor for dynamically sized matrices */ + EIGEN_DEVICE_FUNC + inline MapBase(PointerType dataPtr, Index rows, Index cols) + : m_data(dataPtr), m_rows(rows), m_cols(cols) { eigen_assert( (dataPtr == 0) - || ( nbRows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == nbRows) - && nbCols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols))); - checkSanity(); + || ( rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) + && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); + checkSanity(); } #ifdef EIGEN_MAPBASE_PLUGIN @@ -155,20 +182,36 @@ template class MapBase protected: - void checkSanity() const + template + EIGEN_DEVICE_FUNC + void checkSanity(typename internal::enable_if<(internal::traits::Alignment>0),void*>::type = 0) const { - EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits::Flags&PacketAccessBit, - internal::inner_stride_at_compile_time::ret==1), - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); - eigen_assert(EIGEN_IMPLIES(internal::traits::Flags&AlignedBit, (size_t(m_data) % 16) == 0) - && "input pointer is not aligned on a 16 byte boundary"); +#if EIGEN_MAX_ALIGN_BYTES>0 + eigen_assert(( ((internal::UIntPtr(m_data) % internal::traits::Alignment) == 0) + || (cols() * rows() * innerStride() * sizeof(Scalar)) < internal::traits::Alignment ) && "data is not aligned"); +#endif } + template + EIGEN_DEVICE_FUNC + void checkSanity(typename internal::enable_if::Alignment==0,void*>::type = 0) const + {} + PointerType m_data; const internal::variable_if_dynamic m_rows; const internal::variable_if_dynamic m_cols; }; +/** \ingroup Core_Module + * + * \brief Base class for non-const dense Map and Block expression with direct access + * + * This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of + * dense Map and Block objects with direct access. + * It inherits MapBase which defines the const variant for reading specific entries. + * + * \sa class Map, class Block + */ template class MapBase : public MapBase { @@ -179,7 +222,7 @@ template class MapBase typedef typename Base::Scalar Scalar; typedef typename Base::PacketScalar PacketScalar; - typedef typename Base::Index Index; + typedef typename Base::StorageIndex StorageIndex; typedef typename Base::PointerType PointerType; using Base::derived; @@ -200,14 +243,18 @@ template class MapBase const Scalar >::type ScalarWithConstIfNotLvalue; + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return this->m_data; } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) { return this->m_data[col * colStride() + row * rowStride()]; } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index index) { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) @@ -229,10 +276,11 @@ template class MapBase (this->m_data + index * innerStride(), val); } - explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} - inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} - inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {} + EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {} + EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) { ReadOnlyMapBase::Base::operator=(other); diff --git a/external/eigen3/Eigen/src/Core/MathFunctions.h b/external/eigen3/Eigen/src/Core/MathFunctions.h index f707aa4..a648aa0 100644 --- a/external/eigen3/Eigen/src/Core/MathFunctions.h +++ b/external/eigen3/Eigen/src/Core/MathFunctions.h @@ -10,11 +10,25 @@ #ifndef EIGEN_MATHFUNCTIONS_H #define EIGEN_MATHFUNCTIONS_H +// source: http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html +// TODO this should better be moved to NumTraits +#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + + namespace Eigen { +// On WINCE, std::abs is defined for int only, so let's defined our own overloads: +// This issue has been confirmed with MSVC 2008 only, but the issue might exist for more recent versions too. +#if EIGEN_OS_WINCE && EIGEN_COMP_MSVC && EIGEN_COMP_MSVC<=1500 +long abs(long x) { return (labs(x)); } +double abs(double x) { return (fabs(x)); } +float abs(float x) { return (fabsf(x)); } +long double abs(long double x) { return (fabsl(x)); } +#endif + namespace internal { -/** \internal \struct global_math_functions_filtering_base +/** \internal \class global_math_functions_filtering_base * * What it does: * Defines a typedef 'type' as follows: @@ -62,6 +76,7 @@ template::IsComplex> struct real_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x; @@ -72,6 +87,7 @@ template struct real_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { using std::real; @@ -81,13 +97,25 @@ struct real_default_impl template struct real_impl : real_default_impl {}; +#ifdef __CUDA_ARCH__ +template +struct real_impl > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex& x) + { + return x.real(); + } +}; +#endif + template struct real_retval { typedef typename NumTraits::Real type; }; - /**************************************************************************** * Implementation of imag * ****************************************************************************/ @@ -96,6 +124,7 @@ template::IsComplex> struct imag_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar&) { return RealScalar(0); @@ -106,6 +135,7 @@ template struct imag_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { using std::imag; @@ -115,6 +145,19 @@ struct imag_default_impl template struct imag_impl : imag_default_impl {}; +#ifdef __CUDA_ARCH__ +template +struct imag_impl > +{ + typedef T RealScalar; + EIGEN_DEVICE_FUNC + static inline T run(const std::complex& x) + { + return x.imag(); + } +}; +#endif + template struct imag_retval { @@ -129,10 +172,12 @@ template struct real_ref_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[0]; } + EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { return reinterpret_cast(&x)[0]; @@ -153,10 +198,12 @@ template struct imag_ref_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[1]; } + EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { return reinterpret_cast(&x)[1]; @@ -166,10 +213,12 @@ struct imag_ref_default_impl template struct imag_ref_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(Scalar&) { return Scalar(0); } + EIGEN_DEVICE_FUNC static inline const Scalar run(const Scalar&) { return Scalar(0); @@ -192,6 +241,7 @@ struct imag_ref_retval template::IsComplex> struct conj_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { return x; @@ -201,6 +251,7 @@ struct conj_impl template struct conj_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { using std::conj; @@ -222,6 +273,7 @@ template struct abs2_impl_default { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x*x; @@ -232,6 +284,7 @@ template struct abs2_impl_default // IsComplex { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return real(x)*real(x) + imag(x)*imag(x); @@ -242,6 +295,7 @@ template struct abs2_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return abs2_impl_default::IsComplex>::run(x); @@ -262,9 +316,10 @@ template struct norm1_default_impl { typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { - using std::abs; + EIGEN_USING_STD_MATH(abs); return abs(real(x)) + abs(imag(x)); } }; @@ -272,9 +327,10 @@ struct norm1_default_impl template struct norm1_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { - using std::abs; + EIGEN_USING_STD_MATH(abs); return abs(x); } }; @@ -298,16 +354,22 @@ struct hypot_impl typedef typename NumTraits::Real RealScalar; static inline RealScalar run(const Scalar& x, const Scalar& y) { - using std::max; - using std::min; - using std::abs; - using std::sqrt; + EIGEN_USING_STD_MATH(abs); + EIGEN_USING_STD_MATH(sqrt); RealScalar _x = abs(x); RealScalar _y = abs(y); - RealScalar p = (max)(_x, _y); + Scalar p, qp; + if(_x>_y) + { + p = _x; + qp = _y / p; + } + else + { + p = _y; + qp = _x / p; + } if(p==RealScalar(0)) return RealScalar(0); - RealScalar q = (min)(_x, _y); - RealScalar qp = q/p; return p * sqrt(RealScalar(1) + qp*qp); } }; @@ -325,6 +387,7 @@ struct hypot_retval template struct cast_impl { + EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) { return static_cast(x); @@ -334,48 +397,124 @@ struct cast_impl // here, for once, we're plainly returning NewType: we don't want cast to do weird things. template +EIGEN_DEVICE_FUNC inline NewType cast(const OldType& x) { return cast_impl::run(x); } /**************************************************************************** -* Implementation of atanh2 * +* Implementation of round * ****************************************************************************/ -template -struct atanh2_default_impl -{ - typedef Scalar retval; - typedef typename NumTraits::Real RealScalar; - static inline Scalar run(const Scalar& x, const Scalar& y) +#if EIGEN_HAS_CXX11_MATH + template + struct round_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + using std::round; + return round(x); + } + }; +#else + template + struct round_impl { - using std::abs; - using std::log; - using std::sqrt; - Scalar z = x / y; - if (y == Scalar(0) || abs(z) > sqrt(NumTraits::epsilon())) - return RealScalar(0.5) * log((y + x) / (y - x)); - else - return z + z*z*z / RealScalar(3); - } + static inline Scalar run(const Scalar& x) + { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) + EIGEN_USING_STD_MATH(floor); + EIGEN_USING_STD_MATH(ceil); + return (x > Scalar(0)) ? floor(x + Scalar(0.5)) : ceil(x - Scalar(0.5)); + } + }; +#endif + +template +struct round_retval +{ + typedef Scalar type; }; +/**************************************************************************** +* Implementation of arg * +****************************************************************************/ + +#if EIGEN_HAS_CXX11_MATH + template + struct arg_impl { + static inline Scalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; +#else + template::IsComplex> + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + return (x < Scalar(0)) ? Scalar(EIGEN_PI) : Scalar(0); } + }; + + template + struct arg_default_impl + { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC + static inline RealScalar run(const Scalar& x) + { + EIGEN_USING_STD_MATH(arg); + return arg(x); + } + }; + + template struct arg_impl : arg_default_impl {}; +#endif + template -struct atanh2_default_impl +struct arg_retval { - static inline Scalar run(const Scalar&, const Scalar&) + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** +* Implementation of log1p * +****************************************************************************/ + +namespace std_fallback { + // fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar, + // or that there is no suitable std::log1p function available + template + EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + typedef typename NumTraits::Real RealScalar; + EIGEN_USING_STD_MATH(log); + Scalar x1p = RealScalar(1) + x; + return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); + } +} + +template +struct log1p_impl { + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - return Scalar(0); + #if EIGEN_HAS_CXX11_MATH + using std::log1p; + #endif + using std_fallback::log1p; + return log1p(x); } }; -template -struct atanh2_impl : atanh2_default_impl::IsInteger> {}; template -struct atanh2_retval +struct log1p_retval { typedef Scalar type; }; @@ -384,24 +523,26 @@ struct atanh2_retval * Implementation of pow * ****************************************************************************/ -template -struct pow_default_impl +template::IsInteger&&NumTraits::IsInteger> +struct pow_impl { - typedef Scalar retval; - static inline Scalar run(const Scalar& x, const Scalar& y) + //typedef Scalar retval; + typedef typename ScalarBinaryOpTraits >::ReturnType result_type; + static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { - using std::pow; + EIGEN_USING_STD_MATH(pow); return pow(x, y); } }; -template -struct pow_default_impl +template +struct pow_impl { - static inline Scalar run(Scalar x, Scalar y) + typedef ScalarX result_type; + static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y) { - Scalar res(1); - eigen_assert(!NumTraits::IsSigned || y >= 0); + ScalarX res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); if(y & 1) res *= x; y >>= 1; while(y) @@ -414,15 +555,6 @@ struct pow_default_impl } }; -template -struct pow_impl : pow_default_impl::IsInteger> {}; - -template -struct pow_retval -{ - typedef Scalar type; -}; - /**************************************************************************** * Implementation of random * ****************************************************************************/ @@ -458,48 +590,48 @@ struct random_default_impl }; enum { - floor_log2_terminate, - floor_log2_move_up, - floor_log2_move_down, - floor_log2_bogus + meta_floor_log2_terminate, + meta_floor_log2_move_up, + meta_floor_log2_move_down, + meta_floor_log2_bogus }; -template struct floor_log2_selector +template struct meta_floor_log2_selector { enum { middle = (lower + upper) / 2, - value = (upper <= lower + 1) ? int(floor_log2_terminate) - : (n < (1 << middle)) ? int(floor_log2_move_down) - : (n==0) ? int(floor_log2_bogus) - : int(floor_log2_move_up) + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n==0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) }; }; template::value> -struct floor_log2 {}; + int selector = meta_floor_log2_selector::value> +struct meta_floor_log2 {}; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle>::value }; + enum { value = meta_floor_log2::middle>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { - enum { value = floor_log2::middle, upper>::value }; + enum { value = meta_floor_log2::middle, upper>::value }; }; template -struct floor_log2 +struct meta_floor_log2 { enum { value = (n >= ((unsigned int)(1) << (lower+1))) ? lower+1 : lower }; }; template -struct floor_log2 +struct meta_floor_log2 { // no value, error at compile time }; @@ -508,7 +640,7 @@ template struct random_default_impl { static inline Scalar run(const Scalar& x, const Scalar& y) - { + { typedef typename conditional::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; if(y #ifdef EIGEN_MAKING_DOCS return run(Scalar(NumTraits::IsSigned ? -10 : 0), Scalar(10)); #else - enum { rand_bits = floor_log2<(unsigned int)(RAND_MAX)+1>::value, + enum { rand_bits = meta_floor_log2<(unsigned int)(RAND_MAX)+1>::value, scalar_bits = sizeof(Scalar) * CHAR_BIT, shift = EIGEN_PLAIN_ENUM_MAX(0, int(rand_bits) - int(scalar_bits)), offset = NumTraits::IsSigned ? (1 << (EIGEN_PLAIN_ENUM_MIN(rand_bits,scalar_bits)-1)) : 0 @@ -569,97 +701,601 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); } +// Implementatin of is* functions + +// std::is* do not work with fast-math and gcc, std::is* are available on MSVC 2013 and newer, as well as in clang. +#if (EIGEN_HAS_CXX11_MATH && !(EIGEN_COMP_GNUC_STRICT && __FINITE_MATH_ONLY__)) || (EIGEN_COMP_MSVC>=1800) || (EIGEN_COMP_CLANG) +#define EIGEN_USE_STD_FPCLASSIFY 1 +#else +#define EIGEN_USE_STD_FPCLASSIFY 0 +#endif + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isnan_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isinf_impl(const T&) { return false; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if::value,bool>::type +isfinite_impl(const T&) { return true; } + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isfinite_impl(const T& x) +{ + #ifdef __CUDA_ARCH__ + return (::isfinite)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isfinite; + return isfinite EIGEN_NOT_A_MACRO (x); + #else + return x<=NumTraits::highest() && x>=NumTraits::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isinf_impl(const T& x) +{ + #ifdef __CUDA_ARCH__ + return (::isinf)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isinf; + return isinf EIGEN_NOT_A_MACRO (x); + #else + return x>NumTraits::highest() || x::lowest(); + #endif +} + +template +EIGEN_DEVICE_FUNC +typename internal::enable_if<(!internal::is_integral::value)&&(!NumTraits::IsComplex),bool>::type +isnan_impl(const T& x) +{ + #ifdef __CUDA_ARCH__ + return (::isnan)(x); + #elif EIGEN_USE_STD_FPCLASSIFY + using std::isnan; + return isnan EIGEN_NOT_A_MACRO (x); + #else + return x != x; + #endif +} + +#if (!EIGEN_USE_STD_FPCLASSIFY) + +#if EIGEN_COMP_MSVC + +template EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x) +{ + return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; +} + +//MSVC defines a _isnan builtin function, but for double only +EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; } +EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; } + +EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } +EIGEN_DEVICE_FUNC inline bool isinf_impl(const float& x) { return isinf_msvc_helper(x); } + +#elif (defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ && EIGEN_COMP_GNUC) + +#if EIGEN_GNUC_AT_LEAST(5,0) + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((optimize("no-finite-math-only"))) +#else + // NOTE the inline qualifier and noinline attribute are both needed: the former is to avoid linking issue (duplicate symbol), + // while the second prevent too aggressive optimizations in fast-math mode: + #define EIGEN_TMP_NOOPT_ATTRIB EIGEN_DEVICE_FUNC inline __attribute__((noinline,optimize("no-finite-math-only"))) +#endif + +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const long double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const double& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isnan_impl(const float& x) { return __builtin_isnan(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const double& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const float& x) { return __builtin_isinf(x); } +template<> EIGEN_TMP_NOOPT_ATTRIB bool isinf_impl(const long double& x) { return __builtin_isinf(x); } + +#undef EIGEN_TMP_NOOPT_ATTRIB + +#endif + +#endif + +// The following overload are defined at the end of this file +template EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& x); +template EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x); +template EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); + +template T generic_fast_tanh_float(const T& a_x); + } // end namespace internal /**************************************************************************** -* Generic math function * +* Generic math functions * ****************************************************************************/ namespace numext { +#ifndef __CUDA_ARCH__ +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(min); + return min EIGEN_NOT_A_MACRO (x,y); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + EIGEN_USING_STD_MATH(max); + return max EIGEN_NOT_A_MACRO (x,y); +} +#else +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) +{ + return y < x ? y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) +{ + return fminf(x, y); +} +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) +{ + return x < y ? y : x; +} +template<> +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) +{ + return fmaxf(x, y); +} +#endif + + template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); -} +} template +EIGEN_DEVICE_FUNC inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x) { return internal::real_ref_impl::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) { return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x) { return internal::imag_ref_impl::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x) { return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x); } template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y) { return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); } template -inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y) +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float log1p(const float &x) { return ::log1pf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double log1p(const double &x) { return ::log1p(x); } +#endif + +template +EIGEN_DEVICE_FUNC +inline typename internal::pow_impl::result_type pow(const ScalarX& x, const ScalarY& y) { - return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y); + return internal::pow_impl::run(x, y); } +template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } +template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } +template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } + template -inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y) +EIGEN_DEVICE_FUNC +inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) +{ + return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC +T (floor)(const T& x) { - return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y); + EIGEN_USING_STD_MATH(floor); + return floor(x); } -// std::isfinite is non standard, so let's define our own version, -// even though it is not very efficient. -template bool (isfinite)(const T& x) +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float floor(const float &x) { return ::floorf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double floor(const double &x) { return ::floor(x); } +#endif + +template +EIGEN_DEVICE_FUNC +T (ceil)(const T& x) +{ + EIGEN_USING_STD_MATH(ceil); + return ceil(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float ceil(const float &x) { return ::ceilf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double ceil(const double &x) { return ::ceil(x); } +#endif + + +/** Log base 2 for 32 bits positive integers. + * Conveniently returns 0 for x==0. */ +inline int log2(int x) { - return x::highest() && x>NumTraits::lowest(); + eigen_assert(x>=0); + unsigned int v(x); + static const int table[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return table[(v * 0x07C4ACDDU) >> 27]; } +/** \returns the square root of \a x. + * + * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * but slightly faster for float/double and some compilers (e.g., gcc), thanks to + * specializations when SSE is enabled. + * + * It's usage is justified in performance critical functions, like norm/normalize. + */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sqrt(const T &x) +{ + EIGEN_USING_STD_MATH(sqrt); + return sqrt(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T log(const T &x) { + EIGEN_USING_STD_MATH(log); + return log(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float log(const float &x) { return ::logf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double log(const double &x) { return ::log(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if::IsSigned || NumTraits::IsComplex,typename NumTraits::Real>::type +abs(const T &x) { + EIGEN_USING_STD_MATH(abs); + return abs(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if::IsSigned || NumTraits::IsComplex),typename NumTraits::Real>::type +abs(const T &x) { + return x; +} + +#if defined(__SYCL_DEVICE_ONLY__) +EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } +EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } +#endif // defined(__SYCL_DEVICE_ONLY__) + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float abs(const float &x) { return ::fabsf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double abs(const double &x) { return ::fabs(x); } + +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float abs(const std::complex& x) { + return ::hypotf(x.real(), x.imag()); +} + +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double abs(const std::complex& x) { + return ::hypot(x.real(), x.imag()); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T exp(const T &x) { + EIGEN_USING_STD_MATH(exp); + return exp(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float exp(const float &x) { return ::expf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double exp(const double &x) { return ::exp(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T cos(const T &x) { + EIGEN_USING_STD_MATH(cos); + return cos(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float cos(const float &x) { return ::cosf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double cos(const double &x) { return ::cos(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sin(const T &x) { + EIGEN_USING_STD_MATH(sin); + return sin(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sin(const float &x) { return ::sinf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sin(const double &x) { return ::sin(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T tan(const T &x) { + EIGEN_USING_STD_MATH(tan); + return tan(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tan(const float &x) { return ::tanf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double tan(const double &x) { return ::tan(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T acos(const T &x) { + EIGEN_USING_STD_MATH(acos); + return acos(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float acos(const float &x) { return ::acosf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double acos(const double &x) { return ::acos(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T asin(const T &x) { + EIGEN_USING_STD_MATH(asin); + return asin(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float asin(const float &x) { return ::asinf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double asin(const double &x) { return ::asin(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T atan(const T &x) { + EIGEN_USING_STD_MATH(atan); + return atan(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float atan(const float &x) { return ::atanf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double atan(const double &x) { return ::atan(x); } +#endif + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T cosh(const T &x) { + EIGEN_USING_STD_MATH(cosh); + return cosh(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float cosh(const float &x) { return ::coshf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double cosh(const double &x) { return ::cosh(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sinh(const T &x) { + EIGEN_USING_STD_MATH(sinh); + return sinh(x); +} + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sinh(const float &x) { return ::sinhf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sinh(const double &x) { return ::sinh(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T tanh(const T &x) { + EIGEN_USING_STD_MATH(tanh); + return tanh(x); +} + +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tanh(float x) { return internal::generic_fast_tanh_float(x); } +#endif + +#ifdef __CUDACC__ +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float tanh(const float &x) { return ::tanhf(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double tanh(const double &x) { return ::tanh(x); } +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T fmod(const T& a, const T& b) { + EIGEN_USING_STD_MATH(fmod); + return fmod(a, b); +} + +#ifdef __CUDACC__ +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float fmod(const float& a, const float& b) { + return ::fmodf(a, b); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double fmod(const double& a, const double& b) { + return ::fmod(a, b); +} +#endif + } // end namespace numext namespace internal { +template +EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& x) +{ + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x) +{ + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x) +{ + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + /**************************************************************************** * Implementation of fuzzy comparisons * ****************************************************************************/ @@ -673,18 +1309,17 @@ template struct scalar_fuzzy_default_impl { typedef typename NumTraits::Real RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) { - using std::abs; - return abs(x) <= abs(y) * prec; + return numext::abs(x) <= numext::abs(y) * prec; } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - using std::min; - using std::abs; - return abs(x - y) <= (min)(abs(x), abs(y)) * prec; + return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) { return x <= y || isApprox(x, y, prec); @@ -695,15 +1330,17 @@ template struct scalar_fuzzy_default_impl { typedef typename NumTraits::Real RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&) { return x == Scalar(0); } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&) { return x == y; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&) { return x <= y; @@ -714,36 +1351,36 @@ template struct scalar_fuzzy_default_impl { typedef typename NumTraits::Real RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec) { return numext::abs2(x) <= numext::abs2(y) * prec * prec; } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { - using std::min; - return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec; + return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; } }; template struct scalar_fuzzy_impl : scalar_fuzzy_default_impl::IsComplex, NumTraits::IsInteger> {}; -template +template EIGEN_DEVICE_FUNC inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const typename NumTraits::Real &precision = NumTraits::dummy_precision()) { return scalar_fuzzy_impl::template isMuchSmallerThan(x, y, precision); } -template +template EIGEN_DEVICE_FUNC inline bool isApprox(const Scalar& x, const Scalar& y, const typename NumTraits::Real &precision = NumTraits::dummy_precision()) { return scalar_fuzzy_impl::isApprox(x, y, precision); } -template +template EIGEN_DEVICE_FUNC inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const typename NumTraits::Real &precision = NumTraits::dummy_precision()) { @@ -766,17 +1403,19 @@ template<> struct scalar_fuzzy_impl { typedef bool RealScalar; - template + template EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { return x == y; } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&) { return (!x) || y; diff --git a/external/eigen3/Eigen/src/Core/MathFunctionsImpl.h b/external/eigen3/Eigen/src/Core/MathFunctionsImpl.h new file mode 100644 index 0000000..3c9ef22 --- /dev/null +++ b/external/eigen3/Eigen/src/Core/MathFunctionsImpl.h @@ -0,0 +1,78 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONSIMPL_H +#define EIGEN_MATHFUNCTIONSIMPL_H + +namespace Eigen { + +namespace internal { + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/6-degree rational interpolant which + is accurate up to a couple of ulp in the range [-9, 9], outside of which + the tanh(x) = +/-1. + + This implementation works on both scalars and packets. +*/ +template +T generic_fast_tanh_float(const T& a_x) +{ + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + const T plus_9 = pset1(9.f); + const T minus_9 = pset1(-9.f); + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); + // The monomial coefficients of the numerator polynomial (odd). + const T alpha_1 = pset1(4.89352455891786e-03f); + const T alpha_3 = pset1(6.37261928875436e-04f); + const T alpha_5 = pset1(1.48572235717979e-05f); + const T alpha_7 = pset1(5.12229709037114e-08f); + const T alpha_9 = pset1(-8.60467152213735e-11f); + const T alpha_11 = pset1(2.00018790482477e-13f); + const T alpha_13 = pset1(-2.76076847742355e-16f); + + // The monomial coefficients of the denominator polynomial (even). + const T beta_0 = pset1(4.89352518554385e-03f); + const T beta_2 = pset1(2.26843463243900e-03f); + const T beta_4 = pset1(1.18534705686654e-04f); + const T beta_6 = pset1(1.19825839466702e-06f); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + T q = pmadd(x2, beta_6, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. + return pdiv(p, q); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONSIMPL_H diff --git a/external/eigen3/Eigen/src/Core/Matrix.h b/external/eigen3/Eigen/src/Core/Matrix.h index 02be142..90c336d 100644 --- a/external/eigen3/Eigen/src/Core/Matrix.h +++ b/external/eigen3/Eigen/src/Core/Matrix.h @@ -13,6 +13,45 @@ namespace Eigen { +namespace internal { +template +struct traits > +{ +private: + enum { size = internal::size_at_compile_time<_Rows,_Cols>::ret }; + typedef typename find_best_packet<_Scalar,size>::type PacketScalar; + enum { + row_major_bit = _Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = _MaxRows==Dynamic || _MaxCols==Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : _MaxRows*_MaxCols, + default_alignment = compute_default_alignment<_Scalar,max_size>::value, + actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, + required_alignment = unpacket_traits::alignment, + packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0 + }; + +public: + typedef _Scalar Scalar; + typedef Dense StorageKind; + typedef Eigen::Index StorageIndex; + typedef MatrixXpr XprKind; + enum { + RowsAtCompileTime = _Rows, + ColsAtCompileTime = _Cols, + MaxRowsAtCompileTime = _MaxRows, + MaxColsAtCompileTime = _MaxCols, + Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, + Options = _Options, + InnerStrideAtCompileTime = 1, + OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment + }; +}; +} + /** \class Matrix * \ingroup Core_Module * @@ -24,13 +63,13 @@ namespace Eigen { * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note"). * * The first three template parameters are required: - * \tparam _Scalar \anchor matrix_tparam_scalar Numeric type, e.g. float, double, int or std::complex. - * User defined sclar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam _Scalar Numeric type, e.g. float, double, int or std::complex. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). * \tparam _Rows Number of rows, or \b Dynamic * \tparam _Cols Number of columns, or \b Dynamic * * The remaining template parameters are optional -- in most cases you don't have to worry about them. - * \tparam _Options \anchor matrix_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \tparam _Options A combination of either \b #RowMajor or \b #ColMajor, and of either * \b #AutoAlign or \b #DontAlign. * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required * for vectorization. It defaults to aligning matrices except for fixed sizes that aren't a multiple of the packet size. @@ -67,7 +106,7 @@ namespace Eigen { * \endcode * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. * * Some notes: * @@ -97,32 +136,44 @@ namespace Eigen { * are the dimensions of the original matrix, while _Rows and _Cols are Dynamic. * * - * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, - * \ref TopicStorageOrders + * ABI and storage layout + * + * The table below summarizes the ABI of some possible Matrix instances which is fixed thorough the lifetime of Eigen 3. + * + * + * + * + * + * + *
Matrix typeEquivalent C structure
\code Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index rows, cols; + * }; + * \endcode
\code + * Matrix + * Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index size; + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0 + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0 + * Eigen::Index rows, cols; + * }; + * \endcode
+ * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest possible power-of-two + * smaller to EIGEN_MAX_STATIC_ALIGN_BYTES. + * + * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, + * \ref TopicStorageOrders */ -namespace internal { -template -struct traits > -{ - typedef _Scalar Scalar; - typedef Dense StorageKind; - typedef DenseIndex Index; - typedef MatrixXpr XprKind; - enum { - RowsAtCompileTime = _Rows, - ColsAtCompileTime = _Cols, - MaxRowsAtCompileTime = _MaxRows, - MaxColsAtCompileTime = _MaxCols, - Flags = compute_matrix_flags<_Scalar, _Rows, _Cols, _Options, _MaxRows, _MaxCols>::ret, - CoeffReadCost = NumTraits::ReadCost, - Options = _Options, - InnerStrideAtCompileTime = 1, - OuterStrideAtCompileTime = (Options&RowMajor) ? ColsAtCompileTime : RowsAtCompileTime - }; -}; -} - template class Matrix : public PlainObjectBase > @@ -151,6 +202,7 @@ class Matrix * * \callgraph */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other) { return Base::_set(other); @@ -167,7 +219,8 @@ class Matrix * remain row-vectors and vectors remain vectors. */ template - EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase& other) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase& other) { return Base::_set(other); } @@ -179,12 +232,14 @@ class Matrix * \copydetails DenseBase::operator=(const EigenBase &other) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase &other) { return Base::operator=(other); } template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue& func) { return Base::operator=(func); @@ -200,6 +255,7 @@ class Matrix * * \sa resize(Index,Index) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix() : Base() { Base::_check_template_params(); @@ -207,60 +263,87 @@ class Matrix } // FIXME is it still needed - Matrix(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + explicit Matrix(internal::constructor_without_unaligned_array_assert) : Base(internal::constructor_without_unaligned_array_assert()) { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } -#ifdef EIGEN_HAVE_RVALUE_REFERENCES - Matrix(Matrix&& other) +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + Matrix(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible::value) : Base(std::move(other)) { Base::_check_template_params(); if (RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic) Base::_set_noalias(other); } - Matrix& operator=(Matrix&& other) + EIGEN_DEVICE_FUNC + Matrix& operator=(Matrix&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable::value) { other.swap(*this); return *this; } #endif - /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors - * - * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, - * it is redundant to pass the dimension here, so it makes more sense to use the default - * constructor Matrix() instead. - */ - EIGEN_STRONG_INLINE explicit Matrix(Index dim) - : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim) + #ifndef EIGEN_PARSED_BY_DOXYGEN + + // This constructor is for both 1x1 matrices and dynamic vectors + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit Matrix(const T& x) { Base::_check_template_params(); - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix) - eigen_assert(dim >= 0); - eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim); - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + Base::template _init1(x); } - #ifndef EIGEN_PARSED_BY_DOXYGEN template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) { Base::_check_template_params(); Base::template _init2(x, y); } #else + /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC + explicit Matrix(const Scalar *data); + + /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * This is useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance, + * calling Matrix(1) will call the initialization constructor: Matrix(const Scalar&). + * For fixed-size \c 1x1 matrices it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_STRONG_INLINE explicit Matrix(Index dim); + /** \brief Constructs an initialized 1x1 matrix with the given coefficient */ + Matrix(const Scalar& x); /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns. * * This is useful for dynamic-size matrices. For fixed-size matrices, * it is redundant to pass these parameters, so one should use the default constructor - * Matrix() instead. */ + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance, + * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y). + * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_DEVICE_FUNC Matrix(Index rows, Index cols); + /** \brief Constructs an initialized 2D vector with given coefficients */ Matrix(const Scalar& x, const Scalar& y); #endif /** \brief Constructs an initialized 3D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) { Base::_check_template_params(); @@ -270,6 +353,7 @@ class Matrix m_storage.data()[2] = z; } /** \brief Constructs an initialized 4D vector with given coefficients */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) { Base::_check_template_params(); @@ -280,76 +364,33 @@ class Matrix m_storage.data()[3] = w; } - explicit Matrix(const Scalar *data); - /** \brief Constructor copying the value of the expression \a other */ - template - EIGEN_STRONG_INLINE Matrix(const MatrixBase& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - // This test resides here, to bring the error messages closer to the user. Normally, these checks - // are performed deeply within the library, thus causing long and scary error traces. - EIGEN_STATIC_ASSERT((internal::is_same::value), - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) - - Base::_check_template_params(); - Base::_set_noalias(other); - } /** \brief Copy constructor */ - EIGEN_STRONG_INLINE Matrix(const Matrix& other) - : Base(other.rows() * other.cols(), other.rows(), other.cols()) - { - Base::_check_template_params(); - Base::_set_noalias(other); - } - /** \brief Copy constructor with in-place evaluation */ - template - EIGEN_STRONG_INLINE Matrix(const ReturnByValue& other) - { - Base::_check_template_params(); - Base::resize(other.rows(), other.cols()); - other.evalTo(*this); - } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Matrix(const Matrix& other) : Base(other) + { } /** \brief Copy constructor for generic expressions. * \sa MatrixBase::operator=(const EigenBase&) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase &other) - : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols()) - { - Base::_check_template_params(); - Base::_resize_to_match(other); - // FIXME/CHECK: isn't *this = other.derived() more efficient. it allows to - // go for pure _set() implementations, right? - *this = other; - } - - /** \internal - * \brief Override MatrixBase::swap() since for dynamic-sized matrices - * of same type it is enough to swap the data pointers. - */ - template - void swap(MatrixBase const & other) - { this->_swap(other.derived()); } + : Base(other.derived()) + { } - inline Index innerStride() const { return 1; } - inline Index outerStride() const { return this->innerSize(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); } /////////// Geometry module /////////// template + EIGEN_DEVICE_FUNC explicit Matrix(const RotationBase& r); template + EIGEN_DEVICE_FUNC Matrix& operator=(const RotationBase& r); - #ifdef EIGEN2_SUPPORT - template - explicit Matrix(const eigen2_RotationBase& r); - template - Matrix& operator=(const eigen2_RotationBase& r); - #endif - // allow to extend Matrix outside Eigen #ifdef EIGEN_MATRIX_PLUGIN #include EIGEN_MATRIX_PLUGIN diff --git a/external/eigen3/Eigen/src/Core/MatrixBase.h b/external/eigen3/Eigen/src/Core/MatrixBase.h index e83ef4d..ce41218 100644 --- a/external/eigen3/Eigen/src/Core/MatrixBase.h +++ b/external/eigen3/Eigen/src/Core/MatrixBase.h @@ -41,9 +41,9 @@ namespace Eigen { * \endcode * * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. * - * \sa \ref TopicClassHierarchy + * \sa \blank \ref TopicClassHierarchy */ template class MatrixBase : public DenseBase @@ -52,7 +52,7 @@ template class MatrixBase #ifndef EIGEN_PARSED_BY_DOXYGEN typedef MatrixBase StorageBaseType; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; + typedef typename internal::traits::StorageIndex StorageIndex; typedef typename internal::traits::Scalar Scalar; typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; @@ -66,7 +66,6 @@ template class MatrixBase using Base::MaxSizeAtCompileTime; using Base::IsVectorAtCompileTime; using Base::Flags; - using Base::CoeffReadCost; using Base::derived; using Base::const_cast_derived; @@ -98,25 +97,14 @@ template class MatrixBase /** \returns the size of the main diagonal, which is min(rows(),cols()). * \sa rows(), cols(), SizeAtCompileTime. */ - inline Index diagonalSize() const { return (std::min)(rows(),cols()); } + EIGEN_DEVICE_FUNC + inline Index diagonalSize() const { return (numext::mini)(rows(),cols()); } - /** \brief The plain matrix type corresponding to this expression. - * - * This is not necessarily exactly the return type of eval(). In the case of plain matrices, - * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed - * that the return type of eval() is either PlainObject or const PlainObject&. - */ - typedef Matrix::Scalar, - internal::traits::RowsAtCompileTime, - internal::traits::ColsAtCompileTime, - AutoAlign | (internal::traits::Flags&RowMajorBit ? RowMajor : ColMajor), - internal::traits::MaxRowsAtCompileTime, - internal::traits::MaxColsAtCompileTime - > PlainObject; + typedef typename Base::PlainObject PlainObject; #ifndef EIGEN_PARSED_BY_DOXYGEN /** \internal Represents a matrix with all coefficients equal to one another*/ - typedef CwiseNullaryOp,Derived> ConstantReturnType; + typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal the return type of MatrixBase::adjoint() */ typedef typename internal::conditional::IsComplex, CwiseUnaryOp, ConstTransposeReturnType>, @@ -125,7 +113,7 @@ template class MatrixBase /** \internal Return type of eigenvalues() */ typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> EigenvaluesReturnType; /** \internal the return type of identity */ - typedef CwiseNullaryOp,Derived> IdentityReturnType; + typedef CwiseNullaryOp,PlainObject> IdentityReturnType; /** \internal the return type of unit vectors */ typedef Block, SquareMatrixType>, internal::traits::RowsAtCompileTime, @@ -133,6 +121,7 @@ template class MatrixBase #endif // not EIGEN_PARSED_BY_DOXYGEN #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase +#define EIGEN_DOC_UNARY_ADDONS(X,Y) # include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" @@ -141,41 +130,53 @@ template class MatrixBase # include EIGEN_MATRIXBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const MatrixBase& other); // We cannot inherit here via Base::operator= since it is causing // trouble with MSVC. template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase& other); template + EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& other); - template - Derived& lazyAssign(const ProductBase& other); - - template - Derived& lazyAssign(const MatrixPowerProduct& other); - template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const MatrixBase& other); template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const MatrixBase& other); +#ifdef __CUDACC__ + template + EIGEN_DEVICE_FUNC + const Product + operator*(const MatrixBase &other) const + { return this->lazyProduct(other); } +#else + template - const typename ProductReturnType::Type + const Product operator*(const MatrixBase &other) const; +#endif + template - const typename LazyProductReturnType::Type + EIGEN_DEVICE_FUNC + const Product lazyProduct(const MatrixBase &other) const; template @@ -188,84 +189,93 @@ template class MatrixBase void applyOnTheRight(const EigenBase& other); template - const DiagonalProduct + EIGEN_DEVICE_FUNC + const Product operator*(const DiagonalBase &diagonal) const; template - typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType + EIGEN_DEVICE_FUNC + typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType dot(const MatrixBase& other) const; - #ifdef EIGEN2_SUPPORT - template - Scalar eigen2_dot(const MatrixBase& other) const; - #endif - - RealScalar squaredNorm() const; - RealScalar norm() const; + EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; + EIGEN_DEVICE_FUNC RealScalar norm() const; RealScalar stableNorm() const; RealScalar blueNorm() const; RealScalar hypotNorm() const; - const PlainObject normalized() const; - void normalize(); + EIGEN_DEVICE_FUNC const PlainObject normalized() const; + EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const; + EIGEN_DEVICE_FUNC void normalize(); + EIGEN_DEVICE_FUNC void stableNormalize(); - const AdjointReturnType adjoint() const; - void adjointInPlace(); + EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; + EIGEN_DEVICE_FUNC void adjointInPlace(); typedef Diagonal DiagonalReturnType; + EIGEN_DEVICE_FUNC DiagonalReturnType diagonal(); + typedef typename internal::add_const >::type ConstDiagonalReturnType; + EIGEN_DEVICE_FUNC ConstDiagonalReturnType diagonal() const; template struct DiagonalIndexReturnType { typedef Diagonal Type; }; template struct ConstDiagonalIndexReturnType { typedef const Diagonal Type; }; - template typename DiagonalIndexReturnType::Type diagonal(); - template typename ConstDiagonalIndexReturnType::Type diagonal() const; - + template + EIGEN_DEVICE_FUNC + typename DiagonalIndexReturnType::Type diagonal(); + + template + EIGEN_DEVICE_FUNC + typename ConstDiagonalIndexReturnType::Type diagonal() const; + typedef Diagonal DiagonalDynamicIndexReturnType; typedef typename internal::add_const >::type ConstDiagonalDynamicIndexReturnType; + EIGEN_DEVICE_FUNC DiagonalDynamicIndexReturnType diagonal(Index index); + EIGEN_DEVICE_FUNC ConstDiagonalDynamicIndexReturnType diagonal(Index index) const; - #ifdef EIGEN2_SUPPORT - template typename internal::eigen2_part_return_type::type part(); - template const typename internal::eigen2_part_return_type::type part() const; - - // huuuge hack. make Eigen2's matrix.part() work in eigen3. Problem: Diagonal is now a class template instead - // of an integer constant. Solution: overload the part() method template wrt template parameters list. - template class U> - const DiagonalWrapper part() const - { return diagonal().asDiagonal(); } - #endif // EIGEN2_SUPPORT - template struct TriangularViewReturnType { typedef TriangularView Type; }; template struct ConstTriangularViewReturnType { typedef const TriangularView Type; }; - template typename TriangularViewReturnType::Type triangularView(); - template typename ConstTriangularViewReturnType::Type triangularView() const; + template + EIGEN_DEVICE_FUNC + typename TriangularViewReturnType::Type triangularView(); + template + EIGEN_DEVICE_FUNC + typename ConstTriangularViewReturnType::Type triangularView() const; template struct SelfAdjointViewReturnType { typedef SelfAdjointView Type; }; template struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView Type; }; - template typename SelfAdjointViewReturnType::Type selfadjointView(); - template typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + template + EIGEN_DEVICE_FUNC + typename SelfAdjointViewReturnType::Type selfadjointView(); + template + EIGEN_DEVICE_FUNC + typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; const SparseView sparseView(const Scalar& m_reference = Scalar(0), const typename NumTraits::Real& m_epsilon = NumTraits::dummy_precision()) const; - static const IdentityReturnType Identity(); - static const IdentityReturnType Identity(Index rows, Index cols); - static const BasisReturnType Unit(Index size, Index i); - static const BasisReturnType Unit(Index i); - static const BasisReturnType UnitX(); - static const BasisReturnType UnitY(); - static const BasisReturnType UnitZ(); - static const BasisReturnType UnitW(); - + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(); + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitX(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitY(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitW(); + + EIGEN_DEVICE_FUNC const DiagonalWrapper asDiagonal() const; const PermutationWrapper asPermutation() const; + EIGEN_DEVICE_FUNC Derived& setIdentity(); + EIGEN_DEVICE_FUNC Derived& setIdentity(Index rows, Index cols); bool isIdentity(const RealScalar& prec = NumTraits::dummy_precision()) const; @@ -284,7 +294,7 @@ template class MatrixBase * fuzzy comparison such as isApprox() * \sa isApprox(), operator!= */ template - inline bool operator==(const MatrixBase& other) const + EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase& other) const { return cwiseEqual(other).all(); } /** \returns true if at least one pair of coefficients of \c *this and \a other are not exactly equal to each other. @@ -292,64 +302,50 @@ template class MatrixBase * fuzzy comparison such as isApprox() * \sa isApprox(), operator== */ template - inline bool operator!=(const MatrixBase& other) const + EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase& other) const { return cwiseNotEqual(other).any(); } NoAlias noalias(); - inline const ForceAlignedAccess forceAlignedAccess() const; - inline ForceAlignedAccess forceAlignedAccess(); - template inline typename internal::add_const_on_value_type,Derived&>::type>::type forceAlignedAccessIf() const; - template inline typename internal::conditional,Derived&>::type forceAlignedAccessIf(); + // TODO forceAlignedAccess is temporarily disabled + // Need to find a nicer workaround. + inline const Derived& forceAlignedAccess() const { return derived(); } + inline Derived& forceAlignedAccess() { return derived(); } + template inline const Derived& forceAlignedAccessIf() const { return derived(); } + template inline Derived& forceAlignedAccessIf() { return derived(); } - Scalar trace() const; + EIGEN_DEVICE_FUNC Scalar trace() const; -/////////// Array module /////////// + template EIGEN_DEVICE_FUNC RealScalar lpNorm() const; - template RealScalar lpNorm() const; - - MatrixBase& matrix() { return *this; } - const MatrixBase& matrix() const { return *this; } + EIGEN_DEVICE_FUNC MatrixBase& matrix() { return *this; } + EIGEN_DEVICE_FUNC const MatrixBase& matrix() const { return *this; } /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - ArrayWrapper array() { return derived(); } - const ArrayWrapper array() const { return derived(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return ArrayWrapper(derived()); } + /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { return ArrayWrapper(derived()); } /////////// LU module /////////// - const FullPivLU fullPivLu() const; - const PartialPivLU partialPivLu() const; + inline const FullPivLU fullPivLu() const; + inline const PartialPivLU partialPivLu() const; - #if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS - const LU lu() const; - #endif + inline const PartialPivLU lu() const; - #ifdef EIGEN2_SUPPORT - const LU eigen2_lu() const; - #endif + inline const Inverse inverse() const; - #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - const PartialPivLU lu() const; - #endif - - #ifdef EIGEN2_SUPPORT template - void computeInverse(MatrixBase *result) const { - *result = this->inverse(); - } - #endif - - const internal::inverse_impl inverse() const; - template - void computeInverseAndDetWithCheck( + inline void computeInverseAndDetWithCheck( ResultType& inverse, typename ResultType::Scalar& determinant, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() ) const; template - void computeInverseWithCheck( + inline void computeInverseWithCheck( ResultType& inverse, bool& invertible, const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision() @@ -358,65 +354,70 @@ template class MatrixBase /////////// Cholesky module /////////// - const LLT llt() const; - const LDLT ldlt() const; + inline const LLT llt() const; + inline const LDLT ldlt() const; /////////// QR module /////////// - const HouseholderQR householderQr() const; - const ColPivHouseholderQR colPivHouseholderQr() const; - const FullPivHouseholderQR fullPivHouseholderQr() const; - - #ifdef EIGEN2_SUPPORT - const QR qr() const; - #endif + inline const HouseholderQR householderQr() const; + inline const ColPivHouseholderQR colPivHouseholderQr() const; + inline const FullPivHouseholderQR fullPivHouseholderQr() const; + inline const CompleteOrthogonalDecomposition completeOrthogonalDecomposition() const; - EigenvaluesReturnType eigenvalues() const; - RealScalar operatorNorm() const; +/////////// Eigenvalues module /////////// -/////////// SVD module /////////// + inline EigenvaluesReturnType eigenvalues() const; + inline RealScalar operatorNorm() const; - JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; +/////////// SVD module /////////// - #ifdef EIGEN2_SUPPORT - SVD svd() const; - #endif + inline JacobiSVD jacobiSvd(unsigned int computationOptions = 0) const; + inline BDCSVD bdcSvd(unsigned int computationOptions = 0) const; /////////// Geometry module /////////// #ifndef EIGEN_PARSED_BY_DOXYGEN /// \internal helper struct to form the return type of the cross product template struct cross_product_return_type { - typedef typename internal::scalar_product_traits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; + typedef typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType Scalar; typedef Matrix type; }; #endif // EIGEN_PARSED_BY_DOXYGEN template - typename cross_product_return_type::type + EIGEN_DEVICE_FUNC +#ifndef EIGEN_PARSED_BY_DOXYGEN + inline typename cross_product_return_type::type +#else + inline PlainObject +#endif cross(const MatrixBase& other) const; + template - PlainObject cross3(const MatrixBase& other) const; - PlainObject unitOrthogonal(void) const; - Matrix eulerAngles(Index a0, Index a1, Index a2) const; - - #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS - ScalarMultipleReturnType operator*(const UniformScaling& s) const; + EIGEN_DEVICE_FUNC + inline PlainObject cross3(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC + inline PlainObject unitOrthogonal(void) const; + + EIGEN_DEVICE_FUNC + inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; + // put this as separate enum value to work around possible GCC 4.3 bug (?) - enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1?Vertical:Horizontal }; + enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) + : ColsAtCompileTime==1 ? Vertical : Horizontal }; typedef Homogeneous HomogeneousReturnType; - HomogeneousReturnType homogeneous() const; - #endif - + EIGEN_DEVICE_FUNC + inline HomogeneousReturnType homogeneous() const; + enum { SizeMinusOne = SizeAtCompileTime==Dynamic ? Dynamic : SizeAtCompileTime-1 }; typedef Block::ColsAtCompileTime==1 ? SizeMinusOne : 1, internal::traits::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne; - typedef CwiseUnaryOp::Scalar>, - const ConstStartMinusOne > HNormalizedReturnType; - - const HNormalizedReturnType hnormalized() const; + typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType; + EIGEN_DEVICE_FUNC + inline const HNormalizedReturnType hnormalized() const; ////////// Householder module /////////// @@ -461,49 +462,15 @@ template class MatrixBase const MatrixSquareRootReturnValue sqrt() const; const MatrixLogarithmReturnValue log() const; const MatrixPowerReturnValue pow(const RealScalar& p) const; - -#ifdef EIGEN2_SUPPORT - template - Derived& operator+=(const Flagged, 0, - EvalBeforeAssigningBit>& other); - - template - Derived& operator-=(const Flagged, 0, - EvalBeforeAssigningBit>& other); - - /** \deprecated because .lazy() is deprecated - * Overloaded for cache friendly product evaluation */ - template - Derived& lazyAssign(const Flagged& other) - { return lazyAssign(other._expression()); } - - template - const Flagged marked() const; - const Flagged lazy() const; - - inline const Cwise cwise() const; - inline Cwise cwise(); - - VectorBlock start(Index size); - const VectorBlock start(Index size) const; - VectorBlock end(Index size); - const VectorBlock end(Index size) const; - template VectorBlock start(); - template const VectorBlock start() const; - template VectorBlock end(); - template const VectorBlock end() const; - - Minor minor(Index row, Index col); - const Minor minor(Index row, Index col) const; -#endif + const MatrixComplexPowerReturnValue pow(const std::complex& p) const; protected: - MatrixBase() : Base() {} + EIGEN_DEVICE_FUNC MatrixBase() : Base() {} private: - explicit MatrixBase(int); - MatrixBase(int,int); - template explicit MatrixBase(const MatrixBase&); + EIGEN_DEVICE_FUNC explicit MatrixBase(int); + EIGEN_DEVICE_FUNC MatrixBase(int,int); + template EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase&); protected: // mixing arrays and matrices is not legal template Derived& operator+=(const ArrayBase& ) diff --git a/external/eigen3/Eigen/src/Core/NestByValue.h b/external/eigen3/Eigen/src/Core/NestByValue.h index a893b17..13adf07 100644 --- a/external/eigen3/Eigen/src/Core/NestByValue.h +++ b/external/eigen3/Eigen/src/Core/NestByValue.h @@ -13,25 +13,24 @@ namespace Eigen { +namespace internal { +template +struct traits > : public traits +{}; +} + /** \class NestByValue * \ingroup Core_Module * * \brief Expression which must be nested by value * - * \param ExpressionType the type of the object of which we are requiring nesting-by-value + * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value * * This class is the return type of MatrixBase::nestByValue() * and most of the time this is the only way it is used. * * \sa MatrixBase::nestByValue() */ - -namespace internal { -template -struct traits > : public traits -{}; -} - template class NestByValue : public internal::dense_xpr_base< NestByValue >::type { @@ -40,29 +39,29 @@ template class NestByValue typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) - inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} - inline Index rows() const { return m_expression.rows(); } - inline Index cols() const { return m_expression.cols(); } - inline Index outerStride() const { return m_expression.outerStride(); } - inline Index innerStride() const { return m_expression.innerStride(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_expression.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_expression.cols(); } + EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_expression.innerStride(); } - inline const CoeffReturnType coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { return m_expression.coeff(row, col); } - inline Scalar& coeffRef(Index row, Index col) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return m_expression.const_cast_derived().coeffRef(row, col); } - inline const CoeffReturnType coeff(Index index) const + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } - inline Scalar& coeffRef(Index index) + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } @@ -91,7 +90,7 @@ template class NestByValue m_expression.const_cast_derived().template writePacket(index, x); } - operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } protected: const ExpressionType m_expression; diff --git a/external/eigen3/Eigen/src/Core/NoAlias.h b/external/eigen3/Eigen/src/Core/NoAlias.h index 768bfb1..3390801 100644 --- a/external/eigen3/Eigen/src/Core/NoAlias.h +++ b/external/eigen3/Eigen/src/Core/NoAlias.h @@ -17,7 +17,7 @@ namespace Eigen { * * \brief Pseudo expression providing an operator = assuming no aliasing * - * \param ExpressionType the type of the object on which to do the lazy assignment + * \tparam ExpressionType the type of the object on which to do the lazy assignment * * This class represents an expression with special assignment operators * assuming no aliasing between the target expression and the source expression. @@ -30,62 +30,36 @@ namespace Eigen { template class StorageBase> class NoAlias { - typedef typename ExpressionType::Scalar Scalar; public: - NoAlias(ExpressionType& expression) : m_expression(expression) {} - - /** Behaves like MatrixBase::lazyAssign(other) - * \sa MatrixBase::lazyAssign() */ + typedef typename ExpressionType::Scalar Scalar; + + explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) - { return internal::assign_selector::run(m_expression,other.derived()); } - - /** \sa MatrixBase::operator+= */ + { + call_assignment_no_alias(m_expression, other.derived(), internal::assign_op()); + return m_expression; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { - typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested::type OtherDerivedNested; - typedef typename internal::remove_all::type _OtherDerivedNested; - internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op()); return m_expression; } - - /** \sa MatrixBase::operator-= */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { - typedef SelfCwiseBinaryOp, ExpressionType, OtherDerived> SelfAdder; - SelfAdder tmp(m_expression); - typedef typename internal::nested::type OtherDerivedNested; - typedef typename internal::remove_all::type _OtherDerivedNested; - internal::assign_selector::run(tmp,OtherDerivedNested(other.derived())); + call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op()); return m_expression; } -#ifndef EIGEN_PARSED_BY_DOXYGEN - template - EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase& other) - { other.derived().addTo(m_expression); return m_expression; } - - template - EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase& other) - { other.derived().subTo(m_expression); return m_expression; } - - template - EIGEN_STRONG_INLINE ExpressionType& operator+=(const CoeffBasedProduct& other) - { return m_expression.derived() += CoeffBasedProduct(other.lhs(), other.rhs()); } - - template - EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct& other) - { return m_expression.derived() -= CoeffBasedProduct(other.lhs(), other.rhs()); } - - template - ExpressionType& operator=(const ReturnByValue& func) - { return m_expression = func; } -#endif - + EIGEN_DEVICE_FUNC ExpressionType& expression() const { return m_expression; @@ -126,7 +100,7 @@ class NoAlias template NoAlias MatrixBase::noalias() { - return derived(); + return NoAlias(derived()); } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/NumTraits.h b/external/eigen3/Eigen/src/Core/NumTraits.h index bac9e50..daf4898 100644 --- a/external/eigen3/Eigen/src/Core/NumTraits.h +++ b/external/eigen3/Eigen/src/Core/NumTraits.h @@ -12,24 +12,57 @@ namespace Eigen { +namespace internal { + +// default implementation of digits10(), based on numeric_limits if specialized, +// 0 for integer types, and log10(epsilon()) otherwise. +template< typename T, + bool use_numeric_limits = std::numeric_limits::is_specialized, + bool is_integer = NumTraits::IsInteger> +struct default_digits10_impl +{ + static int run() { return std::numeric_limits::digits10; } +}; + +template +struct default_digits10_impl // Floating point +{ + static int run() { + using std::log10; + using std::ceil; + typedef typename NumTraits::Real Real; + return int(ceil(-log10(NumTraits::epsilon()))); + } +}; + +template +struct default_digits10_impl // Integer +{ + static int run() { return 0; } +}; + +} // end namespace internal + /** \class NumTraits * \ingroup Core_Module * * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen. * - * \param T the numeric type at hand + * \tparam T the numeric type at hand * * This class stores enums, typedefs and static methods giving information about a numeric type. * * The provided data consists of: - * \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real, - * then \a Real is just a typedef to \a T. If \a T is \c std::complex then \a Real + * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, + * then \c Real is just a typedef to \a T. If \a T is \c std::complex then \c Real * is a typedef to \a U. - * \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values, + * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex, Literal is defined as \c U. + * Of course, this type must be fully compatible with \a T. In doubt, just use \a T here. * \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what * this means, just use \a T here. * \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex @@ -42,10 +75,14 @@ namespace Eigen { * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. - * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), returns a \a Real instead of a \a T. + * \li An epsilon() function which, unlike std::numeric_limits::epsilon(), + * it returns a \a Real instead of a \a T. * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default * value by the fuzzy comparison operators. * \li highest() and lowest() functions returning the highest and lowest possible values respectively. + * \li digits10() function returning the number of decimal digits that can be represented without change. This is + * the analogue of std::numeric_limits::digits10 + * which is used as the default implementation if specialized. */ template struct GenericNumTraits @@ -67,22 +104,47 @@ template struct GenericNumTraits T >::type NonInteger; typedef T Nested; + typedef T Literal; + + EIGEN_DEVICE_FUNC + static inline Real epsilon() + { + return numext::numeric_limits::epsilon(); + } + + EIGEN_DEVICE_FUNC + static inline int digits10() + { + return internal::default_digits10_impl::run(); + } - static inline Real epsilon() { return std::numeric_limits::epsilon(); } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { // make sure to override this for floating-point types return Real(0); } - static inline T highest() { return (std::numeric_limits::max)(); } - static inline T lowest() { return IsInteger ? (std::numeric_limits::min)() : (-(std::numeric_limits::max)()); } - -#ifdef EIGEN2_SUPPORT - enum { - HasFloatingPoint = !IsInteger - }; - typedef NonInteger FloatingPoint; -#endif + + + EIGEN_DEVICE_FUNC + static inline T highest() { + return (numext::numeric_limits::max)(); + } + + EIGEN_DEVICE_FUNC + static inline T lowest() { + return IsInteger ? (numext::numeric_limits::min)() : (-(numext::numeric_limits::max)()); + } + + EIGEN_DEVICE_FUNC + static inline T infinity() { + return numext::numeric_limits::infinity(); + } + + EIGEN_DEVICE_FUNC + static inline T quiet_NaN() { + return numext::numeric_limits::quiet_NaN(); + } }; template struct NumTraits : GenericNumTraits @@ -91,11 +153,13 @@ template struct NumTraits : GenericNumTraits template<> struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC static inline float dummy_precision() { return 1e-5f; } }; template<> struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC static inline double dummy_precision() { return 1e-12; } }; @@ -109,6 +173,7 @@ template struct NumTraits > : GenericNumTraits > { typedef _Real Real; + typedef typename NumTraits<_Real>::Literal Literal; enum { IsComplex = 1, RequireInitialization = NumTraits<_Real>::RequireInitialization, @@ -117,8 +182,12 @@ template struct NumTraits > MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost }; + EIGEN_DEVICE_FUNC static inline Real epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return NumTraits::dummy_precision(); } + EIGEN_DEVICE_FUNC + static inline int digits10() { return NumTraits::digits10(); } }; template @@ -130,21 +199,50 @@ struct NumTraits > typedef typename NumTraits::NonInteger NonIntegerScalar; typedef Array NonInteger; typedef ArrayType & Nested; - + typedef typename NumTraits::Literal Literal; + enum { IsComplex = NumTraits::IsComplex, IsInteger = NumTraits::IsInteger, IsSigned = NumTraits::IsSigned, RequireInitialization = 1, - ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, - AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::AddCost, - MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits::MulCost + ReadCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::ReadCost, + AddCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::AddCost, + MulCost = ArrayType::SizeAtCompileTime==Dynamic ? HugeCost : ArrayType::SizeAtCompileTime * NumTraits::MulCost }; - + + EIGEN_DEVICE_FUNC static inline RealScalar epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits::dummy_precision(); } + + static inline int digits10() { return NumTraits::digits10(); } +}; + +template<> struct NumTraits + : GenericNumTraits +{ + enum { + RequireInitialization = 1, + ReadCost = HugeCost, + AddCost = HugeCost, + MulCost = HugeCost + }; + + static inline int digits10() { return 0; } + +private: + static inline std::string epsilon(); + static inline std::string dummy_precision(); + static inline std::string lowest(); + static inline std::string highest(); + static inline std::string infinity(); + static inline std::string quiet_NaN(); }; +// Empty specialization for void to allow template specialization based on NumTraits::Real with T==void and SFINAE. +template<> struct NumTraits {}; + } // end namespace Eigen #endif // EIGEN_NUMTRAITS_H diff --git a/external/eigen3/Eigen/src/Core/PermutationMatrix.h b/external/eigen3/Eigen/src/Core/PermutationMatrix.h index bda79fa..b1fb455 100644 --- a/external/eigen3/Eigen/src/Core/PermutationMatrix.h +++ b/external/eigen3/Eigen/src/Core/PermutationMatrix.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2009 Benoit Jacob -// Copyright (C) 2009-2011 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -13,14 +13,18 @@ namespace Eigen { -template class PermutedImpl; +namespace internal { + +enum PermPermProduct_t {PermPermProduct}; + +} // end namespace internal /** \class PermutationBase * \ingroup Core_Module * * \brief Base class for permutations * - * \param Derived the derived class + * \tparam Derived the derived class * * This class is the base class for all expressions representing a permutation matrix, * internally stored as a vector of integers. @@ -38,17 +42,6 @@ template -struct permut_matrix_product_retval; -template -struct permut_sparsematrix_product_retval; -enum PermPermProduct_t {PermPermProduct}; - -} // end namespace internal - template class PermutationBase : public EigenBase { @@ -60,19 +53,20 @@ class PermutationBase : public EigenBase typedef typename Traits::IndicesType IndicesType; enum { Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, RowsAtCompileTime = Traits::RowsAtCompileTime, ColsAtCompileTime = Traits::ColsAtCompileTime, MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, MaxColsAtCompileTime = Traits::MaxColsAtCompileTime }; - typedef typename Traits::Scalar Scalar; - typedef typename Traits::Index Index; - typedef Matrix + typedef typename Traits::StorageIndex StorageIndex; + typedef Matrix DenseMatrixType; - typedef PermutationMatrix + typedef PermutationMatrix PlainPermutationType; + typedef PlainPermutationType PlainObject; using Base::derived; + typedef Inverse InverseReturnType; + typedef void Scalar; #endif /** Copies the other permutation into *this */ @@ -118,7 +112,7 @@ class PermutationBase : public EigenBase void evalTo(MatrixBase& other) const { other.setZero(); - for (int i=0; i /** Sets *this to be the identity permutation matrix */ void setIdentity() { - for(Index i = 0; i < size(); ++i) + StorageIndex n = StorageIndex(size()); + for(StorageIndex i = 0; i < n; ++i) indices().coeffRef(i) = i; } @@ -163,18 +158,18 @@ class PermutationBase : public EigenBase * * \returns a reference to *this. * - * \warning This is much slower than applyTranspositionOnTheRight(int,int): + * \warning This is much slower than applyTranspositionOnTheRight(Index,Index): * this has linear complexity and requires a lot of branching. * - * \sa applyTranspositionOnTheRight(int,int) + * \sa applyTranspositionOnTheRight(Index,Index) */ Derived& applyTranspositionOnTheLeft(Index i, Index j) { eigen_assert(i>=0 && j>=0 && i * * This is a fast operation, it only consists in swapping two indices. * - * \sa applyTranspositionOnTheLeft(int,int) + * \sa applyTranspositionOnTheLeft(Index,Index) */ Derived& applyTranspositionOnTheRight(Index i, Index j) { @@ -196,16 +191,16 @@ class PermutationBase : public EigenBase /** \returns the inverse permutation matrix. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ - inline Transpose inverse() const - { return derived(); } + inline InverseReturnType inverse() const + { return InverseReturnType(derived()); } /** \returns the tranpose permutation matrix. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ - inline Transpose transpose() const - { return derived(); } + inline InverseReturnType transpose() const + { return InverseReturnType(derived()); } /**** multiplication helpers to hopefully get RVO ****/ @@ -215,13 +210,13 @@ class PermutationBase : public EigenBase template void assignTranspose(const PermutationBase& other) { - for (int i=0; i void assignProduct(const Lhs& lhs, const Rhs& rhs) { eigen_assert(lhs.cols() == rhs.rows()); - for (int i=0; i /** \returns the product permutation matrix. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ template inline PlainPermutationType operator*(const PermutationBase& other) const @@ -237,18 +232,18 @@ class PermutationBase : public EigenBase /** \returns the product of a permutation with another inverse permutation. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ template - inline PlainPermutationType operator*(const Transpose >& other) const + inline PlainPermutationType operator*(const InverseImpl& other) const { return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); } /** \returns the product of an inverse permutation with another permutation. * - * \note \note_try_to_help_rvo + * \note \blank \note_try_to_help_rvo */ template friend - inline PlainPermutationType operator*(const Transpose >& other, const PermutationBase& perm) + inline PlainPermutationType operator*(const InverseImpl& other, const PermutationBase& perm) { return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); } /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the permutation. @@ -284,39 +279,43 @@ class PermutationBase : public EigenBase }; +namespace internal { +template +struct traits > + : traits > +{ + typedef PermutationStorage StorageKind; + typedef Matrix<_StorageIndex, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; +}; +} + /** \class PermutationMatrix * \ingroup Core_Module * * \brief Permutation matrix * - * \param SizeAtCompileTime the number of rows/cols, or Dynamic - * \param MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. - * \param IndexType the interger type of the indices + * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic + * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to SizeAtCompileTime. Most of the time, you should not have to specify it. + * \tparam _StorageIndex the integer type of the indices * * This class represents a permutation matrix, internally stored as a vector of integers. * * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix */ - -namespace internal { -template -struct traits > - : traits > -{ - typedef IndexType Index; - typedef Matrix IndicesType; -}; -} - -template -class PermutationMatrix : public PermutationBase > +template +class PermutationMatrix : public PermutationBase > { typedef PermutationBase Base; typedef internal::traits Traits; public: + typedef const PermutationMatrix& Nested; + #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Traits::IndicesType IndicesType; + typedef typename Traits::StorageIndex StorageIndex; #endif inline PermutationMatrix() @@ -324,8 +323,10 @@ class PermutationMatrix : public PermutationBase::highest()); + } /** Copy constructor. */ template @@ -346,7 +347,7 @@ class PermutationMatrix : public PermutationBase - explicit inline PermutationMatrix(const MatrixBase& a_indices) : m_indices(a_indices) + explicit inline PermutationMatrix(const MatrixBase& indices) : m_indices(indices) {} /** Convert the Transpositions \a tr to a permutation matrix */ @@ -393,10 +394,13 @@ class PermutationMatrix : public PermutationBase - PermutationMatrix(const Transpose >& other) - : m_indices(other.nestedPermutation().size()) + PermutationMatrix(const InverseImpl& other) + : m_indices(other.derived().nestedExpression().size()) { - for (int i=0; i::highest()); + StorageIndex end = StorageIndex(m_indices.size()); + for (StorageIndex i=0; i PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) @@ -413,18 +417,20 @@ class PermutationMatrix : public PermutationBase -struct traits,_PacketAccess> > - : traits > +template +struct traits,_PacketAccess> > + : traits > { - typedef IndexType Index; - typedef Map, _PacketAccess> IndicesType; + typedef PermutationStorage StorageKind; + typedef Map, _PacketAccess> IndicesType; + typedef _StorageIndex StorageIndex; + typedef void Scalar; }; } -template -class Map,_PacketAccess> - : public PermutationBase,_PacketAccess> > +template +class Map,_PacketAccess> + : public PermutationBase,_PacketAccess> > { typedef PermutationBase Base; typedef internal::traits Traits; @@ -432,14 +438,14 @@ class Map, #ifndef EIGEN_PARSED_BY_DOXYGEN typedef typename Traits::IndicesType IndicesType; - typedef typename IndicesType::Scalar Index; + typedef typename IndicesType::Scalar StorageIndex; #endif - inline Map(const Index* indicesPtr) + inline Map(const StorageIndex* indicesPtr) : m_indices(indicesPtr) {} - inline Map(const Index* indicesPtr, Index size) + inline Map(const StorageIndex* indicesPtr, Index size) : m_indices(indicesPtr,size) {} @@ -474,40 +480,36 @@ class Map, IndicesType m_indices; }; -/** \class PermutationWrapper - * \ingroup Core_Module - * - * \brief Class to view a vector of integers as a permutation matrix - * - * \param _IndicesType the type of the vector of integer (can be any compatible expression) - * - * This class allows to view any vector expression of integers as a permutation matrix. - * - * \sa class PermutationBase, class PermutationMatrix - */ - -struct PermutationStorage {}; - template class TranspositionsWrapper; namespace internal { template struct traits > { typedef PermutationStorage StorageKind; - typedef typename _IndicesType::Scalar Scalar; - typedef typename _IndicesType::Scalar Index; + typedef void Scalar; + typedef typename _IndicesType::Scalar StorageIndex; typedef _IndicesType IndicesType; enum { RowsAtCompileTime = _IndicesType::SizeAtCompileTime, ColsAtCompileTime = _IndicesType::SizeAtCompileTime, - MaxRowsAtCompileTime = IndicesType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = IndicesType::MaxColsAtCompileTime, - Flags = 0, - CoeffReadCost = _IndicesType::CoeffReadCost + MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + Flags = 0 }; }; } +/** \class PermutationWrapper + * \ingroup Core_Module + * + * \brief Class to view a vector of integers as a permutation matrix + * + * \tparam _IndicesType the type of the vector of integer (can be any compatible expression) + * + * This class allows to view any vector expression of integers as a permutation matrix. + * + * \sa class PermutationBase, class PermutationMatrix + */ template class PermutationWrapper : public PermutationBase > { @@ -519,8 +521,8 @@ class PermutationWrapper : public PermutationBase -inline const internal::permut_matrix_product_retval -operator*(const MatrixBase& matrix, - const PermutationBase &permutation) +template +EIGEN_DEVICE_FUNC +const Product +operator*(const MatrixBase &matrix, + const PermutationBase& permutation) { - return internal::permut_matrix_product_retval - - (permutation.derived(), matrix.derived()); + return Product + (matrix.derived(), permutation.derived()); } /** \returns the matrix with the permutation applied to the rows. */ -template -inline const internal::permut_matrix_product_retval - +template +EIGEN_DEVICE_FUNC +const Product operator*(const PermutationBase &permutation, - const MatrixBase& matrix) + const MatrixBase& matrix) { - return internal::permut_matrix_product_retval - - (permutation.derived(), matrix.derived()); + return Product + (permutation.derived(), matrix.derived()); } -namespace internal { -template -struct traits > +template +class InverseImpl + : public EigenBase > { - typedef typename MatrixType::PlainObject ReturnType; -}; - -template -struct permut_matrix_product_retval - : public ReturnByValue > -{ - typedef typename remove_all::type MatrixTypeNestedCleaned; - typedef typename MatrixType::Index Index; - - permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix) - : m_permutation(perm), m_matrix(matrix) - {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - template inline void evalTo(Dest& dst) const - { - const Index n = Side==OnTheLeft ? rows() : cols(); - // FIXME we need an is_same for expression that is not sensitive to constness. For instance - // is_same_xpr, Block >::value should be true. - const typename Dest::Scalar *dst_data = internal::extract_data(dst); - if( is_same::value - && blas_traits::HasUsableDirectAccess - && blas_traits::HasUsableDirectAccess - && dst_data!=0 && dst_data == extract_data(m_matrix)) - { - // apply the permutation inplace - Matrix mask(m_permutation.size()); - mask.fill(false); - Index r = 0; - while(r < m_permutation.size()) - { - // search for the next seed - while(r=m_permutation.size()) - break; - // we got one, let's follow it until we are back to the seed - Index k0 = r++; - Index kPrev = k0; - mask.coeffRef(k0) = true; - for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k)) - { - Block(dst, k) - .swap(Block - (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); - - mask.coeffRef(k) = true; - kPrev = k; - } - } - } - else - { - for(int i = 0; i < n; ++i) - { - Block - (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i) - - = - - Block - (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i); - } - } - } - - protected: - const PermutationType& m_permutation; - typename MatrixType::Nested m_matrix; -}; - -/* Template partial specialization for transposed/inverse permutations */ - -template -struct traits > > - : traits -{}; - -} // end namespace internal - -template -class Transpose > - : public EigenBase > > -{ - typedef Derived PermutationType; - typedef typename PermutationType::IndicesType IndicesType; typedef typename PermutationType::PlainPermutationType PlainPermutationType; + typedef internal::traits PermTraits; + protected: + InverseImpl() {} public: + typedef Inverse InverseType; + using EigenBase >::derived; #ifndef EIGEN_PARSED_BY_DOXYGEN - typedef internal::traits Traits; - typedef typename Derived::DenseMatrixType DenseMatrixType; + typedef typename PermutationType::DenseMatrixType DenseMatrixType; enum { - Flags = Traits::Flags, - CoeffReadCost = Traits::CoeffReadCost, - RowsAtCompileTime = Traits::RowsAtCompileTime, - ColsAtCompileTime = Traits::ColsAtCompileTime, - MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + RowsAtCompileTime = PermTraits::RowsAtCompileTime, + ColsAtCompileTime = PermTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime }; - typedef typename Traits::Scalar Scalar; #endif - Transpose(const PermutationType& p) : m_permutation(p) {} - - inline int rows() const { return m_permutation.rows(); } - inline int cols() const { return m_permutation.cols(); } - #ifndef EIGEN_PARSED_BY_DOXYGEN template void evalTo(MatrixBase& other) const { other.setZero(); - for (int i=0; i friend - inline const internal::permut_matrix_product_retval - operator*(const MatrixBase& matrix, const Transpose& trPerm) + const Product + operator*(const MatrixBase& matrix, const InverseType& trPerm) { - return internal::permut_matrix_product_retval(trPerm.m_permutation, matrix.derived()); + return Product(matrix.derived(), trPerm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template - inline const internal::permut_matrix_product_retval + const Product operator*(const MatrixBase& matrix) const { - return internal::permut_matrix_product_retval(m_permutation, matrix.derived()); + return Product(derived(), matrix.derived()); } - - const PermutationType& nestedPermutation() const { return m_permutation; } - - protected: - const PermutationType& m_permutation; }; template @@ -717,6 +622,12 @@ const PermutationWrapper MatrixBase::asPermutation() con return derived(); } +namespace internal { + +template<> struct AssignmentKind { typedef EigenBase2EigenBase Kind; }; + +} // end namespace internal + } // end namespace Eigen #endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/external/eigen3/Eigen/src/Core/PlainObjectBase.h b/external/eigen3/Eigen/src/Core/PlainObjectBase.h index 9f71956..77f4f60 100644 --- a/external/eigen3/Eigen/src/Core/PlainObjectBase.h +++ b/external/eigen3/Eigen/src/Core/PlainObjectBase.h @@ -28,6 +28,7 @@ namespace internal { template struct check_rows_cols_for_overflow { template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE void run(Index, Index) { } @@ -35,11 +36,12 @@ template struct check_rows_cols_for_overflow { template<> struct check_rows_cols_for_overflow { template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols) { // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242 // we assume Index is signed - Index max_index = (size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed + Index max_index = (std::size_t(1) << (8 * sizeof(Index) - 1)) - 1; // assume Index is signed bool error = (rows == 0 || cols == 0) ? false : (rows > max_index / cols); if (error) @@ -56,33 +58,41 @@ template struct m } // end namespace internal -/** \class PlainObjectBase - * \brief %Dense storage base class for matrices and arrays. - * - * This class can be extended with the help of the plugin mechanism described on the page - * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. - * - * \sa \ref TopicClassHierarchy - */ #ifdef EIGEN_PARSED_BY_DOXYGEN -namespace internal { +namespace doxygen { -// this is a warkaround to doxygen not being able to understand the inheritence logic +// This is a workaround to doxygen not being able to understand the inheritance logic // when it is hidden by the dense_xpr_base helper struct. -template struct dense_xpr_base_dispatcher_for_doxygen;// : public MatrixBase {}; +// Moreover, doxygen fails to include members that are not documented in the declaration body of +// MatrixBase if we inherits MatrixBase >, +// this is why we simply inherits MatrixBase, though this does not make sense. + +/** This class is just a workaround for Doxygen and it does not not actually exist. */ +template struct dense_xpr_base_dispatcher; /** This class is just a workaround for Doxygen and it does not not actually exist. */ template -struct dense_xpr_base_dispatcher_for_doxygen > - : public MatrixBase > {}; +struct dense_xpr_base_dispatcher > + : public MatrixBase {}; /** This class is just a workaround for Doxygen and it does not not actually exist. */ template -struct dense_xpr_base_dispatcher_for_doxygen > - : public ArrayBase > {}; +struct dense_xpr_base_dispatcher > + : public ArrayBase {}; -} // namespace internal +} // namespace doxygen +/** \class PlainObjectBase + * \ingroup Core_Module + * \brief %Dense storage base class for matrices and arrays. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. + * + * \tparam Derived is the derived type, e.g., a Matrix or Array + * + * \sa \ref TopicClassHierarchy + */ template -class PlainObjectBase : public internal::dense_xpr_base_dispatcher_for_doxygen +class PlainObjectBase : public doxygen::dense_xpr_base_dispatcher #else template class PlainObjectBase : public internal::dense_xpr_base::type @@ -93,8 +103,8 @@ class PlainObjectBase : public internal::dense_xpr_base::type typedef typename internal::dense_xpr_base::type Base; typedef typename internal::traits::StorageKind StorageKind; - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; typedef typename NumTraits::Real RealScalar; typedef Derived DenseType; @@ -113,28 +123,40 @@ class PlainObjectBase : public internal::dense_xpr_base::type typedef Eigen::Map MapType; friend class Eigen::Map; typedef const Eigen::Map ConstMapType; - friend class Eigen::Map; - typedef Eigen::Map AlignedMapType; - friend class Eigen::Map; - typedef const Eigen::Map ConstAlignedMapType; +#if EIGEN_MAX_ALIGN_BYTES>0 + // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class twice. + friend class Eigen::Map; + friend class Eigen::Map; +#endif + typedef Eigen::Map AlignedMapType; + typedef const Eigen::Map ConstAlignedMapType; template struct StridedMapType { typedef Eigen::Map type; }; template struct StridedConstMapType { typedef Eigen::Map type; }; - template struct StridedAlignedMapType { typedef Eigen::Map type; }; - template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; + template struct StridedAlignedMapType { typedef Eigen::Map type; }; + template struct StridedConstAlignedMapType { typedef Eigen::Map type; }; protected: DenseStorage m_storage; public: - enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits::Flags & AlignedBit) != 0 }; + enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits::Alignment>0) }; EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) + EIGEN_DEVICE_FUNC Base& base() { return *static_cast(this); } + EIGEN_DEVICE_FUNC const Base& base() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); } + /** This is an overloaded version of DenseCoeffsBase::coeff(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const { if(Flags & RowMajorBit) @@ -143,11 +165,21 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + /** This is an overloaded version of DenseCoeffsBase::coeff(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const { return m_storage.data()[index]; } + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index,Index) const for details. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId) { if(Flags & RowMajorBit) @@ -156,11 +188,19 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index) const for details. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_storage.data()[index]; } + /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index). + * It is provided for convenience. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const { if(Flags & RowMajorBit) @@ -169,6 +209,9 @@ class PlainObjectBase : public internal::dense_xpr_base::type return m_storage.data()[rowId + colId * m_storage.rows()]; } + /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index). + * It is provided for convenience. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const { return m_storage.data()[index]; @@ -209,11 +252,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type } /** \returns a const pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE const Scalar *data() const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } /** \returns a pointer to the data array of this matrix */ - EIGEN_STRONG_INLINE Scalar *data() + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } /** Resizes \c *this to a \a rows x \a cols matrix. @@ -232,22 +275,22 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) */ - EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols) - { - eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime) - && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,nbCols==ColsAtCompileTime) - && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,nbRows<=MaxRowsAtCompileTime) - && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime) - && nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array."); - internal::check_rows_cols_for_overflow::run(nbRows, nbCols); + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void resize(Index rows, Index cols) + { + eigen_assert( EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,rows==RowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime!=Dynamic,cols==ColsAtCompileTime) + && EIGEN_IMPLIES(RowsAtCompileTime==Dynamic && MaxRowsAtCompileTime!=Dynamic,rows<=MaxRowsAtCompileTime) + && EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,cols<=MaxColsAtCompileTime) + && rows>=0 && cols>=0 && "Invalid sizes when resizing a matrix or array."); + internal::check_rows_cols_for_overflow::run(rows, cols); #ifdef EIGEN_INITIALIZE_COEFFS - Index size = nbRows*nbCols; + Index size = rows*cols; bool size_changed = size != this->size(); - m_storage.resize(size, nbRows, nbCols); + m_storage.resize(size, rows, cols); if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED #else - internal::check_rows_cols_for_overflow::run(nbRows, nbCols); - m_storage.resize(nbRows*nbCols, nbRows, nbCols); + m_storage.resize(rows*cols, rows, cols); #endif } @@ -262,6 +305,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) */ + EIGEN_DEVICE_FUNC inline void resize(Index size) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) @@ -286,9 +330,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - inline void resize(NoChange_t, Index nbCols) + EIGEN_DEVICE_FUNC + inline void resize(NoChange_t, Index cols) { - resize(rows(), nbCols); + resize(rows(), cols); } /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special value \c NoChange @@ -299,9 +344,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \sa resize(Index,Index) */ - inline void resize(Index nbRows, NoChange_t) + EIGEN_DEVICE_FUNC + inline void resize(Index rows, NoChange_t) { - resize(nbRows, cols()); + resize(rows, cols()); } /** Resizes \c *this to have the same dimensions as \a other. @@ -312,11 +358,12 @@ class PlainObjectBase : public internal::dense_xpr_base::type * remain row-vectors and vectors remain vectors. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) { const OtherDerived& other = _other.derived(); - internal::check_rows_cols_for_overflow::run(Index(other.rows()), Index(other.cols())); - const Index othersize = Index(other.rows())*Index(other.cols()); + internal::check_rows_cols_for_overflow::run(other.rows(), other.cols()); + const Index othersize = other.rows()*other.cols(); if(RowsAtCompileTime == 1) { eigen_assert(other.rows() == 1 || other.cols() == 1); @@ -339,9 +386,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * Matrices are resized relative to the top-left element. In case values need to be * appended to the matrix they will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols) { - internal::conservative_resize_like_impl::run(*this, nbRows, nbCols); + internal::conservative_resize_like_impl::run(*this, rows, cols); } /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. @@ -351,10 +399,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * In case the matrix is growing, new rows will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t) { // Note: see the comment in conservativeResize(Index,Index) - conservativeResize(nbRows, cols()); + conservativeResize(rows, cols()); } /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. @@ -364,10 +413,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * In case the matrix is growing, new columns will be uninitialized. */ - EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols) { // Note: see the comment in conservativeResize(Index,Index) - conservativeResize(rows(), nbCols); + conservativeResize(rows(), cols); } /** Resizes the vector to \a size while retaining old values. @@ -378,6 +428,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * When values are appended, they will be uninitialized. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(Index size) { internal::conservative_resize_like_impl::run(*this, size); @@ -393,6 +444,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * appended to the matrix they will copied from \c other. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase& other) { internal::conservative_resize_like_impl::run(*this, other); @@ -401,6 +453,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other) { return _set(other); @@ -408,6 +461,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type /** \sa MatrixBase::lazyAssign() */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase& other) { _resize_to_match(other); @@ -415,12 +469,18 @@ class PlainObjectBase : public internal::dense_xpr_base::type } template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue& func) { resize(func.rows(), func.cols()); return Base::operator=(func); } + // Prevent user from trying to instantiate PlainObjectBase objects + // by making all its constructor protected. See bug 1074. + protected: + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase() : m_storage() { // _check_template_params(); @@ -430,20 +490,23 @@ class PlainObjectBase : public internal::dense_xpr_base::type #ifndef EIGEN_PARSED_BY_DOXYGEN // FIXME is it still needed ? /** \internal */ - PlainObjectBase(internal::constructor_without_unaligned_array_assert) + EIGEN_DEVICE_FUNC + explicit PlainObjectBase(internal::constructor_without_unaligned_array_assert) : m_storage(internal::constructor_without_unaligned_array_assert()) { // _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } #endif -#ifdef EIGEN_HAVE_RVALUE_REFERENCES - PlainObjectBase(PlainObjectBase&& other) +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC + PlainObjectBase(PlainObjectBase&& other) EIGEN_NOEXCEPT : m_storage( std::move(other.m_storage) ) { } - PlainObjectBase& operator=(PlainObjectBase&& other) + EIGEN_DEVICE_FUNC + PlainObjectBase& operator=(PlainObjectBase&& other) EIGEN_NOEXCEPT { using std::swap; swap(m_storage, other.m_storage); @@ -452,31 +515,56 @@ class PlainObjectBase : public internal::dense_xpr_base::type #endif /** Copy constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const PlainObjectBase& other) - : m_storage() + : Base(), m_storage(other.m_storage) { } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) + : m_storage(size, rows, cols) { - _check_template_params(); - lazyAssign(other); +// _check_template_params(); +// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } + /** \sa PlainObjectBase::operator=(const EigenBase&) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase &other) : m_storage() { _check_template_params(); - lazyAssign(other); + resizeLike(other); + _set_noalias(other); } - EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols) - : m_storage(a_size, nbRows, nbCols) + /** \sa PlainObjectBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) + : m_storage() { -// _check_template_params(); -// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + _check_template_params(); + resizeLike(other); + *this = other.derived(); + } + /** \brief Copy constructor with in-place evaluation */ + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue& other) + { + _check_template_params(); + // FIXME this does not automatically transpose vectors if necessary + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); } - /** \copydoc MatrixBase::operator=(const EigenBase&) + public: + + /** \brief Copies the generic expression \a other into *this. + * \copydetails DenseBase::operator=(const EigenBase &other) */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const EigenBase &other) { _resize_to_match(other); @@ -484,16 +572,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type return this->derived(); } - /** \sa MatrixBase::operator=(const EigenBase&) */ - template - EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase &other) - : m_storage(Index(other.derived().rows()) * Index(other.derived().cols()), other.derived().rows(), other.derived().cols()) - { - _check_template_params(); - internal::check_rows_cols_for_overflow::run(other.derived().rows(), other.derived().cols()); - Base::operator=(other.derived()); - } - /** \name Map * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects, * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned @@ -568,16 +646,16 @@ class PlainObjectBase : public internal::dense_xpr_base::type //@} using Base::setConstant; - Derived& setConstant(Index size, const Scalar& value); - Derived& setConstant(Index rows, Index cols, const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val); + EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val); using Base::setZero; - Derived& setZero(Index size); - Derived& setZero(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setZero(Index size); + EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols); using Base::setOnes; - Derived& setOnes(Index size); - Derived& setOnes(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setOnes(Index size); + EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols); using Base::setRandom; Derived& setRandom(Index size); @@ -596,6 +674,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type * remain row-vectors and vectors remain vectors. */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase& other) { #ifdef EIGEN_NO_AUTOMATIC_RESIZING @@ -603,8 +682,6 @@ class PlainObjectBase : public internal::dense_xpr_base::type : (rows() == other.rows() && cols() == other.cols()))) && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); EIGEN_ONLY_USED_FOR_DEBUG(other); - if(this->size()==0) - resizeLike(other); #else resizeLike(other); #endif @@ -624,25 +701,23 @@ class PlainObjectBase : public internal::dense_xpr_base::type * * \internal */ + // aliasing is dealt once in internall::call_assignment + // so at this stage we have to assume aliasing... and resising has to be done later. template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set(const DenseBase& other) { - _set_selector(other.derived(), typename internal::conditional(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type()); + internal::call_assignment(this->derived(), other.derived()); return this->derived(); } - template - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); } - - template - EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); } - /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which * is the case when creating a new matrix) so one can enforce lazy evaluation. * * \sa operator=(const MatrixBase&), _set() */ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase& other) { // I don't think we need this resize call since the lazyAssign will anyways resize @@ -650,40 +725,175 @@ class PlainObjectBase : public internal::dense_xpr_base::type //_resize_to_match(other); // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because // it wouldn't allow to copy a row-vector into a column-vector. - return internal::assign_selector::run(this->derived(), other.derived()); + internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op()); + return this->derived(); } template - EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if::type* = 0) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, typename internal::enable_if::type* = 0) { EIGEN_STATIC_ASSERT(bool(NumTraits::IsInteger) && bool(NumTraits::IsInteger), FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) - resize(nbRows,nbCols); + resize(rows,cols); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, typename internal::enable_if::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); } + template - EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if::type* = 0) + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init2(const Index& val0, const Index& val1, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==2,T1>::type* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } + + // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, + // then the argument is meant to be the size of the object. + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(Index size, typename internal::enable_if< (Base::SizeAtCompileTime!=1 || !internal::is_convertible::value) + && ((!internal::is_same::XprKind,ArrayXpr>::value || Base::SizeAtCompileTime==Dynamic)),T>::type* = 0) + { + // NOTE MSVC 2008 complains if we directly put bool(NumTraits::IsInteger) as the EIGEN_STATIC_ASSERT argument. + const bool is_integer = NumTraits::IsInteger; + EIGEN_UNUSED_VARIABLE(is_integer); + EIGEN_STATIC_ASSERT(is_integer, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(size); + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type can be implicitely converted) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, typename internal::enable_if::value,T>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) m_storage.data()[0] = val0; - m_storage.data()[1] = val1; + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar type match the index type) + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime==1 + && internal::is_convertible::value,T*>::type* = 0) + { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = Scalar(val0); } + // Initialize a fixed size matrix from a pointer to raw data + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar* data){ + this->_set_noalias(ConstMapType(data)); + } + + // Initialize an arbitrary matrix from a dense expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const DenseBase& other){ + this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from an object convertible to the Derived type. + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Derived& other){ + this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from a generic Eigen expression + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const EigenBase& other){ + this->derived() = other; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const ReturnByValue& other) + { + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const RotationBase& r) + { + this->derived() = r; + } + + // For fixed-size Array + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Scalar& val0, + typename internal::enable_if< Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T>::type* = 0) + { + Base::setConstant(val0); + } + + // For fixed-size Array + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void _init1(const Index& val0, + typename internal::enable_if< (!internal::is_same::value) + && (internal::is_same::value) + && Base::SizeAtCompileTime!=Dynamic + && Base::SizeAtCompileTime!=1 + && internal::is_convertible::value + && internal::is_same::XprKind,ArrayXpr>::value,T*>::type* = 0) + { + Base::setConstant(val0); + } + template friend struct internal::matrix_swap_impl; - /** \internal generic implementation of swap for dense storage since for dynamic-sized matrices of same type it is enough to swap the - * data pointers. + public: + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal + * \brief Override DenseBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. */ template - void _swap(DenseBase const & other) + EIGEN_DEVICE_FUNC + void swap(DenseBase & other) { enum { SwapPointers = internal::is_same::value && Base::SizeAtCompileTime==Dynamic }; - internal::matrix_swap_impl::run(this->derived(), other.const_cast_derived()); + internal::matrix_swap_impl::run(this->derived(), other.derived()); } - - public: -#ifndef EIGEN_PARSED_BY_DOXYGEN + + /** \internal + * \brief const version forwarded to DenseBase::swap + */ + template + EIGEN_DEVICE_FUNC + void swap(DenseBase const & other) + { Base::swap(other.derived()); } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void _check_template_params() { EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor) @@ -697,10 +907,9 @@ class PlainObjectBase : public internal::dense_xpr_base::type && (Options & (DontAlign|RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS) } -#endif -private: - enum { ThisConstantIsPrivateInPlainObjectBase }; + enum { IsPlainObjectBase = 1 }; +#endif }; namespace internal { @@ -708,7 +917,6 @@ namespace internal { template struct conservative_resize_like_impl { - typedef typename Derived::Index Index; static void run(DenseBase& _this, Index rows, Index cols) { if (_this.rows() == rows && _this.cols() == cols) return; @@ -724,8 +932,8 @@ struct conservative_resize_like_impl { // The storage order does not allow us to use reallocation. typename Derived::PlainObject tmp(rows,cols); - const Index common_rows = (std::min)(rows, _this.rows()); - const Index common_cols = (std::min)(cols, _this.cols()); + const Index common_rows = numext::mini(rows, _this.rows()); + const Index common_cols = numext::mini(cols, _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); _this.derived().swap(tmp); } @@ -758,8 +966,8 @@ struct conservative_resize_like_impl { // The storage order does not allow us to use reallocation. typename Derived::PlainObject tmp(other); - const Index common_rows = (std::min)(tmp.rows(), _this.rows()); - const Index common_cols = (std::min)(tmp.cols(), _this.cols()); + const Index common_rows = numext::mini(tmp.rows(), _this.rows()); + const Index common_cols = numext::mini(tmp.cols(), _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); _this.derived().swap(tmp); } @@ -774,7 +982,6 @@ struct conservative_resize_like_impl { using conservative_resize_like_impl::run; - typedef typename Derived::Index Index; static void run(DenseBase& _this, Index size) { const Index new_rows = Derived::RowsAtCompileTime==1 ? 1 : size; @@ -800,6 +1007,7 @@ struct conservative_resize_like_impl template struct matrix_swap_impl { + EIGEN_DEVICE_FUNC static inline void run(MatrixTypeA& a, MatrixTypeB& b) { a.base().swap(b); @@ -809,6 +1017,7 @@ struct matrix_swap_impl template struct matrix_swap_impl { + EIGEN_DEVICE_FUNC static inline void run(MatrixTypeA& a, MatrixTypeB& b) { static_cast(a).m_storage.swap(static_cast(b).m_storage); diff --git a/external/eigen3/Eigen/src/Core/Product.h b/external/eigen3/Eigen/src/Core/Product.h new file mode 100644 index 0000000..ae0c94b --- /dev/null +++ b/external/eigen3/Eigen/src/Core/Product.h @@ -0,0 +1,186 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PRODUCT_H +#define EIGEN_PRODUCT_H + +namespace Eigen { + +template class ProductImpl; + +namespace internal { + +template +struct traits > +{ + typedef typename remove_all::type LhsCleaned; + typedef typename remove_all::type RhsCleaned; + typedef traits LhsTraits; + typedef traits RhsTraits; + + typedef MatrixXpr XprKind; + + typedef typename ScalarBinaryOpTraits::Scalar, typename traits::Scalar>::ReturnType Scalar; + typedef typename product_promote_storage_type::ret>::ret StorageKind; + typedef typename promote_index_type::type StorageIndex; + + enum { + RowsAtCompileTime = LhsTraits::RowsAtCompileTime, + ColsAtCompileTime = RhsTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, + + // FIXME: only needed by GeneralMatrixMatrixTriangular + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? RowMajorBit + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : ( ((LhsTraits::Flags&NoPreferredStorageOrderBit) && (RhsTraits::Flags&RowMajorBit)) + || ((RhsTraits::Flags&NoPreferredStorageOrderBit) && (LhsTraits::Flags&RowMajorBit)) ) ? RowMajorBit + : NoPreferredStorageOrderBit + }; +}; + +} // end namespace internal + +/** \class Product + * \ingroup Core_Module + * + * \brief Expression of the product of two arbitrary matrices or vectors + * + * \tparam _Lhs the type of the left-hand side expression + * \tparam _Rhs the type of the right-hand side expression + * + * This class represents an expression of the product of two arbitrary matrices. + * + * The other template parameters are: + * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct + * + */ +template +class Product : public ProductImpl<_Lhs,_Rhs,Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits<_Rhs>::StorageKind, + internal::product_type<_Lhs,_Rhs>::ret>::ret> +{ + public: + + typedef _Lhs Lhs; + typedef _Rhs Rhs; + + typedef typename ProductImpl< + Lhs, Rhs, Option, + typename internal::product_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, + internal::product_type::ret>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Product) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + EIGEN_DEVICE_FUNC Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) + { + eigen_assert(lhs.cols() == rhs.rows() + && "invalid matrix product" + && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } + + protected: + + LhsNested m_lhs; + RhsNested m_rhs; +}; + +namespace internal { + +template::ret> +class dense_product_base + : public internal::dense_xpr_base >::type +{}; + +/** Convertion to scalar for inner-products */ +template +class dense_product_base + : public internal::dense_xpr_base >::type +{ + typedef Product ProductXpr; + typedef typename internal::dense_xpr_base::type Base; +public: + using Base::derived; + typedef typename Base::Scalar Scalar; + + operator const Scalar() const + { + return internal::evaluator(derived()).coeff(0,0); + } +}; + +} // namespace internal + +// Generic API dispatcher +template +class ProductImpl : public internal::generic_xpr_base, MatrixXpr, StorageKind>::type +{ + public: + typedef typename internal::generic_xpr_base, MatrixXpr, StorageKind>::type Base; +}; + +template +class ProductImpl + : public internal::dense_product_base +{ + typedef Product Derived; + + public: + + typedef typename internal::dense_product_base Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + protected: + enum { + IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && + (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic), + EnableCoeff = IsOneByOne || Option==LazyProduct + }; + + public: + + EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(row,col); + } + + EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); + + return internal::evaluator(derived()).coeff(i); + } + + +}; + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/external/eigen3/Eigen/src/Core/ProductBase.h b/external/eigen3/Eigen/src/Core/ProductBase.h deleted file mode 100644 index cf74470..0000000 --- a/external/eigen3/Eigen/src/Core/ProductBase.h +++ /dev/null @@ -1,290 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009-2010 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_PRODUCTBASE_H -#define EIGEN_PRODUCTBASE_H - -namespace Eigen { - -/** \class ProductBase - * \ingroup Core_Module - * - */ - -namespace internal { -template -struct traits > -{ - typedef MatrixXpr XprKind; - typedef typename remove_all<_Lhs>::type Lhs; - typedef typename remove_all<_Rhs>::type Rhs; - typedef typename scalar_product_traits::ReturnType Scalar; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; - enum { - RowsAtCompileTime = traits::RowsAtCompileTime, - ColsAtCompileTime = traits::ColsAtCompileTime, - MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, - MaxColsAtCompileTime = traits::MaxColsAtCompileTime, - Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) - | EvalBeforeNestingBit | EvalBeforeAssigningBit | NestByRefBit, - // Note that EvalBeforeNestingBit and NestByRefBit - // are not used in practice because nested is overloaded for products - CoeffReadCost = 0 // FIXME why is it needed ? - }; -}; -} - -#define EIGEN_PRODUCT_PUBLIC_INTERFACE(Derived) \ - typedef ProductBase Base; \ - EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Base::LhsNested LhsNested; \ - typedef typename Base::_LhsNested _LhsNested; \ - typedef typename Base::LhsBlasTraits LhsBlasTraits; \ - typedef typename Base::ActualLhsType ActualLhsType; \ - typedef typename Base::_ActualLhsType _ActualLhsType; \ - typedef typename Base::RhsNested RhsNested; \ - typedef typename Base::_RhsNested _RhsNested; \ - typedef typename Base::RhsBlasTraits RhsBlasTraits; \ - typedef typename Base::ActualRhsType ActualRhsType; \ - typedef typename Base::_ActualRhsType _ActualRhsType; \ - using Base::m_lhs; \ - using Base::m_rhs; - -template -class ProductBase : public MatrixBase -{ - public: - typedef MatrixBase Base; - EIGEN_DENSE_PUBLIC_INTERFACE(ProductBase) - - typedef typename Lhs::Nested LhsNested; - typedef typename internal::remove_all::type _LhsNested; - typedef internal::blas_traits<_LhsNested> LhsBlasTraits; - typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; - typedef typename internal::remove_all::type _ActualLhsType; - typedef typename internal::traits::Scalar LhsScalar; - - typedef typename Rhs::Nested RhsNested; - typedef typename internal::remove_all::type _RhsNested; - typedef internal::blas_traits<_RhsNested> RhsBlasTraits; - typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - typedef typename internal::remove_all::type _ActualRhsType; - typedef typename internal::traits::Scalar RhsScalar; - - // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once - typedef CoeffBasedProduct FullyLazyCoeffBaseProductType; - - public: - -#ifndef EIGEN_NO_MALLOC - typedef typename Base::PlainObject BasePlainObject; - typedef Matrix DynPlainObject; - typedef typename internal::conditional<(BasePlainObject::SizeAtCompileTime==Dynamic) || (BasePlainObject::SizeAtCompileTime*int(sizeof(Scalar)) < int(EIGEN_STACK_ALLOCATION_LIMIT)), - BasePlainObject, DynPlainObject>::type PlainObject; -#else - typedef typename Base::PlainObject PlainObject; -#endif - - ProductBase(const Lhs& a_lhs, const Rhs& a_rhs) - : m_lhs(a_lhs), m_rhs(a_rhs) - { - eigen_assert(a_lhs.cols() == a_rhs.rows() - && "invalid matrix product" - && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); - } - - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst,Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); } - - const _LhsNested& lhs() const { return m_lhs; } - const _RhsNested& rhs() const { return m_rhs; } - - // Implicit conversion to the nested type (trigger the evaluation of the product) - operator const PlainObject& () const - { - m_result.resize(m_lhs.rows(), m_rhs.cols()); - derived().evalTo(m_result); - return m_result; - } - - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - template - const Diagonal diagonal() const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); } - - const Diagonal diagonal(Index index) const - { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); } - - // restrict coeff accessors to 1x1 expressions. No need to care about mutators here since this isnt a Lvalue expression - typename Base::CoeffReturnType coeff(Index row, Index col) const - { -#ifdef EIGEN2_SUPPORT - return lhs().row(row).cwiseProduct(rhs().col(col).transpose()).sum(); -#else - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(row,col); -#endif - } - - typename Base::CoeffReturnType coeff(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - Matrix result = *this; - return result.coeff(i); - } - - const Scalar& coeffRef(Index row, Index col) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(row,col); - } - - const Scalar& coeffRef(Index i) const - { - EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) - eigen_assert(this->rows() == 1 && this->cols() == 1); - return derived().coeffRef(i); - } - - protected: - - LhsNested m_lhs; - RhsNested m_rhs; - - mutable PlainObject m_result; -}; - -// here we need to overload the nested rule for products -// such that the nested type is a const reference to a plain matrix -namespace internal { -template -struct nested, N, PlainObject> -{ - typedef typename GeneralProduct::PlainObject const& type; -}; -template -struct nested, N, PlainObject> -{ - typedef typename GeneralProduct::PlainObject const& type; -}; -} - -template -class ScaledProduct; - -// Note that these two operator* functions are not defined as member -// functions of ProductBase, because, otherwise we would have to -// define all overloads defined in MatrixBase. Furthermore, Using -// "using Base::operator*" would not work with MSVC. -// -// Also note that here we accept any compatible scalar types -template -const ScaledProduct -operator*(const ProductBase& prod, const typename Derived::Scalar& x) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const ProductBase& prod, const typename Derived::RealScalar& x) -{ return ScaledProduct(prod.derived(), x); } - - -template -const ScaledProduct -operator*(const typename Derived::Scalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -template -typename internal::enable_if::value, - const ScaledProduct >::type -operator*(const typename Derived::RealScalar& x,const ProductBase& prod) -{ return ScaledProduct(prod.derived(), x); } - -namespace internal { -template -struct traits > - : traits, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> > -{ - typedef typename traits::StorageKind StorageKind; -}; -} - -template -class ScaledProduct - : public ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> -{ - public: - typedef ProductBase, - typename NestedProduct::_LhsNested, - typename NestedProduct::_RhsNested> Base; - typedef typename Base::Scalar Scalar; - typedef typename Base::PlainObject PlainObject; -// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct) - - ScaledProduct(const NestedProduct& prod, const Scalar& x) - : Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {} - - template - inline void evalTo(Dest& dst) const { dst.setZero(); scaleAndAddTo(dst, Scalar(1)); } - - template - inline void addTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(1)); } - - template - inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); } - - template - inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); } - - const Scalar& alpha() const { return m_alpha; } - - protected: - const NestedProduct& m_prod; - Scalar m_alpha; -}; - -/** \internal - * Overloaded to perform an efficient C = (A*B).lazy() */ -template -template -Derived& MatrixBase::lazyAssign(const ProductBase& other) -{ - other.derived().evalTo(derived()); - return derived(); -} - -} // end namespace Eigen - -#endif // EIGEN_PRODUCTBASE_H diff --git a/external/eigen3/Eigen/src/Core/ProductEvaluators.h b/external/eigen3/Eigen/src/Core/ProductEvaluators.h new file mode 100644 index 0000000..c42725d --- /dev/null +++ b/external/eigen3/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,1105 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * Evaluator of a product expression. + * Since products require special treatments to handle all possible cases, + * we simply deffer the evaluation logic to a product_evaluator class + * which offers more partial specialization possibilities. + * + * \sa class product_evaluator + */ +template +struct evaluator > + : public product_evaluator > +{ + typedef Product XprType; + typedef product_evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" +// TODO we should apply that rule only if that's really helpful +template +struct evaluator_assume_aliasing, + const CwiseNullaryOp, Plain1>, + const Product > > +{ + static const bool value = true; +}; +template +struct evaluator, + const CwiseNullaryOp, Plain1>, + const Product > > + : public evaluator > +{ + typedef CwiseBinaryOp, + const CwiseNullaryOp, Plain1>, + const Product > XprType; + typedef evaluator > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) + {} +}; + + +template +struct evaluator, DiagIndex> > + : public evaluator, DiagIndex> > +{ + typedef Diagonal, DiagIndex> XprType; + typedef evaluator, DiagIndex> > Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : Base(Diagonal, DiagIndex>( + Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), + xpr.index() )) + {} +}; + + +// Helper class to perform a matrix product with the destination at hand. +// Depending on the sizes of the factors, there are different evaluation strategies +// as controlled by internal::product_type. +template< typename Lhs, typename Rhs, + typename LhsShape = typename evaluator_traits::Shape, + typename RhsShape = typename evaluator_traits::Shape, + int ProductType = internal::product_type::value> +struct generic_product_impl; + +template +struct evaluator_assume_aliasing > { + static const bool value = true; +}; + +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template +struct product_evaluator, ProductTag, LhsShape, RhsShape> + : public evaluator::PlainObject> +{ + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + +// FIXME shall we handle nested_eval here?, +// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.) +// typedef typename internal::nested_eval::type LhsNested; +// typedef typename internal::nested_eval::type RhsNested; +// typedef typename internal::remove_all::type LhsNestedCleaned; +// typedef typename internal::remove_all::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl::evalTo(m_result, lhs, rhs); + + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + +protected: + PlainObject m_result; +}; + +// The following three shortcuts are enabled only if the scalar types match excatly. +// TODO: we could enable them for different scalar types when the product is not vectorized. + +// Dense = Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + // FIXME shall we handle nested_eval here? + generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense += Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::add_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + // FIXME shall we handle nested_eval here? + generic_product_impl::addTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense -= Product +template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar> +struct Assignment, internal::sub_assign_op, Dense2Dense, + typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type> +{ + typedef Product SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) + { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + // FIXME shall we handle nested_eval here? + generic_product_impl::subTo(dst, src.lhs(), src.rhs()); + } +}; + + +// Dense ?= scalar * Product +// TODO we should apply that rule if that's really helpful +// for instance, this is not good for inner products +template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain> +struct Assignment, const CwiseNullaryOp,Plain>, + const Product >, AssignFunc, Dense2Dense> +{ + typedef CwiseBinaryOp, + const CwiseNullaryOp,Plain>, + const Product > SrcXprType; + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func) + { + call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func); + } +}; + +//---------------------------------------- +// Catch "Dense ?= xpr + Product<>" expression to save one temporary +// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct + +template +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, + const Product >, DenseShape > { + static const bool value = true; +}; + +template +struct evaluator_assume_aliasing::Scalar>, const OtherXpr, + const Product >, DenseShape > { + static const bool value = true; +}; + +template +struct assignment_from_xpr_op_product +{ + template + static EIGEN_STRONG_INLINE + void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/) + { + call_assignment_no_alias(dst, src.lhs(), Func1()); + call_assignment_no_alias(dst, src.rhs(), Func2()); + } +}; + +#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \ + template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \ + struct Assignment, const OtherXpr, \ + const Product >, internal::ASSIGN_OP, Dense2Dense> \ + : assignment_from_xpr_op_product, internal::ASSIGN_OP, internal::ASSIGN_OP2 > \ + {} + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op); + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op); + +//---------------------------------------- + +template +struct generic_product_impl +{ + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); + } + + template + static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } +}; + + +/*********************************************************************** +* Implementation of outer dense * dense vector product +***********************************************************************/ + +// Column major result +template +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&) +{ + evaluator rhsEval(rhs); + typename nested_eval::type actual_lhs(lhs); + // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + const Index cols = dst.cols(); + for (Index j=0; j +void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&) +{ + evaluator lhsEval(lhs); + typename nested_eval::type actual_rhs(rhs); + // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored + // FIXME not very good if lhs is real and rhs complex while alpha is real too + const Index rows = dst.rows(); + for (Index i=0; i +struct generic_product_impl +{ + template struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {}; + typedef typename Product::Scalar Scalar; + + // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose + struct set { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } }; + struct add { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } }; + struct sub { template void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } }; + struct adds { + Scalar m_scale; + explicit adds(const Scalar& s) : m_scale(s) {} + template void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template + static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); + } + + template + static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); + } + + template + static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); + } + + template + static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); + } + +}; + + +// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo +template +struct generic_product_impl_base +{ + typedef typename Product::Scalar Scalar; + + template + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); } + + template + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); } + + template + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } + + template + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); } + +}; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; + typedef typename Product::Scalar Scalar; + enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; + typedef typename internal::remove_all::type>::type MatrixType; + + template + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + LhsNested actual_lhs(lhs); + RhsNested actual_rhs(rhs); + internal::gemv_dense_selector::HasUsableDirectAccess) + >::run(actual_lhs, actual_rhs, dst, alpha); + } +}; + +template +struct generic_product_impl +{ + typedef typename Product::Scalar Scalar; + + template + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // Same as: dst.noalias() = lhs.lazyProduct(rhs); + // but easier on the compiler side + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + } + + template + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() += lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + } + + template + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + { + // dst.noalias() -= lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + } + +// template +// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) +// { dst.noalias() += alpha * lhs.lazyProduct(rhs); } +}; + +// This specialization enforces the use of a coefficient-based evaluation strategy +template +struct generic_product_impl + : generic_product_impl {}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. + +template +struct etor_product_coeff_impl; + +template +struct etor_product_packet_impl; + +template +struct product_evaluator, ProductTag, DenseShape, DenseShape> + : evaluator_base > +{ + typedef Product XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + explicit product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed, + // or perhaps declare them on the fly on the packet method... We have experiment to check what's best. + m_innerDim(xpr.lhs().cols()) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); +#if 0 + std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n"; + std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n"; + std::cerr << "LhsAlignment= " << LhsAlignment << "\n"; + std::cerr << "RhsAlignment= " << RhsAlignment << "\n"; + std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n"; + std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n"; + std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n"; + std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n"; + std::cerr << "Alignment= " << Alignment << "\n"; + std::cerr << "Flags= " << Flags << "\n"; +#endif + } + + // Everything below here is taken from CoeffBasedProduct.h + + typedef typename internal::nested_eval::type LhsNested; + typedef typename internal::nested_eval::type RhsNested; + + typedef typename internal::remove_all::type LhsNestedCleaned; + typedef typename internal::remove_all::type RhsNestedCleaned; + + typedef evaluator LhsEtorType; + typedef evaluator RhsEtorType; + + enum { + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime + }; + + typedef typename find_best_packet::type LhsVecPacketType; + typedef typename find_best_packet::type RhsVecPacketType; + + enum { + + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, + RhsCoeffReadCost = RhsEtorType::CoeffReadCost, + CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost + : InnerSize == Dynamic ? HugeCost + : InnerSize * (NumTraits::MulCost + LhsCoeffReadCost + RhsCoeffReadCost) + + (InnerSize - 1) * NumTraits::AddCost, + + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + LhsVecPacketSize = unpacket_traits::size, + RhsVecPacketSize = unpacket_traits::size, + + // Here, we don't care about alignment larger than the usable packet size. + LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))), + RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))), + + SameType = is_same::value, + + CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (bool(RhsRowMajor) && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) + | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), + + LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), + RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), + + Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) + : 0, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (InnerSize % packet_traits::size == 0) + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const + { + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector + */ + EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const + { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; + return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum(); + } + + template + const PacketType packet(Index row, Index col) const + { + PacketType res; + typedef etor_product_packet_impl PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template + const PacketType packet(Index index) const + { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0; + return packet(row,col); + } + +protected: + typename internal::add_const_on_value_type::type m_lhs; + typename internal::add_const_on_value_type::type m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +template +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> +{ + typedef Product XprType; + typedef Product BaseProduct; + typedef product_evaluator Base; + enum { + Flags = Base::Flags | EvalBeforeNestingBit + }; + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(BaseProduct(xpr.lhs(),xpr.rhs())) + {} +}; + +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(pset1(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet(Index(UnrollingIndex-1), col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res) + { + etor_product_packet_impl::run(row, col, lhs, rhs, innerDim, res); + res = pmadd(lhs.template packet(row, Index(UnrollingIndex-1)), pset1(rhs.coeff(Index(UnrollingIndex-1), col)), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(pset1(lhs.coeff(row, Index(0))),rhs.template packet(Index(0), col)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res) + { + res = pmul(lhs.template packet(row, Index(0)), pset1(rhs.coeff(Index(0), col))); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(typename unpacket_traits::type(0)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res) + { + res = pset1(typename unpacket_traits::type(0)); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(typename unpacket_traits::type(0)); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } +}; + +template +struct etor_product_packet_impl +{ + static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res) + { + res = pset1(typename unpacket_traits::type(0)); + for(Index i = 0; i < innerDim; ++i) + res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); + } +}; + + +/*************************************************************************** +* Triangular products +***************************************************************************/ +template +struct triangular_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl + ::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + triangular_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* SelfAdjoint products +***************************************************************************/ +template +struct selfadjoint_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl +: generic_product_impl_base > +{ + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + { + selfadjoint_product_impl::run(dst, lhs, rhs.nestedExpression(), alpha); + } +}; + + +/*************************************************************************** +* Diagonal products +***************************************************************************/ + +template +struct diagonal_product_evaluator_base + : evaluator_base +{ + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; +public: + enum { + CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, + + MatrixFlags = evaluator::Flags, + DiagFlags = evaluator::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), + Alignment = evaluator::Alignment + }; + + diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) + : m_diagImpl(diag), m_matImpl(mat) + { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const + { + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + } + +protected: + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const + { + return internal::pmul(m_matImpl.template packet(row, col), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const + { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! + }; + return internal::pmul(m_matImpl.template packet(row, col), + m_diagImpl.template packet(id)); + } + + evaluator m_diagImpl; + evaluator m_matImpl; +}; + +// diagonal * dense +template +struct product_evaluator, ProductTag, DiagonalShape, DenseShape> + : diagonal_product_evaluator_base, OnTheLeft> +{ + typedef diagonal_product_evaluator_base, OnTheLeft> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { + StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor + }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.rhs(), xpr.lhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. + return this->template packet_impl(row,col, row, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +// dense * diagonal +template +struct product_evaluator, ProductTag, DenseShape, DiagonalShape> + : diagonal_product_evaluator_base, OnTheRight> +{ + typedef diagonal_product_evaluator_base, OnTheRight> Base; + using Base::m_diagImpl; + using Base::m_matImpl; + using Base::coeff; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor }; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) + : Base(xpr.lhs(), xpr.rhs().diagonal()) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const + { + return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); + } + +#ifndef __CUDACC__ + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const + { + return this->template packet_impl(row,col, col, + typename internal::conditional::type()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const + { + return packet(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx); + } +#endif +}; + +/*************************************************************************** +* Products with permutation matrices +***************************************************************************/ + +/** \internal + * \class permutation_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h + */ +template +struct permutation_matrix_product; + +template +struct permutation_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr) + { + MatrixType mat(xpr); + const Index n = Side==OnTheLeft ? mat.rows() : mat.cols(); + // FIXME we need an is_same for expression that is not sensitive to constness. For instance + // is_same_xpr, Block >::value should be true. + //if(is_same::value && extract_data(dst) == extract_data(mat)) + if(is_same_dense(dst, mat)) + { + // apply the permutation inplace + Matrix mask(perm.size()); + mask.fill(false); + Index r = 0; + while(r < perm.size()) + { + // search for the next seed + while(r=perm.size()) + break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + Index kPrev = k0; + mask.coeffRef(k0) = true; + for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k)) + { + Block(dst, k) + .swap(Block + (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev)); + + mask.coeffRef(k) = true; + kPrev = k; + } + } + } + else + { + for(Index i = 0; i < n; ++i) + { + Block + (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i) + + = + + Block + (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i); + } + } + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, rhs, lhs); + } +}; + +template +struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) + { + permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) + { + permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + + +/*************************************************************************** +* Products with transpositions matrices +***************************************************************************/ + +// FIXME could we unify Transpositions and Permutation into a single "shape"?? + +/** \internal + * \class transposition_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + */ +template +struct transposition_matrix_product +{ + typedef typename nested_eval::type MatrixType; + typedef typename remove_all::type MatrixTypeCleaned; + + template + static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) + { + MatrixType mat(xpr); + typedef typename TranspositionType::StorageIndex StorageIndex; + const Index size = tr.size(); + StorageIndex j = 0; + + if(!is_same_dense(dst,mat)) + dst = mat; + + for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, rhs, lhs); + } +}; + + +template +struct generic_product_impl, Rhs, TranspositionsShape, MatrixShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) + { + transposition_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, TranspositionsShape, ProductTag> +{ + template + static void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) + { + transposition_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H diff --git a/external/eigen3/Eigen/src/Core/Random.h b/external/eigen3/Eigen/src/Core/Random.h index 480fea4..6faf789 100644 --- a/external/eigen3/Eigen/src/Core/Random.h +++ b/external/eigen3/Eigen/src/Core/Random.h @@ -16,8 +16,7 @@ namespace internal { template struct scalar_random_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_random_op) - template - inline const Scalar operator() (Index, Index = 0) const { return random(); } + inline const Scalar operator() () const { return random(); } }; template @@ -28,12 +27,18 @@ struct functor_traits > /** \returns a random matrix expression * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * The parameters \a rows and \a cols are the number of rows and of columns of * the returned matrix. Must be compatible with this MatrixBase type. * + * \not_reentrant + * * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used * instead. + * * * Example: \include MatrixBase_random_int_int.cpp * Output: \verbinclude MatrixBase_random_int_int.out @@ -41,22 +46,28 @@ struct functor_traits > * This expression has the "evaluate before nesting" flag so that it will be evaluated into * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. + * + * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators. * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random() + * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index rows, Index cols) { return NullaryExpr(rows, cols, internal::scalar_random_op()); } /** \returns a random vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. * * The parameter \a size is the size of the returned vector. * Must be compatible with this MatrixBase type. * * \only_for_vectors + * \not_reentrant * * This variant is meant to be used for dynamic-size vector types. For fixed-size types, * it is redundant to pass \a size as argument, so Random() should be used @@ -69,10 +80,10 @@ DenseBase::Random(Index rows, Index cols) * a temporary vector whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random() + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random() */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index size) { return NullaryExpr(size, internal::scalar_random_op()); @@ -80,6 +91,9 @@ DenseBase::Random(Index size) /** \returns a fixed-size random matrix or vector expression * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you * need to use the variants taking size arguments. * @@ -89,11 +103,13 @@ DenseBase::Random(Index size) * This expression has the "evaluate before nesting" flag so that it will be evaluated into * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. + * + * \not_reentrant * - * \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index) + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index) */ template -inline const CwiseNullaryOp::Scalar>, Derived> +inline const typename DenseBase::RandomReturnType DenseBase::Random() { return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op()); @@ -101,6 +117,11 @@ DenseBase::Random() /** Sets all coefficients in this expression to random values. * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * * Example: \include MatrixBase_setRandom.cpp * Output: \verbinclude MatrixBase_setRandom.out * @@ -114,12 +135,16 @@ inline Derived& DenseBase::setRandom() /** Resizes to the given \a newSize, and sets all coefficients in this expression to random values. * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * * \only_for_vectors + * \not_reentrant * * Example: \include Matrix_setRandom_int.cpp * Output: \verbinclude Matrix_setRandom_int.out * - * \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random() + * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random() */ template EIGEN_STRONG_INLINE Derived& @@ -131,19 +156,24 @@ PlainObjectBase::setRandom(Index newSize) /** Resizes to the given size, and sets all coefficients in this expression to random values. * - * \param nbRows the new number of rows - * \param nbCols the new number of columns + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * \param rows the new number of rows + * \param cols the new number of columns * * Example: \include Matrix_setRandom_int_int.cpp * Output: \verbinclude Matrix_setRandom_int_int.out * - * \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random() + * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random() */ template EIGEN_STRONG_INLINE Derived& -PlainObjectBase::setRandom(Index nbRows, Index nbCols) +PlainObjectBase::setRandom(Index rows, Index cols) { - resize(nbRows, nbCols); + resize(rows, cols); return setRandom(); } diff --git a/external/eigen3/Eigen/src/Core/Redux.h b/external/eigen3/Eigen/src/Core/Redux.h index 9b8662a..b6e8f88 100644 --- a/external/eigen3/Eigen/src/Core/Redux.h +++ b/external/eigen3/Eigen/src/Core/Redux.h @@ -27,8 +27,9 @@ template struct redux_traits { public: + typedef typename find_best_packet::type PacketType; enum { - PacketSize = packet_traits::size, + PacketSize = unpacket_traits::size, InnerMaxSize = int(Derived::IsRowMajor) ? Derived::MaxColsAtCompileTime : Derived::MaxRowsAtCompileTime @@ -37,8 +38,8 @@ public: enum { MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) && (functor_traits::PacketAccess), - MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), - MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize + MayLinearVectorize = bool(MightVectorize) && (int(Derived::Flags)&LinearAccessBit), + MaySliceVectorize = bool(MightVectorize) && int(InnerMaxSize)>=3*PacketSize }; public: @@ -50,21 +51,34 @@ public: public: enum { - Cost = ( Derived::SizeAtCompileTime == Dynamic - || Derived::CoeffReadCost == Dynamic - || (Derived::SizeAtCompileTime!=1 && functor_traits::Cost == Dynamic) - ) ? Dynamic - : Derived::SizeAtCompileTime * Derived::CoeffReadCost - + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, + Cost = Derived::SizeAtCompileTime == Dynamic ? HugeCost + : Derived::SizeAtCompileTime * Derived::CoeffReadCost + (Derived::SizeAtCompileTime-1) * functor_traits::Cost, UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) }; public: enum { - Unrolling = Cost != Dynamic && Cost <= UnrollingLimit - ? CompleteUnrolling - : NoUnrolling + Unrolling = Cost <= UnrollingLimit ? CompleteUnrolling : NoUnrolling }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + std::cerr << "Xpr: " << typeid(typename Derived::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(Derived::Flags) + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(Unrolling) + std::cerr << std::endl; + } +#endif }; /*************************************************************************** @@ -82,6 +96,7 @@ struct redux_novec_unroller typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { return func(redux_novec_unroller::run(mat,func), @@ -99,6 +114,7 @@ struct redux_novec_unroller typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) { return mat.coeffByOuterInner(outer, inner); @@ -112,6 +128,7 @@ template struct redux_novec_unroller { typedef typename Derived::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } }; @@ -121,12 +138,12 @@ template struct redux_vec_unroller { enum { - PacketSize = packet_traits::size, + PacketSize = redux_traits::PacketSize, HalfLength = Length/2 }; typedef typename Derived::Scalar Scalar; - typedef typename packet_traits::type PacketScalar; + typedef typename redux_traits::PacketType PacketScalar; static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) { @@ -140,18 +157,18 @@ template struct redux_vec_unroller { enum { - index = Start * packet_traits::size, + index = Start * redux_traits::PacketSize, outer = index / int(Derived::InnerSizeAtCompileTime), inner = index % int(Derived::InnerSizeAtCompileTime), - alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned + alignment = Derived::Alignment }; typedef typename Derived::Scalar Scalar; - typedef typename packet_traits::type PacketScalar; + typedef typename redux_traits::PacketType PacketScalar; static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) { - return mat.template packetByOuterInner(outer, inner); + return mat.template packetByOuterInner(outer, inner); } }; @@ -169,8 +186,8 @@ template struct redux_impl { typedef typename Derived::Scalar Scalar; - typedef typename Derived::Index Index; - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC + static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); Scalar res; @@ -193,19 +210,19 @@ template struct redux_impl { typedef typename Derived::Scalar Scalar; - typedef typename packet_traits::type PacketScalar; - typedef typename Derived::Index Index; + typedef typename redux_traits::PacketType PacketScalar; - static Scalar run(const Derived& mat, const Func& func) + static Scalar run(const Derived &mat, const Func& func) { const Index size = mat.size(); - eigen_assert(size && "you are using an empty matrix"); - const Index packetSize = packet_traits::size; - const Index alignedStart = internal::first_aligned(mat); + + const Index packetSize = redux_traits::PacketSize; + const int packetAlignment = unpacket_traits::alignment; enum { - alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit) - ? Aligned : Unaligned + alignment0 = (bool(Derived::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) ? int(packetAlignment) : int(Unaligned), + alignment = EIGEN_PLAIN_ENUM_MAX(alignment0, Derived::Alignment) }; + const Index alignedStart = internal::first_default_aligned(mat.nestedExpression()); const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); const Index alignedEnd2 = alignedStart + alignedSize2; @@ -213,19 +230,19 @@ struct redux_impl Scalar res; if(alignedSize) { - PacketScalar packet_res0 = mat.template packet(alignedStart); + PacketScalar packet_res0 = mat.template packet(alignedStart); if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop { - PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); + PacketScalar packet_res1 = mat.template packet(alignedStart+packetSize); for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) { - packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); - packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); + packet_res0 = func.packetOp(packet_res0, mat.template packet(index)); + packet_res1 = func.packetOp(packet_res1, mat.template packet(index+packetSize)); } packet_res0 = func.packetOp(packet_res0,packet_res1); if(alignedEnd>alignedEnd2) - packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); + packet_res0 = func.packetOp(packet_res0, mat.template packet(alignedEnd2)); } res = func.predux(packet_res0); @@ -252,25 +269,24 @@ template struct redux_impl { typedef typename Derived::Scalar Scalar; - typedef typename packet_traits::type PacketScalar; - typedef typename Derived::Index Index; + typedef typename redux_traits::PacketType PacketType; - static Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); const Index innerSize = mat.innerSize(); const Index outerSize = mat.outerSize(); enum { - packetSize = packet_traits::size + packetSize = redux_traits::PacketSize }; const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; Scalar res; if(packetedInnerSize) { - PacketScalar packet_res = mat.template packet(0,0); + PacketType packet_res = mat.template packet(0,0); for(Index j=0; j(j,i)); + packet_res = func.packetOp(packet_res, mat.template packetByOuterInner(j,i)); res = func.predux(packet_res); for(Index j=0; j struct redux_impl { typedef typename Derived::Scalar Scalar; - typedef typename packet_traits::type PacketScalar; + + typedef typename redux_traits::PacketType PacketScalar; enum { - PacketSize = packet_traits::size, + PacketSize = redux_traits::PacketSize, Size = Derived::SizeAtCompileTime, VectorizedSize = (Size / PacketSize) * PacketSize }; - static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) { eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); - Scalar res = func.predux(redux_vec_unroller::run(mat,func)); - if (VectorizedSize != Size) - res = func(res,redux_novec_unroller::run(mat,func)); - return res; + if (VectorizedSize > 0) { + Scalar res = func.predux(redux_vec_unroller::run(mat,func)); + if (VectorizedSize != Size) + res = func(res,redux_novec_unroller::run(mat,func)); + return res; + } + else { + return redux_novec_unroller::run(mat,func); + } } }; +// evaluator adaptor +template +class redux_evaluator +{ +public: + typedef _XprType XprType; + EIGEN_DEVICE_FUNC explicit redux_evaluator(const XprType &xpr) : m_evaluator(xpr), m_xpr(xpr) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, + // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime from the evaluator + Flags = evaluator::Flags & ~DirectAccessBit, + IsRowMajor = XprType::IsRowMajor, + SizeAtCompileTime = XprType::SizeAtCompileTime, + InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime, + CoeffReadCost = evaluator::CoeffReadCost, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC Index rows() const { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC Index cols() const { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC Index size() const { return m_xpr.size(); } + EIGEN_DEVICE_FUNC Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC Index outerSize() const { return m_xpr.outerSize(); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index row, Index col) const + { return m_evaluator.coeff(row, col); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeff(Index index) const + { return m_evaluator.coeff(index); } + + template + PacketType packet(Index row, Index col) const + { return m_evaluator.template packet(row, col); } + + template + PacketType packet(Index index) const + { return m_evaluator.template packet(index); } + + EIGEN_DEVICE_FUNC + CoeffReturnType coeffByOuterInner(Index outer, Index inner) const + { return m_evaluator.coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + template + PacketType packetByOuterInner(Index outer, Index inner) const + { return m_evaluator.template packet(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); } + + const XprType & nestedExpression() const { return m_xpr; } + +protected: + internal::evaluator m_evaluator; + const XprType &m_xpr; +}; + } // end namespace internal /*************************************************************************** @@ -317,18 +401,21 @@ struct redux_impl /** \returns the result of a full redux operation on the whole matrix or vector using \a func * * The template parameter \a BinaryOp is the type of the functor \a func which must be - * an associative operator. Both current STL and TR1 functor styles are handled. + * an associative operator. Both current C++98 and C++11 functor styles are handled. * * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() */ template template -EIGEN_STRONG_INLINE typename internal::result_of::Scalar)>::type +typename internal::traits::Scalar DenseBase::redux(const Func& func) const { - typedef typename internal::remove_all::type ThisNested; - return internal::redux_impl - ::run(derived(), func); + eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); + + typedef typename internal::redux_evaluator ThisEvaluator; + ThisEvaluator thisEval(derived()); + + return internal::redux_impl::run(thisEval, func); } /** \returns the minimum of all coefficients of \c *this. @@ -338,7 +425,7 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { - return this->redux(Eigen::internal::scalar_min_op()); + return derived().redux(Eigen::internal::scalar_min_op()); } /** \returns the maximum of all coefficients of \c *this. @@ -348,10 +435,12 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { - return this->redux(Eigen::internal::scalar_max_op()); + return derived().redux(Eigen::internal::scalar_max_op()); } -/** \returns the sum of all coefficients of *this +/** \returns the sum of all coefficients of \c *this + * + * If \c *this is empty, then the value 0 is returned. * * \sa trace(), prod(), mean() */ @@ -361,7 +450,7 @@ DenseBase::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(0); - return this->redux(Eigen::internal::scalar_sum_op()); + return derived().redux(Eigen::internal::scalar_sum_op()); } /** \returns the mean of all coefficients of *this @@ -372,7 +461,14 @@ template EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::mean() const { - return Scalar(this->redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif } /** \returns the product of all coefficients of *this @@ -388,7 +484,7 @@ DenseBase::prod() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) return Scalar(1); - return this->redux(Eigen::internal::scalar_product_op()); + return derived().redux(Eigen::internal::scalar_product_op()); } /** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. diff --git a/external/eigen3/Eigen/src/Core/Ref.h b/external/eigen3/Eigen/src/Core/Ref.h index 7a3beca..bdf24f5 100644 --- a/external/eigen3/Eigen/src/Core/Ref.h +++ b/external/eigen3/Eigen/src/Core/Ref.h @@ -12,79 +12,6 @@ namespace Eigen { -template class RefBase; -template,OuterStride<> >::type > class Ref; - -/** \class Ref - * \ingroup Core_Module - * - * \brief A matrix or vector expression mapping an existing expressions - * - * \tparam PlainObjectType the equivalent matrix type of the mapped data - * \tparam Options specifies whether the pointer is \c #Aligned, or \c #Unaligned. - * The default is \c #Unaligned. - * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), - * but accept a variable outer stride (leading dimension). - * This can be overridden by specifying strides. - * The type passed here must be a specialization of the Stride template, see examples below. - * - * This class permits to write non template functions taking Eigen's object as parameters while limiting the number of copies. - * A Ref<> object can represent either a const expression or a l-value: - * \code - * // in-out argument: - * void foo1(Ref x); - * - * // read-only const argument: - * void foo2(const Ref& x); - * \endcode - * - * In the in-out case, the input argument must satisfies the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. - * By default, a Ref can reference any dense vector expression of float having a contiguous memory layout. - * Likewise, a Ref can reference any column major dense matrix expression of float whose column's elements are contiguously stored with - * the possibility to have a constant space inbetween each column, i.e.: the inner stride mmust be equal to 1, but the outer-stride (or leading dimension), - * can be greater than the number of rows. - * - * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. - * Here are some examples: - * \code - * MatrixXf A; - * VectorXf a; - * foo1(a.head()); // OK - * foo1(A.col()); // OK - * foo1(A.row()); // compilation error because here innerstride!=1 - * foo2(A.row()); // The row is copied into a contiguous temporary - * foo2(2*a); // The expression is evaluated into a temporary - * foo2(A.col().segment(2,4)); // No temporary - * \endcode - * - * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameter. - * Here is an example accepting an innerstride!=1: - * \code - * // in-out argument: - * void foo3(Ref > x); - * foo3(A.row()); // OK - * \endcode - * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involved more - * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overloads internally calling a - * template function, e.g.: - * \code - * // in the .h: - * void foo(const Ref& A); - * void foo(const Ref >& A); - * - * // in the .cpp: - * template void foo_impl(const TypeOfA& A) { - * ... // crazy code goes here - * } - * void foo(const Ref& A) { foo_impl(A); } - * void foo(const Ref >& A) { foo_impl(A); } - * \endcode - * - * - * \sa PlainObjectBase::Map(), \ref TopicStorageOrders - */ - namespace internal { template @@ -95,7 +22,8 @@ struct traits > typedef _StrideType StrideType; enum { Options = _Options, - Flags = traits >::Flags | NestByRefBit + Flags = traits >::Flags | NestByRefBit, + Alignment = traits >::Alignment }; template struct match { @@ -107,7 +35,13 @@ struct traits > || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1), OuterStrideMatch = Derived::IsVectorAtCompileTime || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime), - AlignmentMatch = (_Options!=Aligned) || ((PlainObjectType::Flags&AlignedBit)==0) || ((traits::Flags&AlignedBit)==AlignedBit), + // NOTE, this indirection of evaluator::Alignment is needed + // to workaround a very strange bug in MSVC related to the instantiation + // of has_*ary_operator in evaluator. + // This line is surprisingly very sensitive. For instance, simply adding parenthesis + // as "DerivedAlignment = (int(evaluator::Alignment))," will make MSVC fail... + DerivedAlignment = int(evaluator::Alignment), + AlignmentMatch = (int(traits::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment ScalarTypeMatch = internal::is_same::value, MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch }; @@ -132,12 +66,12 @@ public: typedef MapBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; } - inline Index outerStride() const + EIGEN_DEVICE_FUNC inline Index outerStride() const { return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() : IsVectorAtCompileTime ? this->size() @@ -145,7 +79,7 @@ public: : this->rows(); } - RefBase() + EIGEN_DEVICE_FUNC RefBase() : Base(0,RowsAtCompileTime==Dynamic?0:RowsAtCompileTime,ColsAtCompileTime==Dynamic?0:ColsAtCompileTime), // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime, @@ -159,7 +93,7 @@ protected: typedef Stride StrideBase; template - void construct(Expression& expr) + EIGEN_DEVICE_FUNC void construct(Expression& expr) { if(PlainObjectType::RowsAtCompileTime==1) { @@ -184,15 +118,83 @@ protected: StrideBase m_stride; }; - +/** \class Ref + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing expression + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, \c #Aligned16, \c #Aligned8 or \c #Unaligned. + * The default is \c #Unaligned. + * \tparam StrideType optionally specifies strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), + * but accepts a variable outer stride (leading dimension). + * This can be overridden by specifying strides. + * The type passed here must be a specialization of the Stride template, see examples below. + * + * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the number of copies. + * A Ref<> object can represent either a const expression or a l-value: + * \code + * // in-out argument: + * void foo1(Ref x); + * + * // read-only const argument: + * void foo2(const Ref& x); + * \endcode + * + * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation issue will be triggered. + * By default, a Ref can reference any dense vector expression of float having a contiguous memory layout. + * Likewise, a Ref can reference any column-major dense matrix expression of float whose column's elements are contiguously stored with + * the possibility to have a constant space in-between each column, i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) + * can be greater than the number of rows. + * + * In the const case, if the input expression does not match the above requirement, then it is evaluated into a temporary before being passed to the function. + * Here are some examples: + * \code + * MatrixXf A; + * VectorXf a; + * foo1(a.head()); // OK + * foo1(A.col()); // OK + * foo1(A.row()); // Compilation error because here innerstride!=1 + * foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object + * foo2(A.row().transpose()); // The row is copied into a contiguous temporary + * foo2(2*a); // The expression is evaluated into a temporary + * foo2(A.col().segment(2,4)); // No temporary + * \endcode + * + * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters. + * Here is an example accepting an innerstride!=1: + * \code + * // in-out argument: + * void foo3(Ref > x); + * foo3(A.row()); // OK + * \endcode + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to exploit vectorization, and will involve more + * expensive address computations even if the input is contiguously stored in memory. To overcome this issue, one might propose to overload internally calling a + * template function, e.g.: + * \code + * // in the .h: + * void foo(const Ref& A); + * void foo(const Ref >& A); + * + * // in the .cpp: + * template void foo_impl(const TypeOfA& A) { + * ... // crazy code goes here + * } + * void foo(const Ref& A) { foo_impl(A); } + * void foo(const Ref >& A) { foo_impl(A); } + * \endcode + * + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ template class Ref : public RefBase > { private: typedef internal::traits Traits; template - inline Ref(const PlainObjectBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); + EIGEN_DEVICE_FUNC inline Ref(const PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0); public: typedef RefBase Base; @@ -201,23 +203,24 @@ template class Ref #ifndef EIGEN_PARSED_BY_DOXYGEN template - inline Ref(PlainObjectBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + EIGEN_DEVICE_FUNC inline Ref(PlainObjectBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) { - EIGEN_STATIC_ASSERT(static_cast(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); Base::construct(expr.derived()); } template - inline Ref(const DenseBase& expr, - typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::MatchAtCompileTime),Derived>::type* = 0) #else + /** Implicit constructor from any dense expression */ template inline Ref(DenseBase& expr) #endif { - EIGEN_STATIC_ASSERT(static_cast(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); - EIGEN_STATIC_ASSERT(static_cast(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); - enum { THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY = Derived::ThisConstantIsPrivateInPlainObjectBase}; + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase,THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); Base::construct(expr.const_cast_derived()); } @@ -236,36 +239,36 @@ template class Ref< EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template - inline Ref(const DenseBase& expr, - typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + typename internal::enable_if::ScalarTypeMatch),Derived>::type* = 0) { // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; // std::cout << int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n"; construct(expr.derived(), typename Traits::template match::type()); } - - inline Ref(const Ref& other) : Base(other) { + + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) { // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy } template - inline Ref(const RefBase& other) { + EIGEN_DEVICE_FUNC inline Ref(const RefBase& other) { construct(other.derived(), typename Traits::template match::type()); } protected: template - void construct(const Expression& expr,internal::true_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr,internal::true_type) { Base::construct(expr); } template - void construct(const Expression& expr, internal::false_type) + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { - m_object.lazyAssign(expr); + internal::call_assignment_no_alias(m_object,expr,internal::assign_op()); Base::construct(m_object); } diff --git a/external/eigen3/Eigen/src/Core/Replicate.h b/external/eigen3/Eigen/src/Core/Replicate.h index ac4537c..9960ef8 100644 --- a/external/eigen3/Eigen/src/Core/Replicate.h +++ b/external/eigen3/Eigen/src/Core/Replicate.h @@ -12,21 +12,6 @@ namespace Eigen { -/** - * \class Replicate - * \ingroup Core_Module - * - * \brief Expression of the multiple replication of a matrix or vector - * - * \param MatrixType the type of the object we are replicating - * - * This class represents an expression of the multiple replication of a matrix or vector. - * It is the return type of DenseBase::replicate() and most of the time - * this is the only way it is used. - * - * \sa DenseBase::replicate() - */ - namespace internal { template struct traits > @@ -35,10 +20,7 @@ struct traits > typedef typename MatrixType::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - enum { - Factor = (RowFactor==Dynamic || ColFactor==Dynamic) ? Dynamic : RowFactor*ColFactor - }; - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; enum { RowsAtCompileTime = RowFactor==Dynamic || int(MatrixType::RowsAtCompileTime)==Dynamic @@ -53,12 +35,29 @@ struct traits > IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0), - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? RowMajorBit : 0 }; }; } +/** + * \class Replicate + * \ingroup Core_Module + * + * \brief Expression of the multiple replication of a matrix or vector + * + * \tparam MatrixType the type of the object we are replicating + * \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic. + * \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic. + * + * This class represents an expression of the multiple replication of a matrix or vector. + * It is the return type of DenseBase::replicate() and most of the time + * this is the only way it is used. + * + * \sa DenseBase::replicate() + */ template class Replicate : public internal::dense_xpr_base< Replicate >::type { @@ -68,10 +67,12 @@ template class Replicate typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + typedef typename internal::remove_all::type NestedExpression; template - inline explicit Replicate(const OriginalMatrixType& a_matrix) - : m_matrix(a_matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) + EIGEN_DEVICE_FUNC + inline explicit Replicate(const OriginalMatrixType& matrix) + : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) { EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) @@ -79,41 +80,20 @@ template class Replicate } template - inline Replicate(const OriginalMatrixType& a_matrix, Index rowFactor, Index colFactor) - : m_matrix(a_matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) + EIGEN_DEVICE_FUNC + inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) + : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) { EIGEN_STATIC_ASSERT((internal::is_same::type,OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() * m_colFactor.value(); } - inline Scalar coeff(Index rowId, Index colId) const - { - // try to avoid using modulo; this is a pure optimization strategy - const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 - : RowFactor==1 ? rowId - : rowId%m_matrix.rows(); - const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 - : ColFactor==1 ? colId - : colId%m_matrix.cols(); - - return m_matrix.coeff(actual_row, actual_col); - } - template - inline PacketScalar packet(Index rowId, Index colId) const - { - const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 - : RowFactor==1 ? rowId - : rowId%m_matrix.rows(); - const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 - : ColFactor==1 ? colId - : colId%m_matrix.cols(); - - return m_matrix.template packet(actual_row, actual_col); - } - + EIGEN_DEVICE_FUNC const _MatrixTypeNested& nestedExpression() const { return m_matrix; @@ -141,21 +121,6 @@ DenseBase::replicate() const return Replicate(derived()); } -/** - * \return an expression of the replication of \c *this - * - * Example: \include MatrixBase_replicate_int_int.cpp - * Output: \verbinclude MatrixBase_replicate_int_int.out - * - * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate - */ -template -const typename DenseBase::ReplicateReturnType -DenseBase::replicate(Index rowFactor,Index colFactor) const -{ - return Replicate(derived(),rowFactor,colFactor); -} - /** * \return an expression of the replication of each column (or row) of \c *this * diff --git a/external/eigen3/Eigen/src/Core/ReturnByValue.h b/external/eigen3/Eigen/src/Core/ReturnByValue.h index f635598..c44b767 100644 --- a/external/eigen3/Eigen/src/Core/ReturnByValue.h +++ b/external/eigen3/Eigen/src/Core/ReturnByValue.h @@ -13,11 +13,6 @@ namespace Eigen { -/** \class ReturnByValue - * \ingroup Core_Module - * - */ - namespace internal { template @@ -38,17 +33,22 @@ struct traits > * So internal::nested always gives the plain return matrix type. * * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? + * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators */ template -struct nested, n, PlainObject> +struct nested_eval, n, PlainObject> { typedef typename traits::ReturnType type; }; } // end namespace internal +/** \class ReturnByValue + * \ingroup Core_Module + * + */ template class ReturnByValue - : internal::no_assignment_operator, public internal::dense_xpr_base< ReturnByValue >::type + : public internal::dense_xpr_base< ReturnByValue >::type, internal::no_assignment_operator { public: typedef typename internal::traits::ReturnType ReturnType; @@ -57,10 +57,11 @@ template class ReturnByValue EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { static_cast(this)->evalTo(dst); } - inline Index rows() const { return static_cast(this)->rows(); } - inline Index cols() const { return static_cast(this)->cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast(this)->rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast(this)->cols(); } #ifndef EIGEN_PARSED_BY_DOXYGEN #define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT @@ -72,8 +73,7 @@ template class ReturnByValue const Unusable& coeff(Index,Index) const { return *reinterpret_cast(this); } Unusable& coeffRef(Index) { return *reinterpret_cast(this); } Unusable& coeffRef(Index,Index) { return *reinterpret_cast(this); } - template Unusable& packet(Index) const; - template Unusable& packet(Index, Index) const; +#undef Unusable #endif }; @@ -85,14 +85,32 @@ Derived& DenseBase::operator=(const ReturnByValue& other) return derived(); } +namespace internal { + +// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that +// when a ReturnByValue expression is assigned, the evaluator is not constructed. +// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world + template -template -Derived& DenseBase::lazyAssign(const ReturnByValue& other) +struct evaluator > + : public evaluator::ReturnType> { - other.evalTo(derived()); - return derived(); -} + typedef ReturnByValue XprType; + typedef typename internal::traits::ReturnType PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) + { + ::new (static_cast(this)) Base(m_result); + xpr.evalTo(m_result); + } + +protected: + PlainObject m_result; +}; +} // end namespace internal } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/Reverse.h b/external/eigen3/Eigen/src/Core/Reverse.h index 041f822..0640cda 100644 --- a/external/eigen3/Eigen/src/Core/Reverse.h +++ b/external/eigen3/Eigen/src/Core/Reverse.h @@ -14,20 +14,6 @@ namespace Eigen { -/** \class Reverse - * \ingroup Core_Module - * - * \brief Expression of the reverse of a vector or matrix - * - * \param MatrixType the type of the object of which we are taking the reverse - * - * This class represents an expression of the reverse of a vector. - * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse() - * and most of the time this is the only way it is used. - * - * \sa MatrixBase::reverse(), VectorwiseOp::reverse() - */ - namespace internal { template @@ -37,36 +23,43 @@ struct traits > typedef typename MatrixType::Scalar Scalar; typedef typename traits::StorageKind StorageKind; typedef typename traits::XprKind XprKind; - typedef typename nested::type MatrixTypeNested; + typedef typename ref_selector::type MatrixTypeNested; typedef typename remove_reference::type _MatrixTypeNested; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, - - // let's enable LinearAccess only with vectorization because of the product overhead - LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) - ? LinearAccessBit : 0, - - Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess), - - CoeffReadCost = _MatrixTypeNested::CoeffReadCost + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) }; }; -template struct reverse_packet_cond +template struct reverse_packet_cond { - static inline PacketScalar run(const PacketScalar& x) { return preverse(x); } + static inline PacketType run(const PacketType& x) { return preverse(x); } }; -template struct reverse_packet_cond +template struct reverse_packet_cond { - static inline PacketScalar run(const PacketScalar& x) { return x; } + static inline PacketType run(const PacketType& x) { return x; } }; } // end namespace internal +/** \class Reverse + * \ingroup Core_Module + * + * \brief Expression of the reverse of a vector or matrix + * + * \tparam MatrixType the type of the object of which we are taking the reverse + * \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections + * + * This class represents an expression of the reverse of a vector. + * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::reverse(), VectorwiseOp::reverse() + */ template class Reverse : public internal::dense_xpr_base< Reverse >::type { @@ -74,26 +67,9 @@ template class Reverse typedef typename internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + typedef typename internal::remove_all::type NestedExpression; using Base::IsRowMajor; - // The following two operators are provided to worarkound - // a MSVC 2013 issue. In theory, we could simply do: - // using Base::operator(); - // to make const version of operator() visible. - // Otheriwse, they would be hidden by the non-const versions defined in this file - - inline CoeffReturnType operator()(Index row, Index col) const - { - eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return coeff(row, col); - } - - inline CoeffReturnType operator()(Index index) const - { - eigen_assert(index >= 0 && index < m_matrix.size()); - return coeff(index); - } - protected: enum { PacketSize = internal::packet_traits::size, @@ -109,82 +85,19 @@ template class Reverse typedef internal::reverse_packet_cond reverse_packet; public: - inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) { } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); } - inline Index innerStride() const + EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } - inline Scalar& operator()(Index row, Index col) - { - eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); - return coeffRef(row, col); - } - - inline Scalar& coeffRef(Index row, Index col) - { - return m_matrix.const_cast_derived().coeffRef(ReverseRow ? m_matrix.rows() - row - 1 : row, - ReverseCol ? m_matrix.cols() - col - 1 : col); - } - - inline CoeffReturnType coeff(Index row, Index col) const - { - return m_matrix.coeff(ReverseRow ? m_matrix.rows() - row - 1 : row, - ReverseCol ? m_matrix.cols() - col - 1 : col); - } - - inline CoeffReturnType coeff(Index index) const - { - return m_matrix.coeff(m_matrix.size() - index - 1); - } - - inline Scalar& coeffRef(Index index) - { - return m_matrix.const_cast_derived().coeffRef(m_matrix.size() - index - 1); - } - - inline Scalar& operator()(Index index) - { - eigen_assert(index >= 0 && index < m_matrix.size()); - return coeffRef(index); - } - - template - inline const PacketScalar packet(Index row, Index col) const - { - return reverse_packet::run(m_matrix.template packet( - ReverseRow ? m_matrix.rows() - row - OffsetRow : row, - ReverseCol ? m_matrix.cols() - col - OffsetCol : col)); - } - - template - inline void writePacket(Index row, Index col, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket( - ReverseRow ? m_matrix.rows() - row - OffsetRow : row, - ReverseCol ? m_matrix.cols() - col - OffsetCol : col, - reverse_packet::run(x)); - } - - template - inline const PacketScalar packet(Index index) const - { - return internal::preverse(m_matrix.template packet( m_matrix.size() - index - PacketSize )); - } - - template - inline void writePacket(Index index, const PacketScalar& x) - { - m_matrix.const_cast_derived().template writePacket(m_matrix.size() - index - PacketSize, internal::preverse(x)); - } - - const typename internal::remove_all::type& + EIGEN_DEVICE_FUNC const typename internal::remove_all::type& nestedExpression() const { return m_matrix; @@ -204,33 +117,93 @@ template inline typename DenseBase::ReverseReturnType DenseBase::reverse() { - return derived(); + return ReverseReturnType(derived()); } -/** This is the const version of reverse(). */ -template -inline const typename DenseBase::ConstReverseReturnType -DenseBase::reverse() const -{ - return derived(); -} + +//reverse const overload moved DenseBase.h due to a CUDA compiler bug /** This is the "in place" version of reverse: it reverses \c *this. * * In most cases it is probably better to simply use the reversed expression * of a matrix. However, when reversing the matrix data itself is really needed, * then this "in-place" version is probably the right choice because it provides - * the following additional features: + * the following additional benefits: * - less error prone: doing the same operation with .reverse() requires special care: * \code m = m.reverse().eval(); \endcode - * - this API allows to avoid creating a temporary (the current implementation creates a temporary, but that could be avoided using swap) + * - this API enables reverse operations without the need for a temporary * - it allows future optimizations (cache friendliness, etc.) * - * \sa reverse() */ + * \sa VectorwiseOp::reverseInPlace(), reverse() */ template inline void DenseBase::reverseInPlace() { - derived() = derived().reverse().eval(); + if(cols()>rows()) + { + Index half = cols()/2; + leftCols(half).swap(rightCols(half).reverse()); + if((cols()%2)==1) + { + Index half2 = rows()/2; + col(half).head(half2).swap(col(half).tail(half2).reverse()); + } + } + else + { + Index half = rows()/2; + topRows(half).swap(bottomRows(half).reverse()); + if((rows()%2)==1) + { + Index half2 = cols()/2; + row(half).head(half2).swap(row(half).tail(half2).reverse()); + } + } +} + +namespace internal { + +template +struct vectorwise_reverse_inplace_impl; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.rows()/2; + xpr.topRows(half).swap(xpr.bottomRows(half).colwise().reverse()); + } +}; + +template<> +struct vectorwise_reverse_inplace_impl +{ + template + static void run(ExpressionType &xpr) + { + Index half = xpr.cols()/2; + xpr.leftCols(half).swap(xpr.rightCols(half).rowwise().reverse()); + } +}; + +} // end namespace internal + +/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * + * \sa DenseBase::reverseInPlace(), reverse() */ +template +void VectorwiseOp::reverseInPlace() +{ + internal::vectorwise_reverse_inplace_impl::run(_expression().const_cast_derived()); } } // end namespace Eigen diff --git a/external/eigen3/Eigen/src/Core/Select.h b/external/eigen3/Eigen/src/Core/Select.h index 87993bb..79eec1b 100644 --- a/external/eigen3/Eigen/src/Core/Select.h +++ b/external/eigen3/Eigen/src/Core/Select.h @@ -43,23 +43,21 @@ struct traits > ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, - CoeffReadCost = traits::type>::CoeffReadCost - + EIGEN_SIZE_MAX(traits::type>::CoeffReadCost, - traits::type>::CoeffReadCost) + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit }; }; } template -class Select : internal::no_assignment_operator, - public internal::dense_xpr_base< Select >::type +class Select : public internal::dense_xpr_base< Select >::type, + internal::no_assignment_operator { public: typedef typename internal::dense_xpr_base" << endl; + cerr << "available actions:" << endl; + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + cerr << " " << (*it)->invokation_name() << endl; + } + cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; + exit(1); +} + +int main(int argc, char* argv[]) +{ + cout.precision(default_precision); + cerr.precision(default_precision); + + vector> available_actions; + available_actions.emplace_back(new partition_action_t); + available_actions.emplace_back(new evaluate_defaults_action_t); + + vector input_filenames; + + action_t* action = nullptr; + + if (argc < 2) { + show_usage_and_exit(argc, argv, available_actions); + } + for (int i = 1; i < argc; i++) { + bool arg_handled = false; + // Step 1. Try to match action invokation names. + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + if (!strcmp(argv[i], (*it)->invokation_name())) { + if (!action) { + action = it->get(); + arg_handled = true; + break; + } else { + cerr << "can't specify more than one action!" << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + } + if (arg_handled) { + continue; + } + // Step 2. Try to match option names. + if (argv[i][0] == '-') { + if (!strcmp(argv[i], "--only-cubic-sizes")) { + only_cubic_sizes = true; + arg_handled = true; + } + if (!strcmp(argv[i], "--dump-tables")) { + dump_tables = true; + arg_handled = true; + } + if (!arg_handled) { + cerr << "Unrecognized option: " << argv[i] << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + if (arg_handled) { + continue; + } + // Step 3. Default to interpreting args as input filenames. + input_filenames.emplace_back(argv[i]); + } + + if (dump_tables && only_cubic_sizes) { + cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl; + show_usage_and_exit(argc, argv, available_actions); + } + + if (!action) { + show_usage_and_exit(argc, argv, available_actions); + } + + action->run(input_filenames); +} diff --git a/external/eigen3/bench/benchCholesky.cpp b/external/eigen3/bench/benchCholesky.cpp index 42b3e12..9a8e7cf 100644 --- a/external/eigen3/bench/benchCholesky.cpp +++ b/external/eigen3/bench/benchCholesky.cpp @@ -31,7 +31,7 @@ __attribute__ ((noinline)) void benchLLT(const MatrixType& m) int rows = m.rows(); int cols = m.cols(); - int cost = 0; + double cost = 0; for (int j=0; j0; ++i) + for (int i=0; dynsizes[i]>0; ++i) benchLLT(Matrix(dynsizes[i],dynsizes[i])); benchLLT(Matrix()); diff --git a/external/eigen3/bench/bench_gemm.cpp b/external/eigen3/bench/bench_gemm.cpp index 41ca8b3..8528c55 100644 --- a/external/eigen3/bench/bench_gemm.cpp +++ b/external/eigen3/bench/bench_gemm.cpp @@ -2,6 +2,14 @@ // g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2 ./a.out // icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out +// Compilation options: +// +// -DSCALAR=std::complex +// -DSCALARA=double or -DSCALARB=double +// -DHAVE_BLAS +// -DDECOUPLED +// + #include #include #include @@ -14,10 +22,18 @@ using namespace Eigen; #define SCALAR float #endif +#ifndef SCALARA +#define SCALARA SCALAR +#endif + +#ifndef SCALARB +#define SCALARB SCALAR +#endif + typedef SCALAR Scalar; typedef NumTraits::Real RealScalar; -typedef Matrix A; -typedef Matrix B; +typedef Matrix A; +typedef Matrix B; typedef Matrix C; typedef Matrix M; @@ -129,35 +145,69 @@ int main(int argc, char ** argv) int tries = 2; // number of tries, we keep the best int s = 2048; - int cache_size = -1; + int m = s; + int n = s; + int p = s; + int cache_size1=-1, cache_size2=l2, cache_size3 = 0; bool need_help = false; - for (int i=1; i c t p\n"; + std::cout << argv[0] << " -s -c -t -p \n"; + std::cout << " : size\n"; + std::cout << " : rows columns depth\n"; return 1; } - if(cache_size>0) - setCpuCacheSizes(cache_size,96*cache_size); - - int m = s; - int n = s; - int p = s; +#if EIGEN_VERSION_AT_LEAST(3,2,90) + if(cache_size1>0) + setCpuCacheSizes(cache_size1,cache_size2,cache_size3); +#endif + A a(m,p); a.setRandom(); B b(p,n); b.setRandom(); C c(m,n); c.setOnes(); @@ -172,6 +222,7 @@ int main(int argc, char ** argv) // check the parallel product is correct #if defined EIGEN_HAS_OPENMP + Eigen::initParallel(); int procs = omp_get_max_threads(); if(procs>1) { @@ -188,11 +239,20 @@ int main(int argc, char ** argv) #elif defined HAVE_BLAS blas_gemm(a,b,r); c.noalias() += a * b; - if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n"; + if(!r.isApprox(c)) { + std::cout << r - c << "\n"; + std::cerr << "Warning, your product is crap!\n\n"; + } #else - gemm(a,b,c); - r.noalias() += a.cast() * b.cast(); - if(!r.isApprox(c)) std::cerr << "Warning, your product is crap!\n\n"; + if(1.*m*n*p<2000.*2000*2000) + { + gemm(a,b,c); + r.noalias() += a.cast() .lazyProduct( b.cast() ); + if(!r.isApprox(c)) { + std::cout << r - c << "\n"; + std::cerr << "Warning, your product is crap!\n\n"; + } + } #endif #ifdef HAVE_BLAS @@ -214,7 +274,7 @@ int main(int argc, char ** argv) { BenchTimer tmono; omp_set_num_threads(1); - Eigen::internal::setNbThreads(1); + Eigen::setNbThreads(1); c = rc; BENCH(tmono, tries, rep, gemm(a,b,c)); std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n"; @@ -223,6 +283,15 @@ int main(int argc, char ** argv) } #endif + if(1.*m*n*p<30*30*30) + { + BenchTimer tmt; + c = rc; + BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b)); + std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n"; + std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n"; + } + #ifdef DECOUPLED if((NumTraits::IsComplex) && (NumTraits::IsComplex)) { diff --git a/external/eigen3/bench/bench_norm.cpp b/external/eigen3/bench/bench_norm.cpp index 806db29..129afcf 100644 --- a/external/eigen3/bench/bench_norm.cpp +++ b/external/eigen3/bench/bench_norm.cpp @@ -6,19 +6,25 @@ using namespace Eigen; using namespace std; template -EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(const T& v) +EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v) { return v.norm(); } template -EIGEN_DONT_INLINE typename T::Scalar hypotNorm(const T& v) +EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v) +{ + return v.stableNorm(); +} + +template +EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v) { return v.hypotNorm(); } template -EIGEN_DONT_INLINE typename T::Scalar blueNorm(const T& v) +EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v) { return v.blueNorm(); } @@ -32,25 +38,25 @@ EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v) Scalar ssq = 1; for (int i=0;i= ax) { - ssq += internal::abs2(ax/scale); + ssq += numext::abs2(ax/scale); } else { - ssq = Scalar(1) + ssq * internal::abs2(scale/ax); + ssq = Scalar(1) + ssq * numext::abs2(scale/ax); scale = ax; } } - return scale * internal::sqrt(ssq); + return scale * std::sqrt(ssq); } template EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v) { typedef typename T::Scalar Scalar; - Scalar s = v.cwise().abs().maxCoeff(); + Scalar s = v.array().abs().maxCoeff(); return s*(v/s).norm(); } @@ -73,16 +79,20 @@ EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v) v(i) = v(2*i) + v(2*i+1); n = n/2; } - return internal::sqrt(v(0)); + return std::sqrt(v(0)); } +namespace Eigen { +namespace internal { #ifdef EIGEN_VECTORIZE -Packet4f internal::plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); } -Packet2d internal::plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); } +Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); } +Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); } -Packet4f internal::pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); } -Packet2d internal::pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); } +Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); } +Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); } #endif +} +} template EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) @@ -126,7 +136,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) overfl = rbig*s2m; // overfow boundary for abig eps = std::pow(ibeta, 1-it); - relerr = internal::sqrt(eps); // tolerance for neglecting asml + relerr = std::sqrt(eps); // tolerance for neglecting asml abig = 1.0/eps - 1.0; if (Scalar(nbig)>abig) nmax = abig; // largest safe n else nmax = nbig; @@ -134,13 +144,13 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) typedef typename internal::packet_traits::type Packet; const int ps = internal::packet_traits::size; - Packet pasml = internal::pset1(Scalar(0)); - Packet pamed = internal::pset1(Scalar(0)); - Packet pabig = internal::pset1(Scalar(0)); - Packet ps2m = internal::pset1(s2m); - Packet ps1m = internal::pset1(s1m); - Packet pb2 = internal::pset1(b2); - Packet pb1 = internal::pset1(b1); + Packet pasml = internal::pset1(Scalar(0)); + Packet pamed = internal::pset1(Scalar(0)); + Packet pabig = internal::pset1(Scalar(0)); + Packet ps2m = internal::pset1(s2m); + Packet ps1m = internal::pset1(s1m); + Packet pb2 = internal::pset1(b2); + Packet pb1 = internal::pset1(b1); for(int j=0; j(j)); @@ -170,7 +180,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) Scalar amed = internal::predux(pamed); if(abig > Scalar(0)) { - abig = internal::sqrt(abig); + abig = std::sqrt(abig); if(abig > overfl) { eigen_assert(false && "overflow"); @@ -179,7 +189,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) if(amed > Scalar(0)) { abig = abig/s2m; - amed = internal::sqrt(amed); + amed = std::sqrt(amed); } else { @@ -191,55 +201,56 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v) { if (amed > Scalar(0)) { - abig = internal::sqrt(amed); - amed = internal::sqrt(asml) / s1m; + abig = std::sqrt(amed); + amed = std::sqrt(asml) / s1m; } else { - return internal::sqrt(asml)/s1m; + return std::sqrt(asml)/s1m; } } else { - return internal::sqrt(amed); + return std::sqrt(amed); } asml = std::min(abig, amed); abig = std::max(abig, amed); if(asml <= abig*relerr) return abig; else - return abig * internal::sqrt(Scalar(1) + internal::abs2(asml/abig)); + return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig)); #endif } #define BENCH_PERF(NRM) { \ + float af = 0; double ad = 0; std::complex ac = 0; \ Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\ for (int k=0; k()); - double yd = based * internal::abs(internal::random()); + double yf = basef * std::abs(internal::random()); + double yd = based * std::abs(internal::random()); VectorXf vf = VectorXf::Ones(s) * yf; VectorXd vd = VectorXd::Ones(s) * yd; - std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n"; + std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n"; std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n"; std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n"; std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n"; @@ -255,8 +266,8 @@ void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s) VectorXd vd(s); for (int i=0; i()) * std::pow(double(10), internal::random(ef0,ef1)); - vd[i] = internal::abs(internal::random()) * std::pow(double(10), internal::random(ed0,ed1)); + vf[i] = std::abs(internal::random()) * std::pow(double(10), internal::random(ef0,ef1)); + vd[i] = std::abs(internal::random()) * std::pow(double(10), internal::random(ed0,ed1)); } //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n"; @@ -312,34 +323,38 @@ int main(int argc, char** argv) std::cout << "\n"; } + y = 1; std::cout.precision(4); - std::cerr << "Performance (out of cache):\n"; + int s1 = 1024*1024*32; + std::cerr << "Performance (out of cache, " << s1 << "):\n"; { int iters = 1; - VectorXf vf = VectorXf::Random(1024*1024*32) * y; - VectorXd vd = VectorXd::Random(1024*1024*32) * y; - VectorXcf vcf = VectorXcf::Random(1024*1024*32) * y; + VectorXf vf = VectorXf::Random(s1) * y; + VectorXd vd = VectorXd::Random(s1) * y; + VectorXcf vcf = VectorXcf::Random(s1) * y; BENCH_PERF(sqsumNorm); + BENCH_PERF(stableNorm); BENCH_PERF(blueNorm); -// BENCH_PERF(pblueNorm); -// BENCH_PERF(lapackNorm); -// BENCH_PERF(hypotNorm); -// BENCH_PERF(twopassNorm); + BENCH_PERF(pblueNorm); + BENCH_PERF(lapackNorm); + BENCH_PERF(hypotNorm); + BENCH_PERF(twopassNorm); BENCH_PERF(bl2passNorm); } - std::cerr << "\nPerformance (in cache):\n"; + std::cerr << "\nPerformance (in cache, " << 512 << "):\n"; { int iters = 100000; VectorXf vf = VectorXf::Random(512) * y; VectorXd vd = VectorXd::Random(512) * y; VectorXcf vcf = VectorXcf::Random(512) * y; BENCH_PERF(sqsumNorm); + BENCH_PERF(stableNorm); BENCH_PERF(blueNorm); -// BENCH_PERF(pblueNorm); -// BENCH_PERF(lapackNorm); -// BENCH_PERF(hypotNorm); -// BENCH_PERF(twopassNorm); + BENCH_PERF(pblueNorm); + BENCH_PERF(lapackNorm); + BENCH_PERF(hypotNorm); + BENCH_PERF(twopassNorm); BENCH_PERF(bl2passNorm); } } diff --git a/external/eigen3/bench/benchmark-blocking-sizes.cpp b/external/eigen3/bench/benchmark-blocking-sizes.cpp new file mode 100644 index 0000000..827be28 --- /dev/null +++ b/external/eigen3/bench/benchmark-blocking-sizes.cpp @@ -0,0 +1,677 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include +#include +#include +#include +#include +#include +#include + +bool eigen_use_specific_block_size; +int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n; +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m +#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n +#include + +#include + +using namespace Eigen; +using namespace std; + +static BenchTimer timer; + +// how many times we repeat each measurement. +// measurements are randomly shuffled - we're not doing +// all N identical measurements in a row. +const int measurement_repetitions = 3; + +// Timings below this value are too short to be accurate, +// we'll repeat measurements with more iterations until +// we get a timing above that threshold. +const float min_accurate_time = 1e-2f; + +// See --min-working-set-size command line parameter. +size_t min_working_set_size = 0; + +float max_clock_speed = 0.0f; + +// range of sizes that we will benchmark (in all 3 K,M,N dimensions) +const size_t maxsize = 2048; +const size_t minsize = 16; + +typedef MatrixXf MatrixType; +typedef MatrixType::Scalar Scalar; +typedef internal::packet_traits::type Packet; + +static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two"); +static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two"); +static_assert(maxsize > minsize, "maxsize must be larger than minsize"); +static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)"); + +// just a helper to store a triple of K,M,N sizes for matrix product +struct size_triple_t +{ + size_t k, m, n; + size_triple_t() : k(0), m(0), n(0) {} + size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {} + size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {} + size_triple_t(uint16_t compact) + { + k = 1 << ((compact & 0xf00) >> 8); + m = 1 << ((compact & 0x0f0) >> 4); + n = 1 << ((compact & 0x00f) >> 0); + } +}; + +uint8_t log2_pot(size_t x) { + size_t l = 0; + while (x >>= 1) l++; + return l; +} + +// Convert between size tripes and a compact form fitting in 12 bits +// where each size, which must be a POT, is encoded as its log2, on 4 bits +// so the largest representable size is 2^15 == 32k ... big enough. +uint16_t compact_size_triple(size_t k, size_t m, size_t n) +{ + return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n); +} + +uint16_t compact_size_triple(const size_triple_t& t) +{ + return compact_size_triple(t.k, t.m, t.n); +} + +// A single benchmark. Initially only contains benchmark params. +// Then call run(), which stores the result in the gflops field. +struct benchmark_t +{ + uint16_t compact_product_size; + uint16_t compact_block_size; + bool use_default_block_size; + float gflops; + benchmark_t() + : compact_product_size(0) + , compact_block_size(0) + , use_default_block_size(false) + , gflops(0) + { + } + benchmark_t(size_t pk, size_t pm, size_t pn, + size_t bk, size_t bm, size_t bn) + : compact_product_size(compact_size_triple(pk, pm, pn)) + , compact_block_size(compact_size_triple(bk, bm, bn)) + , use_default_block_size(false) + , gflops(0) + {} + benchmark_t(size_t pk, size_t pm, size_t pn) + : compact_product_size(compact_size_triple(pk, pm, pn)) + , compact_block_size(0) + , use_default_block_size(true) + , gflops(0) + {} + + void run(); +}; + +ostream& operator<<(ostream& s, const benchmark_t& b) +{ + s << hex << b.compact_product_size << dec; + if (b.use_default_block_size) { + size_triple_t t(b.compact_product_size); + Index k = t.k, m = t.m, n = t.n; + internal::computeProductBlockingSizes(k, m, n); + s << " default(" << k << ", " << m << ", " << n << ")"; + } else { + s << " " << hex << b.compact_block_size << dec; + } + s << " " << b.gflops; + return s; +} + +// We sort first by increasing benchmark parameters, +// then by decreasing performance. +bool operator<(const benchmark_t& b1, const benchmark_t& b2) +{ + return b1.compact_product_size < b2.compact_product_size || + (b1.compact_product_size == b2.compact_product_size && ( + (b1.compact_block_size < b2.compact_block_size || ( + b1.compact_block_size == b2.compact_block_size && + b1.gflops > b2.gflops)))); +} + +void benchmark_t::run() +{ + size_triple_t productsizes(compact_product_size); + + if (use_default_block_size) { + eigen_use_specific_block_size = false; + } else { + // feed eigen with our custom blocking params + eigen_use_specific_block_size = true; + size_triple_t blocksizes(compact_block_size); + eigen_block_size_k = blocksizes.k; + eigen_block_size_m = blocksizes.m; + eigen_block_size_n = blocksizes.n; + } + + // set up the matrix pool + + const size_t combined_three_matrices_sizes = + sizeof(Scalar) * + (productsizes.k * productsizes.m + + productsizes.k * productsizes.n + + productsizes.m * productsizes.n); + + // 64 M is large enough that nobody has a cache bigger than that, + // while still being small enough that everybody has this much RAM, + // so conveniently we don't need to special-case platforms here. + const size_t unlikely_large_cache_size = 64 << 20; + + const size_t working_set_size = + min_working_set_size ? min_working_set_size : unlikely_large_cache_size; + + const size_t matrix_pool_size = + 1 + working_set_size / combined_three_matrices_sizes; + + MatrixType *lhs = new MatrixType[matrix_pool_size]; + MatrixType *rhs = new MatrixType[matrix_pool_size]; + MatrixType *dst = new MatrixType[matrix_pool_size]; + + for (size_t i = 0; i < matrix_pool_size; i++) { + lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k); + rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n); + dst[i] = MatrixType::Zero(productsizes.m, productsizes.n); + } + + // main benchmark loop + + int iters_at_a_time = 1; + float time_per_iter = 0.0f; + size_t matrix_index = 0; + while (true) { + + double starttime = timer.getCpuTime(); + for (int i = 0; i < iters_at_a_time; i++) { + dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index]; + matrix_index++; + if (matrix_index == matrix_pool_size) { + matrix_index = 0; + } + } + double endtime = timer.getCpuTime(); + + const float timing = float(endtime - starttime); + + if (timing >= min_accurate_time) { + time_per_iter = timing / iters_at_a_time; + break; + } + + iters_at_a_time *= 2; + } + + delete[] lhs; + delete[] rhs; + delete[] dst; + + gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter; +} + +void print_cpuinfo() +{ +#ifdef __linux__ + cout << "contents of /proc/cpuinfo:" << endl; + string line; + ifstream cpuinfo("/proc/cpuinfo"); + if (cpuinfo.is_open()) { + while (getline(cpuinfo, line)) { + cout << line << endl; + } + cpuinfo.close(); + } + cout << endl; +#elif defined __APPLE__ + cout << "output of sysctl hw:" << endl; + system("sysctl hw"); + cout << endl; +#endif +} + +template +string type_name() +{ + return "unknown"; +} + +template<> +string type_name() +{ + return "float"; +} + +template<> +string type_name() +{ + return "double"; +} + +struct action_t +{ + virtual const char* invokation_name() const { abort(); return nullptr; } + virtual void run() const { abort(); } + virtual ~action_t() {} +}; + +void show_usage_and_exit(int /*argc*/, char* argv[], + const vector>& available_actions) +{ + cerr << "usage: " << argv[0] << " [options...]" << endl << endl; + cerr << "available actions:" << endl << endl; + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + cerr << " " << (*it)->invokation_name() << endl; + } + cerr << endl; + cerr << "options:" << endl << endl; + cerr << " --min-working-set-size=N:" << endl; + cerr << " Set the minimum working set size to N bytes." << endl; + cerr << " This is rounded up as needed to a multiple of matrix size." << endl; + cerr << " A larger working set lowers the chance of a warm cache." << endl; + cerr << " The default value 0 means use a large enough working" << endl; + cerr << " set to likely outsize caches." << endl; + cerr << " A value of 1 (that is, 1 byte) would mean don't do anything to" << endl; + cerr << " avoid warm caches." << endl; + exit(1); +} + +float measure_clock_speed() +{ + cerr << "Measuring clock speed... \r" << flush; + + vector all_gflops; + for (int i = 0; i < 8; i++) { + benchmark_t b(1024, 1024, 1024); + b.run(); + all_gflops.push_back(b.gflops); + } + + sort(all_gflops.begin(), all_gflops.end()); + float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5]; + + // multiply by an arbitrary constant to discourage trying doing anything with the + // returned values besides just comparing them with each other. + float result = stable_estimate * 123.456f; + + return result; +} + +struct human_duration_t +{ + int seconds; + human_duration_t(int s) : seconds(s) {} +}; + +ostream& operator<<(ostream& s, const human_duration_t& d) +{ + int remainder = d.seconds; + if (remainder > 3600) { + int hours = remainder / 3600; + s << hours << " h "; + remainder -= hours * 3600; + } + if (remainder > 60) { + int minutes = remainder / 60; + s << minutes << " min "; + remainder -= minutes * 60; + } + if (d.seconds < 600) { + s << remainder << " s"; + } + return s; +} + +const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data"; + +void serialize_benchmarks(const char* filename, const vector& benchmarks, size_t first_benchmark_to_run) +{ + FILE* file = fopen(filename, "w"); + if (!file) { + cerr << "Could not open file " << filename << " for writing." << endl; + cerr << "Do you have write permissions on the current working directory?" << endl; + exit(1); + } + size_t benchmarks_vector_size = benchmarks.size(); + fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file); + fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file); + fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file); + fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file); + fclose(file); +} + +bool deserialize_benchmarks(const char* filename, vector& benchmarks, size_t& first_benchmark_to_run) +{ + FILE* file = fopen(filename, "r"); + if (!file) { + return false; + } + if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) { + return false; + } + size_t benchmarks_vector_size = 0; + if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) { + return false; + } + if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) { + return false; + } + benchmarks.resize(benchmarks_vector_size); + if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) { + return false; + } + unlink(filename); + return true; +} + +void try_run_some_benchmarks( + vector& benchmarks, + double time_start, + size_t& first_benchmark_to_run) +{ + if (first_benchmark_to_run == benchmarks.size()) { + return; + } + + double time_last_progress_update = 0; + double time_last_clock_speed_measurement = 0; + double time_now = 0; + + size_t benchmark_index = first_benchmark_to_run; + + while (true) { + float ratio_done = float(benchmark_index) / benchmarks.size(); + time_now = timer.getRealTime(); + + // We check clock speed every minute and at the end. + if (benchmark_index == benchmarks.size() || + time_now > time_last_clock_speed_measurement + 60.0f) + { + time_last_clock_speed_measurement = time_now; + + // Ensure that clock speed is as expected + float current_clock_speed = measure_clock_speed(); + + // The tolerance needs to be smaller than the relative difference between + // clock speeds that a device could operate under. + // It seems unlikely that a device would be throttling clock speeds by + // amounts smaller than 2%. + // With a value of 1%, I was getting within noise on a Sandy Bridge. + const float clock_speed_tolerance = 0.02f; + + if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) { + // Clock speed is now higher than we previously measured. + // Either our initial measurement was inaccurate, which won't happen + // too many times as we are keeping the best clock speed value and + // and allowing some tolerance; or something really weird happened, + // which invalidates all benchmark results collected so far. + // Either way, we better restart all over again now. + if (benchmark_index) { + cerr << "Restarting at " << 100.0f * ratio_done + << " % because clock speed increased. " << endl; + } + max_clock_speed = current_clock_speed; + first_benchmark_to_run = 0; + return; + } + + bool rerun_last_tests = false; + + if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { + cerr << "Measurements completed so far: " + << 100.0f * ratio_done + << " % " << endl; + cerr << "Clock speed seems to be only " + << current_clock_speed/max_clock_speed + << " times what it used to be." << endl; + + unsigned int seconds_to_sleep_if_lower_clock_speed = 1; + + while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) { + if (seconds_to_sleep_if_lower_clock_speed > 32) { + cerr << "Sleeping longer probably won't make a difference." << endl; + cerr << "Serializing benchmarks to " << session_filename << endl; + serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run); + cerr << "Now restart this benchmark, and it should pick up where we left." << endl; + exit(2); + } + rerun_last_tests = true; + cerr << "Sleeping " + << seconds_to_sleep_if_lower_clock_speed + << " s... \r" << endl; + sleep(seconds_to_sleep_if_lower_clock_speed); + current_clock_speed = measure_clock_speed(); + seconds_to_sleep_if_lower_clock_speed *= 2; + } + } + + if (rerun_last_tests) { + cerr << "Redoing the last " + << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size() + << " % because clock speed had been low. " << endl; + return; + } + + // nothing wrong with the clock speed so far, so there won't be a need to rerun + // benchmarks run so far in case we later encounter a lower clock speed. + first_benchmark_to_run = benchmark_index; + } + + if (benchmark_index == benchmarks.size()) { + // We're done! + first_benchmark_to_run = benchmarks.size(); + // Erase progress info + cerr << " " << endl; + return; + } + + // Display progress info on stderr + if (time_now > time_last_progress_update + 1.0f) { + time_last_progress_update = time_now; + cerr << "Measurements... " << 100.0f * ratio_done + << " %, ETA " + << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done) + << " \r" << flush; + } + + // This is where we actually run a benchmark! + benchmarks[benchmark_index].run(); + benchmark_index++; + } +} + +void run_benchmarks(vector& benchmarks) +{ + size_t first_benchmark_to_run; + vector deserialized_benchmarks; + bool use_deserialized_benchmarks = false; + if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) { + cerr << "Found serialized session with " + << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size() + << " % already done" << endl; + if (deserialized_benchmarks.size() == benchmarks.size() && + first_benchmark_to_run > 0 && + first_benchmark_to_run < benchmarks.size()) + { + use_deserialized_benchmarks = true; + } + } + + if (use_deserialized_benchmarks) { + benchmarks = deserialized_benchmarks; + } else { + // not using deserialized benchmarks, starting from scratch + first_benchmark_to_run = 0; + + // Randomly shuffling benchmarks allows us to get accurate enough progress info, + // as now the cheap/expensive benchmarks are randomly mixed so they average out. + // It also means that if data is corrupted for some time span, the odds are that + // not all repetitions of a given benchmark will be corrupted. + random_shuffle(benchmarks.begin(), benchmarks.end()); + } + + for (int i = 0; i < 4; i++) { + max_clock_speed = max(max_clock_speed, measure_clock_speed()); + } + + double time_start = 0.0; + while (first_benchmark_to_run < benchmarks.size()) { + if (first_benchmark_to_run == 0) { + time_start = timer.getRealTime(); + } + try_run_some_benchmarks(benchmarks, + time_start, + first_benchmark_to_run); + } + + // Sort timings by increasing benchmark parameters, and decreasing gflops. + // The latter is very important. It means that we can ignore all but the first + // benchmark with given parameters. + sort(benchmarks.begin(), benchmarks.end()); + + // Collect best (i.e. now first) results for each parameter values. + vector best_benchmarks; + for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { + if (best_benchmarks.empty() || + best_benchmarks.back().compact_product_size != it->compact_product_size || + best_benchmarks.back().compact_block_size != it->compact_block_size) + { + best_benchmarks.push_back(*it); + } + } + + // keep and return only the best benchmarks + benchmarks = best_benchmarks; +} + +struct measure_all_pot_sizes_action_t : action_t +{ + virtual const char* invokation_name() const { return "all-pot-sizes"; } + virtual void run() const + { + vector benchmarks; + for (int repetition = 0; repetition < measurement_repetitions; repetition++) { + for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { + for (size_t msize = minsize; msize <= maxsize; msize *= 2) { + for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { + for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) { + for (size_t mblock = minsize; mblock <= msize; mblock *= 2) { + for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) { + benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock); + } + } + } + } + } + } + } + + run_benchmarks(benchmarks); + + cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl; + for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { + cout << *it << endl; + } + } +}; + +struct measure_default_sizes_action_t : action_t +{ + virtual const char* invokation_name() const { return "default-sizes"; } + virtual void run() const + { + vector benchmarks; + for (int repetition = 0; repetition < measurement_repetitions; repetition++) { + for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) { + for (size_t msize = minsize; msize <= maxsize; msize *= 2) { + for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) { + benchmarks.emplace_back(ksize, msize, nsize); + } + } + } + } + + run_benchmarks(benchmarks); + + cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl; + for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) { + cout << *it << endl; + } + } +}; + +int main(int argc, char* argv[]) +{ + double time_start = timer.getRealTime(); + cout.precision(4); + cerr.precision(4); + + vector> available_actions; + available_actions.emplace_back(new measure_all_pot_sizes_action_t); + available_actions.emplace_back(new measure_default_sizes_action_t); + + auto action = available_actions.end(); + + if (argc <= 1) { + show_usage_and_exit(argc, argv, available_actions); + } + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + if (!strcmp(argv[1], (*it)->invokation_name())) { + action = it; + break; + } + } + + if (action == available_actions.end()) { + show_usage_and_exit(argc, argv, available_actions); + } + + for (int i = 2; i < argc; i++) { + if (argv[i] == strstr(argv[i], "--min-working-set-size=")) { + const char* equals_sign = strchr(argv[i], '='); + min_working_set_size = strtoul(equals_sign+1, nullptr, 10); + } else { + cerr << "unrecognized option: " << argv[i] << endl << endl; + show_usage_and_exit(argc, argv, available_actions); + } + } + + print_cpuinfo(); + + cout << "benchmark parameters:" << endl; + cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl; + cout << "scalar type: " << type_name() << endl; + cout << "packet size: " << internal::packet_traits::size << endl; + cout << "minsize = " << minsize << endl; + cout << "maxsize = " << maxsize << endl; + cout << "measurement_repetitions = " << measurement_repetitions << endl; + cout << "min_accurate_time = " << min_accurate_time << endl; + cout << "min_working_set_size = " << min_working_set_size; + if (min_working_set_size == 0) { + cout << " (try to outsize caches)"; + } + cout << endl << endl; + + (*action)->run(); + + double time_end = timer.getRealTime(); + cerr << "Finished in " << human_duration_t(time_end - time_start) << endl; +} diff --git a/external/eigen3/bench/btl/CMakeLists.txt b/external/eigen3/bench/btl/CMakeLists.txt index 119b470..38ff9f4 100644 --- a/external/eigen3/bench/btl/CMakeLists.txt +++ b/external/eigen3/bench/btl/CMakeLists.txt @@ -11,29 +11,24 @@ SET(CMAKE_INCLUDE_CURRENT_DIR ON) string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER}) IF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) - SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG") - SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG") - IF(NOT BTL_NOVEC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") - SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -msse2") - ELSE(NOT BTL_NOVEC) + SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}") + SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}") + IF(BTL_NOVEC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") - ENDIF(NOT BTL_NOVEC) + ENDIF(BTL_NOVEC) ENDIF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC) IF(MSVC) SET(CMAKE_CXX_FLAGS " /O2 /Ot /GL /fp:fast -DNDEBUG") # SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG") - IF(NOT BTL_NOVEC) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2") - ELSE(NOT BTL_NOVEC) + IF(BTL_NOVEC) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE") - ENDIF(NOT BTL_NOVEC) + ENDIF(BTL_NOVEC) ENDIF(MSVC) if(IS_ICPC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fast") - set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fast") + set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}") + set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}") endif(IS_ICPC) include_directories( @@ -48,6 +43,12 @@ include_directories( # set(DEFAULT_LIBRARIES ${MKL_LIBRARIES}) # endif (MKL_FOUND) +find_library(EIGEN_BTL_RT_LIBRARY rt) +# if we cannot find it easily, then we don't need it! +if(NOT EIGEN_BTL_RT_LIBRARY) + set(EIGEN_BTL_RT_LIBRARY "") +endif() + MACRO(BTL_ADD_BENCH targetname) foreach(_current_var ${ARGN}) @@ -70,7 +71,7 @@ MACRO(BTL_ADD_BENCH targetname) IF(BUILD_${targetname}) ADD_EXECUTABLE(${targetname} ${_sources}) ADD_TEST(${targetname} "${targetname}") - target_link_libraries(${targetname} ${DEFAULT_LIBRARIES} rt) + target_link_libraries(${targetname} ${DEFAULT_LIBRARIES} ${EIGEN_BTL_RT_LIBRARY}) ENDIF(BUILD_${targetname}) ENDMACRO(BTL_ADD_BENCH) @@ -91,6 +92,7 @@ ENABLE_TESTING() add_subdirectory(libs/eigen3) add_subdirectory(libs/eigen2) +add_subdirectory(libs/tensors) add_subdirectory(libs/BLAS) add_subdirectory(libs/ublas) add_subdirectory(libs/gmm) @@ -98,6 +100,7 @@ add_subdirectory(libs/mtl4) add_subdirectory(libs/blitz) add_subdirectory(libs/tvmet) add_subdirectory(libs/STL) +add_subdirectory(libs/blaze) add_subdirectory(data) diff --git a/external/eigen3/bench/btl/actions/action_axpby.hh b/external/eigen3/bench/btl/actions/action_axpby.hh index 98511ab..dadd0cc 100644 --- a/external/eigen3/bench/btl/actions/action_axpby.hh +++ b/external/eigen3/bench/btl/actions/action_axpby.hh @@ -33,7 +33,7 @@ class Action_axpby { public : // Ctor - Action_axpby( int size ):_size(size),_alpha(0.5),_beta(0.95) + Action_axpby( int size ):_alpha(0.5),_beta(0.95),_size(size) { MESSAGE("Action_axpby Ctor"); diff --git a/external/eigen3/bench/btl/actions/action_axpy.hh b/external/eigen3/bench/btl/actions/action_axpy.hh index e4cb3a5..261be4c 100644 --- a/external/eigen3/bench/btl/actions/action_axpy.hh +++ b/external/eigen3/bench/btl/actions/action_axpy.hh @@ -35,7 +35,7 @@ public : // Ctor - Action_axpy( int size ):_size(size),_coef(1.0) + Action_axpy( int size ):_coef(1.0),_size(size) { MESSAGE("Action_axpy Ctor"); diff --git a/external/eigen3/bench/btl/cmake/FindACML.cmake b/external/eigen3/bench/btl/cmake/FindACML.cmake index f45ae1b..4989fa2 100644 --- a/external/eigen3/bench/btl/cmake/FindACML.cmake +++ b/external/eigen3/bench/btl/cmake/FindACML.cmake @@ -17,6 +17,7 @@ find_file(ACML_LIBRARIES libacml_mp.so PATHS /usr/lib + /usr/lib64 $ENV{ACMLDIR}/lib ${LIB_INSTALL_DIR} ) @@ -35,6 +36,7 @@ if(NOT ACML_LIBRARIES) libacml.so libacml_mv.so PATHS /usr/lib + /usr/lib64 $ENV{ACMLDIR}/lib ${LIB_INSTALL_DIR} ) diff --git a/external/eigen3/bench/btl/cmake/FindATLAS.cmake b/external/eigen3/bench/btl/cmake/FindATLAS.cmake index 6b90652..4136a98 100644 --- a/external/eigen3/bench/btl/cmake/FindATLAS.cmake +++ b/external/eigen3/bench/btl/cmake/FindATLAS.cmake @@ -3,33 +3,25 @@ if (ATLAS_LIBRARIES) set(ATLAS_FIND_QUIETLY TRUE) endif (ATLAS_LIBRARIES) -find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_library(ATLAS_LIB atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_LIB libatlas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_LIB satlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_file(ATLAS_CBLAS libcblas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_library(ATLAS_CBLAS cblas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_LAPACK NAMES liblapack_atlas.so.3 liblapack.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_library(ATLAS_LAPACK NAMES lapack_atlas lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_file(ATLAS_LAPACK liblapack_atlas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -find_library(ATLAS_LAPACK lapack_atlas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) - -if(NOT ATLAS_LAPACK) - find_file(ATLAS_LAPACK liblapack.so.3 PATHS /usr/lib/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) - find_library(ATLAS_LAPACK lapack PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) -endif(NOT ATLAS_LAPACK) - -find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) +find_file(ATLAS_F77BLAS libf77blas.so.3 PATHS /usr/lib /usr/lib/atlas /usr/lib64 /usr/lib64/atlas $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) find_library(ATLAS_F77BLAS f77blas PATHS $ENV{ATLASDIR} ${LIB_INSTALL_DIR}) if(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) - set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_CBLAS} ${ATLAS_F77BLAS} ${ATLAS_LIB}) + set(ATLAS_LIBRARIES ${ATLAS_LAPACK} ${ATLAS_LIB}) # search the default lapack lib link to it find_file(ATLAS_REFERENCE_LAPACK liblapack.so.3 PATHS /usr/lib /usr/lib64) find_library(ATLAS_REFERENCE_LAPACK NAMES lapack) - if(ATLAS_REFERENCE_LAPACK) - set(ATLAS_LIBRARIES ${ATLAS_LIBRARIES} ${ATLAS_REFERENCE_LAPACK}) - endif() +# if(ATLAS_REFERENCE_LAPACK) +# set(ATLAS_LIBRARIES ${ATLAS_LIBRARIES} ${ATLAS_REFERENCE_LAPACK}) +# endif() endif(ATLAS_LIB AND ATLAS_CBLAS AND ATLAS_LAPACK AND ATLAS_F77BLAS) diff --git a/external/eigen3/bench/btl/cmake/FindBLAZE.cmake b/external/eigen3/bench/btl/cmake/FindBLAZE.cmake new file mode 100644 index 0000000..dba4c89 --- /dev/null +++ b/external/eigen3/bench/btl/cmake/FindBLAZE.cmake @@ -0,0 +1,31 @@ +# - Try to find eigen2 headers +# Once done this will define +# +# BLAZE_FOUND - system has blaze lib +# BLAZE_INCLUDE_DIR - the blaze include directory +# +# Copyright (C) 2008 Gael Guennebaud +# Adapted from FindEigen.cmake: +# Copyright (c) 2006, 2007 Montel Laurent, +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +if (BLAZE_INCLUDE_DIR) + + # in cache already + set(BLAZE_FOUND TRUE) + +else (BLAZE_INCLUDE_DIR) + +find_path(BLAZE_INCLUDE_DIR NAMES blaze/Blaze.h + PATHS + ${INCLUDE_INSTALL_DIR} + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(BLAZE DEFAULT_MSG BLAZE_INCLUDE_DIR) + +mark_as_advanced(BLAZE_INCLUDE_DIR) + +endif(BLAZE_INCLUDE_DIR) + diff --git a/external/eigen3/bench/btl/cmake/FindCBLAS.cmake b/external/eigen3/bench/btl/cmake/FindCBLAS.cmake index 554f029..ce0f2f2 100644 --- a/external/eigen3/bench/btl/cmake/FindCBLAS.cmake +++ b/external/eigen3/bench/btl/cmake/FindCBLAS.cmake @@ -23,6 +23,7 @@ find_file(CBLAS_LIBRARIES libcblas.so.3 PATHS /usr/lib + /usr/lib64 $ENV{CBLASDIR}/lib ${LIB_INSTALL_DIR} ) diff --git a/external/eigen3/bench/btl/cmake/FindGOTO.cmake b/external/eigen3/bench/btl/cmake/FindGOTO.cmake deleted file mode 100644 index 67ea093..0000000 --- a/external/eigen3/bench/btl/cmake/FindGOTO.cmake +++ /dev/null @@ -1,15 +0,0 @@ - -if (GOTO_LIBRARIES) - set(GOTO_FIND_QUIETLY TRUE) -endif (GOTO_LIBRARIES) - -find_library(GOTO_LIBRARIES goto PATHS $ENV{GOTODIR} ${LIB_INSTALL_DIR}) - -if(GOTO_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX) - set(GOTO_LIBRARIES ${GOTO_LIBRARIES} "-lpthread -lgfortran") -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(GOTO DEFAULT_MSG GOTO_LIBRARIES) - -mark_as_advanced(GOTO_LIBRARIES) diff --git a/external/eigen3/bench/btl/cmake/FindGOTO2.cmake b/external/eigen3/bench/btl/cmake/FindGOTO2.cmake deleted file mode 100644 index baa68d2..0000000 --- a/external/eigen3/bench/btl/cmake/FindGOTO2.cmake +++ /dev/null @@ -1,25 +0,0 @@ - -if (GOTO2_LIBRARIES) - set(GOTO2_FIND_QUIETLY TRUE) -endif (GOTO2_LIBRARIES) -# -# find_path(GOTO_INCLUDES -# NAMES -# cblas.h -# PATHS -# $ENV{GOTODIR}/include -# ${INCLUDE_INSTALL_DIR} -# ) - -find_file(GOTO2_LIBRARIES libgoto2.so PATHS /usr/lib $ENV{GOTO2DIR} ${LIB_INSTALL_DIR}) -find_library(GOTO2_LIBRARIES goto2 PATHS $ENV{GOTO2DIR} ${LIB_INSTALL_DIR}) - -if(GOTO2_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX) - set(GOTO2_LIBRARIES ${GOTO2_LIBRARIES} "-lpthread -lgfortran") -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(GOTO2 DEFAULT_MSG - GOTO2_LIBRARIES) - -mark_as_advanced(GOTO2_LIBRARIES) diff --git a/external/eigen3/bench/btl/cmake/FindOPENBLAS.cmake b/external/eigen3/bench/btl/cmake/FindOPENBLAS.cmake new file mode 100644 index 0000000..2a09194 --- /dev/null +++ b/external/eigen3/bench/btl/cmake/FindOPENBLAS.cmake @@ -0,0 +1,17 @@ + +if (OPENBLAS_LIBRARIES) + set(OPENBLAS_FIND_QUIETLY TRUE) +endif (OPENBLAS_LIBRARIES) + +find_file(OPENBLAS_LIBRARIES NAMES libopenblas.so libopenblas.so.0 PATHS /usr/lib /usr/lib64 $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) +find_library(OPENBLAS_LIBRARIES openblas PATHS $ENV{OPENBLASDIR} ${LIB_INSTALL_DIR}) + +if(OPENBLAS_LIBRARIES AND CMAKE_COMPILER_IS_GNUCXX) + set(OPENBLAS_LIBRARIES ${OPENBLAS_LIBRARIES} "-lpthread -lgfortran") +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(OPENBLAS DEFAULT_MSG + OPENBLAS_LIBRARIES) + +mark_as_advanced(OPENBLAS_LIBRARIES) diff --git a/external/eigen3/bench/btl/data/action_settings.txt b/external/eigen3/bench/btl/data/action_settings.txt index e32213e..39d2b5d 100644 --- a/external/eigen3/bench/btl/data/action_settings.txt +++ b/external/eigen3/bench/btl/data/action_settings.txt @@ -1,19 +1,19 @@ -aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:3000 -ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:3000 -atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:3000 +aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:5000 +ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:5000 +atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:5000 axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000 axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000 -matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:3000 -matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:3000 -trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:3000 -trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:3000 -trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:3000 -cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:3000 -complete_lu_decomp ; "{/*1.5 Complete LU decomposition}" ; "matrix size" ; 4:3000 -partial_lu_decomp ; "{/*1.5 Partial LU decomposition}" ; "matrix size" ; 4:3000 -tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:3000 -hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:3000 -symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:3000 -syr2 ; "{/*1.5 symmretric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:3000 -ger ; "{/*1.5 general rank-1 update (A += u v^T)}" ; "matrix size" ; 4:3000 -rot ; "{/*1.5 apply rotation in the plane}" ; "vector size" ; 4:1000000 \ No newline at end of file +matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:5000 +matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:5000 +trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:5000 +trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:5000 +trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:5000 +cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:5000 +complete_lu_decomp ; "{/*1.5 Complete LU decomposition}" ; "matrix size" ; 4:5000 +partial_lu_decomp ; "{/*1.5 Partial LU decomposition}" ; "matrix size" ; 4:5000 +tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:5000 +hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:5000 +symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:5000 +syr2 ; "{/*1.5 symmretric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:5000 +ger ; "{/*1.5 general rank-1 update (A += u v^T)}" ; "matrix size" ; 4:5000 +rot ; "{/*1.5 apply rotation in the plane}" ; "vector size" ; 4:1000000 diff --git a/external/eigen3/bench/btl/data/perlib_plot_settings.txt b/external/eigen3/bench/btl/data/perlib_plot_settings.txt index 6844bab..f023cfe 100644 --- a/external/eigen3/bench/btl/data/perlib_plot_settings.txt +++ b/external/eigen3/bench/btl/data/perlib_plot_settings.txt @@ -10,7 +10,7 @@ ublas ; with lines lw 3 lt 1 lc rgbcolor "#00b7ff" mtl4 ; with lines lw 3 lt 1 lc rgbcolor "#d18847" blitz ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff" F77 ; with lines lw 3 lt 3 lc rgbcolor "#e6e64c" -GOTO ; with lines lw 3 lt 3 lc rgbcolor "#C05600" -GOTO2 ; with lines lw 3 lt 1 lc rgbcolor "#C05600" +OPENBLAS ; with lines lw 3 lt 1 lc rgbcolor "#C05600" C ; with lines lw 3 lt 3 lc rgbcolor "#e6bd96" ACML ; with lines lw 2 lt 3 lc rgbcolor "#e6e64c" +blaze ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff" diff --git a/external/eigen3/bench/btl/generic_bench/bench.hh b/external/eigen3/bench/btl/generic_bench/bench.hh index 005c363..7b7b951 100644 --- a/external/eigen3/bench/btl/generic_bench/bench.hh +++ b/external/eigen3/bench/btl/generic_bench/bench.hh @@ -102,8 +102,8 @@ BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ) // merge the two data std::vector newSizes; std::vector newFlops; - int i=0; - int j=0; + unsigned int i=0; + unsigned int j=0; while (i config = BtlString(_config).split(" \t\n"); - for (int i = 0; i BTL_DONT_INLINE void init_matrix(Vector & A, int size){ A.resize(size); - for (int row=0; row(A[row],size,row); } } @@ -50,11 +50,11 @@ BTL_DONT_INLINE void init_matrix(Vector & A, int size){ template BTL_DONT_INLINE void init_matrix_symm(Matrix& A, int size){ A.resize(size); - for (int row=0; row @@ -87,6 +87,48 @@ }; // Portable_Timer +#elif defined(__APPLE__) +#include +#include + + +class Portable_Timer +{ + public: + + Portable_Timer() + { + } + + void start() + { + m_start_time = double(mach_absolute_time())*1e-9;; + + } + + void stop() + { + m_stop_time = double(mach_absolute_time())*1e-9;; + + } + + double elapsed() + { + return user_time(); + } + + double user_time() + { + return m_stop_time - m_start_time; + } + + +private: + + double m_stop_time, m_start_time; + +}; // Portable_Timer (Apple) + #else #include @@ -138,7 +180,7 @@ private: int m_clkid; double m_stop_time, m_start_time; -}; // Portable_Timer +}; // Portable_Timer (Linux) #endif diff --git a/external/eigen3/bench/btl/generic_bench/utils/size_lin_log.hh b/external/eigen3/bench/btl/generic_bench/utils/size_lin_log.hh index bca3932..bbc9f54 100644 --- a/external/eigen3/bench/btl/generic_bench/utils/size_lin_log.hh +++ b/external/eigen3/bench/btl/generic_bench/utils/size_lin_log.hh @@ -23,7 +23,7 @@ #include "size_log.hh" template -void size_lin_log(const int nb_point, const int size_min, const int size_max, Vector & X) +void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector & X) { int ten=10; int nine=9; diff --git a/external/eigen3/bench/btl/libs/BLAS/CMakeLists.txt b/external/eigen3/bench/btl/libs/BLAS/CMakeLists.txt index de42fe0..0272cca 100644 --- a/external/eigen3/bench/btl/libs/BLAS/CMakeLists.txt +++ b/external/eigen3/bench/btl/libs/BLAS/CMakeLists.txt @@ -18,27 +18,14 @@ if (MKL_FOUND) endif (MKL_FOUND) -find_package(GOTO2) -if (GOTO2_FOUND) - btl_add_bench(btl_goto2 main.cpp) - if(BUILD_btl_goto2) - target_link_libraries(btl_goto2 ${GOTO_LIBRARIES} ) - set_target_properties(btl_goto2 PROPERTIES COMPILE_FLAGS "-DCBLASNAME=GOTO2") - endif(BUILD_btl_goto2) -endif (GOTO2_FOUND) - -find_package(GOTO) -if (GOTO_FOUND) - if(GOTO2_FOUND) - btl_add_bench(btl_goto main.cpp OFF) - else() - btl_add_bench(btl_goto main.cpp) - endif() - if(BUILD_btl_goto) - target_link_libraries(btl_goto ${GOTO_LIBRARIES} ) - set_target_properties(btl_goto PROPERTIES COMPILE_FLAGS "-DCBLASNAME=GOTO") - endif(BUILD_btl_goto) -endif (GOTO_FOUND) +find_package(OPENBLAS) +if (OPENBLAS_FOUND) + btl_add_bench(btl_openblas main.cpp) + if(BUILD_btl_openblas) + target_link_libraries(btl_openblas ${OPENBLAS_LIBRARIES} ) + set_target_properties(btl_openblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=OPENBLAS") + endif(BUILD_btl_openblas) +endif (OPENBLAS_FOUND) find_package(ACML) if (ACML_FOUND) diff --git a/external/eigen3/bench/btl/libs/BLAS/blas_interface_impl.hh b/external/eigen3/bench/btl/libs/BLAS/blas_interface_impl.hh index 0e84df0..fc4ba2a 100644 --- a/external/eigen3/bench/btl/libs/BLAS/blas_interface_impl.hh +++ b/external/eigen3/bench/btl/libs/BLAS/blas_interface_impl.hh @@ -75,7 +75,6 @@ public : static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ int N2 = N*N; BLAS_FUNC(copy)(&N2, X, &intone, C, &intone); - char uplo = 'L'; int info = 0; int * ipiv = (int*)alloca(sizeof(int)*N); BLAS_FUNC(getrf)(&N, &N, C, &N, ipiv, &info); @@ -92,7 +91,7 @@ public : BLAS_FUNC(trsm)(&right, &lower, ¬rans, &nonunit, &N, &N, &fone, L, &N, X, &N); } - static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){ + static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & /*X*/, int N){ BLAS_FUNC(trmm)(&left, &lower, ¬rans,&nonunit, &N,&N,&fone,A,&N,B,&N); } @@ -101,7 +100,6 @@ public : static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ int N2 = N*N; BLAS_FUNC(copy)(&N2, X, &intone, C, &intone); - char uplo = 'L'; int info = 0; int * ipiv = (int*)alloca(sizeof(int)*N); int * jpiv = (int*)alloca(sizeof(int)*N); @@ -134,8 +132,6 @@ public : } char uplo = 'U'; int info = 0; - int ilo = 1; - int ihi = N; int bsize = 64; int worksize = N*bsize; SCALAR* d = new SCALAR[3*N+worksize]; diff --git a/external/eigen3/bench/btl/libs/BLAS/c_interface_base.h b/external/eigen3/bench/btl/libs/BLAS/c_interface_base.h index 515d8dc..de61380 100644 --- a/external/eigen3/bench/btl/libs/BLAS/c_interface_base.h +++ b/external/eigen3/bench/btl/libs/BLAS/c_interface_base.h @@ -17,12 +17,12 @@ public: typedef real* gene_matrix; typedef real* gene_vector; - static void free_matrix(gene_matrix & A, int N){ - delete A; + static void free_matrix(gene_matrix & A, int /*N*/){ + delete[] A; } static void free_vector(gene_vector & B){ - delete B; + delete[] B; } static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ diff --git a/external/eigen3/bench/btl/libs/BLAS/main.cpp b/external/eigen3/bench/btl/libs/BLAS/main.cpp index 8347c9f..564d55e 100644 --- a/external/eigen3/bench/btl/libs/BLAS/main.cpp +++ b/external/eigen3/bench/btl/libs/BLAS/main.cpp @@ -56,13 +56,13 @@ int main() bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); #ifdef HAS_LAPACK - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); #endif //bench > >(MIN_LU,MAX_LU,NB_POINT); diff --git a/external/eigen3/bench/btl/libs/STL/STL_interface.hh b/external/eigen3/bench/btl/libs/STL/STL_interface.hh index 93e76bd..ef4cc92 100644 --- a/external/eigen3/bench/btl/libs/STL/STL_interface.hh +++ b/external/eigen3/bench/btl/libs/STL/STL_interface.hh @@ -44,9 +44,9 @@ public : return "STL"; } - static void free_matrix(gene_matrix & A, int N){} + static void free_matrix(gene_matrix & /*A*/, int /*N*/){} - static void free_vector(gene_vector & B){} + static void free_vector(gene_vector & /*B*/){} static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ A = A_stl; diff --git a/external/eigen3/bench/btl/libs/blaze/CMakeLists.txt b/external/eigen3/bench/btl/libs/blaze/CMakeLists.txt new file mode 100644 index 0000000..e99a085 --- /dev/null +++ b/external/eigen3/bench/btl/libs/blaze/CMakeLists.txt @@ -0,0 +1,13 @@ + +find_package(BLAZE) +find_package(Boost COMPONENTS system) +if (BLAZE_FOUND AND Boost_FOUND) + include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) + btl_add_bench(btl_blaze main.cpp) + # Note: The newest blaze version requires C++14. + # Ideally, we should set this depending on the version of Blaze we found + set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14) + if(BUILD_btl_blaze) + target_link_libraries(btl_blaze ${Boost_LIBRARIES}) + endif() +endif () diff --git a/external/eigen3/bench/btl/libs/blaze/blaze_interface.hh b/external/eigen3/bench/btl/libs/blaze/blaze_interface.hh new file mode 100644 index 0000000..ee15239 --- /dev/null +++ b/external/eigen3/bench/btl/libs/blaze/blaze_interface.hh @@ -0,0 +1,140 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#ifndef BLAZE_INTERFACE_HH +#define BLAZE_INTERFACE_HH + +#include +#include +// using namespace blaze; + +#include + +template +class blaze_interface { + +public : + + typedef real real_type ; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef blaze::DynamicMatrix gene_matrix; + typedef blaze::DynamicVector gene_vector; + + static inline std::string name() { return "blaze"; } + + static void free_matrix(gene_matrix & A, int N){ + return ; + } + + static void free_vector(gene_vector & B){ + return ; + } + + static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(A_stl[0].size(), A_stl.size()); + + for (int j=0; j ipvt(N); +// lu_factor(R, ipvt); +// } + +// static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){ +// X = lower_trisolve(L, B); +// } + + static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ + cible = source; + } + + static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + cible = source; + } + +}; + +#endif diff --git a/external/eigen3/bench/btl/libs/blaze/main.cpp b/external/eigen3/bench/btl/libs/blaze/main.cpp new file mode 100644 index 0000000..80e8f4e --- /dev/null +++ b/external/eigen3/bench/btl/libs/blaze/main.cpp @@ -0,0 +1,40 @@ +//===================================================== +// Copyright (C) 2008 Gael Guennebaud +//===================================================== +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License +// as published by the Free Software Foundation; either version 2 +// of the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +// +#include "utilities.h" +#include "blaze_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + bench > >(MIN_MV,MAX_MV,NB_POINT); + bench > >(MIN_MV,MAX_MV,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} + + diff --git a/external/eigen3/bench/btl/libs/eigen2/eigen2_interface.hh b/external/eigen3/bench/btl/libs/eigen2/eigen2_interface.hh index 47fe581..1deabda 100644 --- a/external/eigen3/bench/btl/libs/eigen2/eigen2_interface.hh +++ b/external/eigen3/bench/btl/libs/eigen2/eigen2_interface.hh @@ -47,7 +47,7 @@ public : { #if defined(EIGEN_VECTORIZE_SSE) if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; - #elif defined(EIGEN_VECTORIZE_ALTIVEC) + #elif defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) if (SIZE==Dynamic) return "eigen2"; else return "tiny_eigen2"; #else if (SIZE==Dynamic) return "eigen2_novec"; else return "tiny_eigen2_novec"; diff --git a/external/eigen3/bench/btl/libs/eigen3/eigen3_interface.hh b/external/eigen3/bench/btl/libs/eigen3/eigen3_interface.hh index 31bcc1f..b821fd7 100644 --- a/external/eigen3/bench/btl/libs/eigen3/eigen3_interface.hh +++ b/external/eigen3/bench/btl/libs/eigen3/eigen3_interface.hh @@ -45,15 +45,15 @@ public : return EIGEN_MAKESTRING(BTL_PREFIX); } - static void free_matrix(gene_matrix & A, int N) {} + static void free_matrix(gene_matrix & /*A*/, int /*N*/) {} - static void free_vector(gene_vector & B) {} + static void free_vector(gene_vector & /*B*/) {} static BTL_DONT_INLINE void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ A.resize(A_stl[0].size(), A_stl.size()); - for (int j=0; j().setZero(); X.template selfadjointView().rankUpdate(A); } - static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){ + static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int /*N*/){ X.noalias() = A*B; } - static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){ + static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int /*N*/){ X.noalias() = (A.template selfadjointView() * B); // internal::product_selfadjoint_vector(N,A.data(),N, B.data(), 1, X.data(), 1); } @@ -155,54 +155,54 @@ public : } } - static EIGEN_DONT_INLINE void syr2(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){ + static EIGEN_DONT_INLINE void syr2(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){ // internal::product_selfadjoint_rank2_update(N,A.data(),N, X.data(), 1, Y.data(), 1, -1); for(int j=0; j(c,s)); } - static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){ + static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int /*N*/){ X.noalias() = (A.transpose()*B); } - static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){ + static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int /*N*/){ Y += coef * X; } - static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){ + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int /*N*/){ Y = a*X + b*Y; } - static EIGEN_DONT_INLINE void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){ + static EIGEN_DONT_INLINE void copy_matrix(const gene_matrix & source, gene_matrix & cible, int /*N*/){ cible = source; } - static EIGEN_DONT_INLINE void copy_vector(const gene_vector & source, gene_vector & cible, int N){ + static EIGEN_DONT_INLINE void copy_vector(const gene_vector & source, gene_vector & cible, int /*N*/){ cible = source; } - static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){ + static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int /*N*/){ X = L.template triangularView().solve(B); } - static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){ + static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int /*N*/){ X = L.template triangularView().solve(B); } - static inline void trmm(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){ + static inline void trmm(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int /*N*/){ X.noalias() = L.template triangularView() * B; } - static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){ + static inline void cholesky(const gene_matrix & X, gene_matrix & C, int /*N*/){ C = X; internal::llt_inplace::blocked(C); //C = X.llt().matrixL(); @@ -211,11 +211,11 @@ public : // Cholesky::computeInPlaceBlock(C); } - static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ + static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int /*N*/){ C = X.fullPivLu().matrixLU(); } - static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ + static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){ Matrix piv(N); DenseIndex nb; C = X; @@ -223,13 +223,13 @@ public : // C = X.partialPivLu().matrixLU(); } - static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ + static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){ typename Tridiagonalization::CoeffVectorType aux(N-1); C = X; internal::tridiagonalization_inplace(C, aux); } - static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){ + static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int /*N*/){ C = HessenbergDecomposition(X).packedMatrix(); } diff --git a/external/eigen3/bench/btl/libs/eigen3/main_adv.cpp b/external/eigen3/bench/btl/libs/eigen3/main_adv.cpp index efe5857..9586535 100644 --- a/external/eigen3/bench/btl/libs/eigen3/main_adv.cpp +++ b/external/eigen3/bench/btl/libs/eigen3/main_adv.cpp @@ -29,14 +29,14 @@ BTL_MAIN; int main() { - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); +// bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); - bench > >(MIN_MM,MAX_MM,NB_POINT); +// bench > >(MIN_LU,MAX_LU,NB_POINT); + bench > >(MIN_LU,MAX_LU,NB_POINT); return 0; } diff --git a/external/eigen3/bench/btl/libs/tensors/CMakeLists.txt b/external/eigen3/bench/btl/libs/tensors/CMakeLists.txt new file mode 100644 index 0000000..09d6d8e --- /dev/null +++ b/external/eigen3/bench/btl/libs/tensors/CMakeLists.txt @@ -0,0 +1,44 @@ + + +if((NOT TENSOR_INCLUDE_DIR) AND Eigen_SOURCE_DIR) + # unless TENSOR_INCLUDE_DIR is defined, let's use current Eigen version + set(TENSOR_INCLUDE_DIR ${Eigen_SOURCE_DIR}) + set(TENSOR_FOUND TRUE) +else() + find_package(Tensor) +endif() + +if (TENSOR_FOUND) + + include_directories(${TENSOR_INCLUDE_DIR}) + btl_add_bench(btl_tensor_linear main_linear.cpp) + btl_add_bench(btl_tensor_vecmat main_vecmat.cpp) + btl_add_bench(btl_tensor_matmat main_matmat.cpp) + + btl_add_target_property(btl_tensor_linear COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=tensor") + btl_add_target_property(btl_tensor_vecmat COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=tensor") + btl_add_target_property(btl_tensor_matmat COMPILE_FLAGS "-fno-exceptions -DBTL_PREFIX=tensor") + + option(BTL_BENCH_NOGCCVEC "also bench Eigen explicit vec without GCC's auto vec" OFF) + if(CMAKE_COMPILER_IS_GNUCXX AND BTL_BENCH_NOGCCVEC) + btl_add_bench(btl_tensor_nogccvec_linear main_linear.cpp) + btl_add_bench(btl_tensor_nogccvec_vecmat main_vecmat.cpp) + btl_add_bench(btl_tensor_nogccvec_matmat main_matmat.cpp) + + btl_add_target_property(btl_tensor_nogccvec_linear COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=tensor_nogccvec") + btl_add_target_property(btl_tensor_nogccvec_vecmat COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=tensor_nogccvec") + btl_add_target_property(btl_tensor_nogccvec_matmat COMPILE_FLAGS "-fno-exceptions -fno-tree-vectorize -DBTL_PREFIX=tensor_nogccvec") + endif() + + + if(NOT BTL_NOVEC) + btl_add_bench(btl_tensor_novec_linear main_linear.cpp OFF) + btl_add_bench(btl_tensor_novec_vecmat main_vecmat.cpp OFF) + btl_add_bench(btl_tensor_novec_matmat main_matmat.cpp OFF) + btl_add_target_property(btl_tensor_novec_linear COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=tensor_novec") + btl_add_target_property(btl_tensor_novec_vecmat COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=tensor_novec") + btl_add_target_property(btl_tensor_novec_matmat COMPILE_FLAGS "-fno-exceptions -DEIGEN_DONT_VECTORIZE -DBTL_PREFIX=tensor_novec") + + endif(NOT BTL_NOVEC) + +endif (TENSOR_FOUND) diff --git a/external/eigen3/bench/btl/libs/tensors/main_linear.cpp b/external/eigen3/bench/btl/libs/tensors/main_linear.cpp new file mode 100644 index 0000000..e257f1e --- /dev/null +++ b/external/eigen3/bench/btl/libs/tensors/main_linear.cpp @@ -0,0 +1,23 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "utilities.h" +#include "tensor_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + bench > >(MIN_AXPY,MAX_AXPY,NB_POINT); + + return 0; +} diff --git a/external/eigen3/bench/btl/libs/tensors/main_matmat.cpp b/external/eigen3/bench/btl/libs/tensors/main_matmat.cpp new file mode 100644 index 0000000..675fcfc --- /dev/null +++ b/external/eigen3/bench/btl/libs/tensors/main_matmat.cpp @@ -0,0 +1,21 @@ +//===================================================== +// Copyright (C) 2014 Benoit Steiner +//===================================================== +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// +#include "utilities.h" +#include "tensor_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MM,MAX_MM,NB_POINT); + + return 0; +} diff --git a/external/eigen3/bench/btl/libs/tensors/main_vecmat.cpp b/external/eigen3/bench/btl/libs/tensors/main_vecmat.cpp new file mode 100644 index 0000000..1af00c8 --- /dev/null +++ b/external/eigen3/bench/btl/libs/tensors/main_vecmat.cpp @@ -0,0 +1,21 @@ +//===================================================== +// Copyright (C) 2014 Benoit Steiner +//===================================================== +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// +#include "utilities.h" +#include "tensor_interface.hh" +#include "bench.hh" +#include "basic_actions.hh" + +BTL_MAIN; + +int main() +{ + bench > >(MIN_MV,MAX_MV,NB_POINT); + + return 0; +} diff --git a/external/eigen3/bench/btl/libs/tensors/tensor_interface.hh b/external/eigen3/bench/btl/libs/tensors/tensor_interface.hh new file mode 100644 index 0000000..97b8e0f --- /dev/null +++ b/external/eigen3/bench/btl/libs/tensors/tensor_interface.hh @@ -0,0 +1,105 @@ +//===================================================== +// Copyright (C) 2014 Benoit Steiner +//===================================================== +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// +#ifndef TENSOR_INTERFACE_HH +#define TENSOR_INTERFACE_HH + +#include +#include +#include "btl.hh" + +using namespace Eigen; + +template +class tensor_interface +{ +public : + typedef real real_type; + typedef typename Eigen::Tensor::Index Index; + + typedef std::vector stl_vector; + typedef std::vector stl_matrix; + + typedef Eigen::Tensor gene_matrix; + typedef Eigen::Tensor gene_vector; + + + static inline std::string name( void ) + { + return EIGEN_MAKESTRING(BTL_PREFIX); + } + + static void free_matrix(gene_matrix & /*A*/, int /*N*/) {} + + static void free_vector(gene_vector & /*B*/) {} + + static BTL_DONT_INLINE void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){ + A.resize(Eigen::array(A_stl[0].size(), A_stl.size())); + + for (unsigned int j=0; j(i,j)) = A_stl[j][i]; + } + } + } + + static BTL_DONT_INLINE void vector_from_stl(gene_vector & B, stl_vector & B_stl){ + B.resize(B_stl.size()); + + for (unsigned int i=0; i(i,j)); + } + } + } + + static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int /*N*/){ + typedef typename Eigen::Tensor::DimensionPair DimPair; + const Eigen::array dims(DimPair(1, 0)); + X/*.noalias()*/ = A.contract(B, dims); + } + + static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int /*N*/){ + typedef typename Eigen::Tensor::DimensionPair DimPair; + const Eigen::array dims(DimPair(1, 0)); + X/*.noalias()*/ = A.contract(B, dims); + } + + static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int /*N*/){ + Y += X.constant(coef) * X; + } + + static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int /*N*/){ + Y = X.constant(a)*X + Y.constant(b)*Y; + } + + static EIGEN_DONT_INLINE void copy_matrix(const gene_matrix & source, gene_matrix & cible, int /*N*/){ + cible = source; + } + + static EIGEN_DONT_INLINE void copy_vector(const gene_vector & source, gene_vector & cible, int /*N*/){ + cible = source; + } +}; + +#endif diff --git a/external/eigen3/bench/dense_solvers.cpp b/external/eigen3/bench/dense_solvers.cpp new file mode 100644 index 0000000..24343dc --- /dev/null +++ b/external/eigen3/bench/dense_solvers.cpp @@ -0,0 +1,186 @@ +#include +#include "BenchTimer.h" +#include +#include +#include +#include +#include +using namespace Eigen; + +std::map > results; +std::vector labels; +std::vector sizes; + +template +EIGEN_DONT_INLINE +void compute_norm_equation(Solver &solver, const MatrixType &A) { + if(A.rows()!=A.cols()) + solver.compute(A.transpose()*A); + else + solver.compute(A); +} + +template +EIGEN_DONT_INLINE +void compute(Solver &solver, const MatrixType &A) { + solver.compute(A); +} + +template +void bench(int id, int rows, int size = Size) +{ + typedef Matrix Mat; + typedef Matrix MatDyn; + typedef Matrix MatSquare; + Mat A(rows,size); + A.setRandom(); + if(rows==size) + A = A*A.adjoint(); + BenchTimer t_llt, t_ldlt, t_lu, t_fplu, t_qr, t_cpqr, t_cod, t_fpqr, t_jsvd, t_bdcsvd; + + int svd_opt = ComputeThinU|ComputeThinV; + + int tries = 5; + int rep = 1000/size; + if(rep==0) rep = 1; +// rep = rep*rep; + + LLT llt(size); + LDLT ldlt(size); + PartialPivLU lu(size); + FullPivLU fplu(size,size); + HouseholderQR qr(A.rows(),A.cols()); + ColPivHouseholderQR cpqr(A.rows(),A.cols()); + CompleteOrthogonalDecomposition cod(A.rows(),A.cols()); + FullPivHouseholderQR fpqr(A.rows(),A.cols()); + JacobiSVD jsvd(A.rows(),A.cols()); + BDCSVD bdcsvd(A.rows(),A.cols()); + + BENCH(t_llt, tries, rep, compute_norm_equation(llt,A)); + BENCH(t_ldlt, tries, rep, compute_norm_equation(ldlt,A)); + BENCH(t_lu, tries, rep, compute_norm_equation(lu,A)); + if(size<=1000) + BENCH(t_fplu, tries, rep, compute_norm_equation(fplu,A)); + BENCH(t_qr, tries, rep, compute(qr,A)); + BENCH(t_cpqr, tries, rep, compute(cpqr,A)); + BENCH(t_cod, tries, rep, compute(cod,A)); + if(size*rows<=10000000) + BENCH(t_fpqr, tries, rep, compute(fpqr,A)); + if(size<500) // JacobiSVD is really too slow for too large matrices + BENCH(t_jsvd, tries, rep, jsvd.compute(A,svd_opt)); +// if(size*rows<=20000000) + BENCH(t_bdcsvd, tries, rep, bdcsvd.compute(A,svd_opt)); + + results["LLT"][id] = t_llt.best(); + results["LDLT"][id] = t_ldlt.best(); + results["PartialPivLU"][id] = t_lu.best(); + results["FullPivLU"][id] = t_fplu.best(); + results["HouseholderQR"][id] = t_qr.best(); + results["ColPivHouseholderQR"][id] = t_cpqr.best(); + results["CompleteOrthogonalDecomposition"][id] = t_cod.best(); + results["FullPivHouseholderQR"][id] = t_fpqr.best(); + results["JacobiSVD"][id] = t_jsvd.best(); + results["BDCSVD"][id] = t_bdcsvd.best(); +} + + +int main() +{ + labels.push_back("LLT"); + labels.push_back("LDLT"); + labels.push_back("PartialPivLU"); + labels.push_back("FullPivLU"); + labels.push_back("HouseholderQR"); + labels.push_back("ColPivHouseholderQR"); + labels.push_back("CompleteOrthogonalDecomposition"); + labels.push_back("FullPivHouseholderQR"); + labels.push_back("JacobiSVD"); + labels.push_back("BDCSVD"); + + for(int i=0; i(k,sizes[k](0),sizes[k](1)); + } + + cout.width(32); + cout << "solver/size"; + cout << " "; + for(int k=0; k=1e6) cout << "-"; + else cout << r(k); + cout << " "; + } + cout << endl; + } + + // HTML output + cout << "" << endl; + cout << "" << endl; + for(int k=0; k" << sizes[k](0) << "x" << sizes[k](1) << ""; + cout << "" << endl; + for(int i=0; i"; + ArrayXf r = (results[labels[i]]*100000.f).floor()/100.f; + for(int k=0; k=1e6) cout << ""; + else + { + cout << ""; + } + } + cout << "" << endl; + } + cout << "
solver/size
" << labels[i] << "-" << r(k); + if(i>0) + cout << " (x" << numext::round(10.f*results[labels[i]](k)/results["LLT"](k))/10.f << ")"; + if(i<4 && sizes[k](0)!=sizes[k](1)) + cout << " *"; + cout << "
" << endl; + +// cout << "LLT (ms) " << (results["LLT"]*1000.).format(fmt) << "\n"; +// cout << "LDLT (%) " << (results["LDLT"]/results["LLT"]).format(fmt) << "\n"; +// cout << "PartialPivLU (%) " << (results["PartialPivLU"]/results["LLT"]).format(fmt) << "\n"; +// cout << "FullPivLU (%) " << (results["FullPivLU"]/results["LLT"]).format(fmt) << "\n"; +// cout << "HouseholderQR (%) " << (results["HouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "ColPivHouseholderQR (%) " << (results["ColPivHouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "CompleteOrthogonalDecomposition (%) " << (results["CompleteOrthogonalDecomposition"]/results["LLT"]).format(fmt) << "\n"; +// cout << "FullPivHouseholderQR (%) " << (results["FullPivHouseholderQR"]/results["LLT"]).format(fmt) << "\n"; +// cout << "JacobiSVD (%) " << (results["JacobiSVD"]/results["LLT"]).format(fmt) << "\n"; +// cout << "BDCSVD (%) " << (results["BDCSVD"]/results["LLT"]).format(fmt) << "\n"; +} diff --git a/external/eigen3/bench/eig33.cpp b/external/eigen3/bench/eig33.cpp index 1608b99..47947a9 100644 --- a/external/eigen3/bench/eig33.cpp +++ b/external/eigen3/bench/eig33.cpp @@ -50,7 +50,7 @@ inline void computeRoots(const Matrix& m, Roots& roots) { typedef typename Matrix::Scalar Scalar; const Scalar s_inv3 = 1.0/3.0; - const Scalar s_sqrt3 = internal::sqrt(Scalar(3.0)); + const Scalar s_sqrt3 = std::sqrt(Scalar(3.0)); // The characteristic equation is x^3 - c2*x^2 + c1*x - c0 = 0. The // eigenvalues are the roots to this equation, all guaranteed to be @@ -73,23 +73,13 @@ inline void computeRoots(const Matrix& m, Roots& roots) q = Scalar(0); // Compute the eigenvalues by solving for the roots of the polynomial. - Scalar rho = internal::sqrt(-a_over_3); - Scalar theta = std::atan2(internal::sqrt(-q),half_b)*s_inv3; - Scalar cos_theta = internal::cos(theta); - Scalar sin_theta = internal::sin(theta); - roots(0) = c2_over_3 + Scalar(2)*rho*cos_theta; - roots(1) = c2_over_3 - rho*(cos_theta + s_sqrt3*sin_theta); - roots(2) = c2_over_3 - rho*(cos_theta - s_sqrt3*sin_theta); - - // Sort in increasing order. - if (roots(0) >= roots(1)) - std::swap(roots(0),roots(1)); - if (roots(1) >= roots(2)) - { - std::swap(roots(1),roots(2)); - if (roots(0) >= roots(1)) - std::swap(roots(0),roots(1)); - } + Scalar rho = std::sqrt(-a_over_3); + Scalar theta = std::atan2(std::sqrt(-q),half_b)*s_inv3; + Scalar cos_theta = std::cos(theta); + Scalar sin_theta = std::sin(theta); + roots(2) = c2_over_3 + Scalar(2)*rho*cos_theta; + roots(0) = c2_over_3 - rho*(cos_theta + s_sqrt3*sin_theta); + roots(1) = c2_over_3 - rho*(cos_theta - s_sqrt3*sin_theta); } template @@ -99,9 +89,12 @@ void eigen33(const Matrix& mat, Matrix& evecs, Vector& evals) // Scale the matrix so its entries are in [-1,1]. The scaling is applied // only when at least one matrix entry has magnitude larger than 1. - Scalar scale = mat.cwiseAbs()/*.template triangularView()*/.maxCoeff(); + Scalar shift = mat.trace()/3; + Matrix scaledMat = mat; + scaledMat.diagonal().array() -= shift; + Scalar scale = scaledMat.cwiseAbs()/*.template triangularView()*/.maxCoeff(); scale = std::max(scale,Scalar(1)); - Matrix scaledMat = mat / scale; + scaledMat/=scale; // Compute the eigenvalues // scaledMat.setZero(); @@ -166,6 +159,7 @@ void eigen33(const Matrix& mat, Matrix& evecs, Vector& evals) // Rescale back to the original size. evals *= scale; + evals.array()+=shift; } int main() @@ -173,24 +167,29 @@ int main() BenchTimer t; int tries = 10; int rep = 400000; - typedef Matrix3f Mat; - typedef Vector3f Vec; + typedef Matrix3d Mat; + typedef Vector3d Vec; Mat A = Mat::Random(3,3); A = A.adjoint() * A; +// Mat Q = A.householderQr().householderQ(); +// A = Q * Vec(2.2424567,2.2424566,7.454353).asDiagonal() * Q.transpose(); SelfAdjointEigenSolver eig(A); BENCH(t, tries, rep, eig.compute(A)); - std::cout << "Eigen: " << t.best() << "s\n"; + std::cout << "Eigen iterative: " << t.best() << "s\n"; + + BENCH(t, tries, rep, eig.computeDirect(A)); + std::cout << "Eigen direct : " << t.best() << "s\n"; Mat evecs; Vec evals; BENCH(t, tries, rep, eigen33(A,evecs,evals)); std::cout << "Direct: " << t.best() << "s\n\n"; - std::cerr << "Eigenvalue/eigenvector diffs:\n"; - std::cerr << (evals - eig.eigenvalues()).transpose() << "\n"; - for(int k=0;k<3;++k) - if(evecs.col(k).dot(eig.eigenvectors().col(k))<0) - evecs.col(k) = -evecs.col(k); - std::cerr << evecs - eig.eigenvectors() << "\n\n"; +// std::cerr << "Eigenvalue/eigenvector diffs:\n"; +// std::cerr << (evals - eig.eigenvalues()).transpose() << "\n"; +// for(int k=0;k<3;++k) +// if(evecs.col(k).dot(eig.eigenvectors().col(k))<0) +// evecs.col(k) = -evecs.col(k); +// std::cerr << evecs - eig.eigenvectors() << "\n\n"; } diff --git a/external/eigen3/bench/perf_monitoring/gemm/changesets.txt b/external/eigen3/bench/perf_monitoring/gemm/changesets.txt new file mode 100644 index 0000000..af8eb9b --- /dev/null +++ b/external/eigen3/bench/perf_monitoring/gemm/changesets.txt @@ -0,0 +1,61 @@ +#3.0.1 +#3.1.1 +#3.2.0 +3.2.4 +#5745:37f59e65eb6c +5891:d8652709345d # introduce AVX +#5893:24b4dc92c6d3 # merge +5895:997c2ef9fc8b # introduce FMA +#5904:e1eafd14eaa1 # complex and AVX +5908:f8ee3c721251 # improve packing with ptranspose +#5921:ca808bb456b0 # merge +#5927:8b1001f9e3ac +5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks +#5949:f3488f4e45b2 # merge +#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec +#5992:4a429f5e0483 # merge +before-evaluators +#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products +#6639:c9121c60b5c7 +#6655:06f163b5221f # Properly detect FMA support on ARM +#6677:700e023044e7 # FMA has been wrongly disabled +#6681:11d31dafb0e3 +#6699:5e6e8e10aad1 # merge default to tensors +#6726:ff2d2388e7b9 # merge default to tensors +#6742:0cbd6195e829 # merge default to tensors +#6747:853d2bafeb8f # Generalized the gebp apis +6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation +#6781:9cc5a931b2c6 # generalized gemv +#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product +#6844:039efd86b75c # merge tensor +6845:7333ed40c6ef # change prefetching in gebp +#6856:b5be5e10eb7f # merge index conversion +#6893:c3a64aba7c70 # clean blocking size computation +#6898:6fb31ebe6492 # rotating kernel for ARM +6899:877facace746 # rotating kernel for ARM only +#6904:c250623ae9fa # result_of +6921:915f1b1fc158 # fix prefetching change for ARM +6923:9ff25f6dacc6 # prefetching +6933:52572e60b5d3 # blocking size strategy +6937:c8c042f286b2 # avoid redundant pack_rhs +6981:7e5d6f78da59 # dynamic loop swapping +6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache +6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1. +7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5) +7015:8aad8f35c955 # Refactor computeProductBlockingSizes to make room for the possibility of using lookup tables +7016:a58d253e8c91 # Polish lookup tables generation +7018:9b27294a8186 # actual_panel_rows computation should always be resilient to parameters not consistent with the known L1 cache size, see comment +7019:c758b1e2c073 # Provide a empirical lookup table for blocking sizes measured on a Nexus 5. Only for float, only for Android on ARM 32bit for now. +7085:627e039fba68 # Bug 986: add support for coefficient-based product with 0 depth. +7098:b6f1db9cf9ec # Bug 992: don't select a 3p GEMM path with non-vectorizable scalar types, this hits unsupported paths in symm/triangular products code +7591:09a8e2186610 # 3.3-alpha1 +7650:b0f3c8f43025 # help clang inlining +#8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs) +8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes +8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path +8985:d935df21a082 # Remove the rotating kernel. +8988:6c2dc56e73b3 # Bug 256: enable vectorization with unaligned loads/stores. +9148:b8b8c421e36c # Relax mixing-type constraints for binary coefficient-wise operators +9174:d228bc282ac9 # merge +9212:c90098affa7b # Fix performance regression introduced in changeset 8aad8f35c955 +9213:9f1c14e4694b # Fix performance regression in dgemm introduced by changeset 81d53c711775 diff --git a/external/eigen3/bench/perf_monitoring/gemm/gemm.cpp b/external/eigen3/bench/perf_monitoring/gemm/gemm.cpp new file mode 100644 index 0000000..614bd47 --- /dev/null +++ b/external/eigen3/bench/perf_monitoring/gemm/gemm.cpp @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +typedef Matrix Mat; + +EIGEN_DONT_INLINE +void gemm(const Mat &A, const Mat &B, Mat &C) +{ + C.noalias() += A * B; +} + +EIGEN_DONT_INLINE +double bench(long m, long n, long k) +{ + Mat A(m,k); + Mat B(k,n); + Mat C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e8*4/sizeof(Scalar); + double tm0 = 4, tm1 = 10; + if(NumTraits::IsComplex) + { + up /= 4; + tm0 = 2; + tm1 = 4; + } + + double flops = 2. * m * n * k; + long rep = std::max(1., std::min(100., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, gemm(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +int main(int argc, char **argv) +{ + std::vector results; + + std::ifstream settings("gemm_settings.txt"); + long m, n, k; + while(settings >> m >> n >> k) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench(m, n, k) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/external/eigen3/bench/perf_monitoring/gemm/gemm_settings.txt b/external/eigen3/bench/perf_monitoring/gemm/gemm_settings.txt new file mode 100644 index 0000000..5c43e1c --- /dev/null +++ b/external/eigen3/bench/perf_monitoring/gemm/gemm_settings.txt @@ -0,0 +1,15 @@ +8 8 8 +9 9 9 +24 24 24 +239 239 239 +240 240 240 +2400 24 24 +24 2400 24 +24 24 2400 +24 2400 2400 +2400 24 2400 +2400 2400 24 +2400 2400 64 +4800 23 160 +23 4800 160 +2400 2400 2400 diff --git a/external/eigen3/bench/perf_monitoring/gemm/lazy_gemm.cpp b/external/eigen3/bench/perf_monitoring/gemm/lazy_gemm.cpp new file mode 100644 index 0000000..6dc3701 --- /dev/null +++ b/external/eigen3/bench/perf_monitoring/gemm/lazy_gemm.cpp @@ -0,0 +1,98 @@ +#include +#include +#include +#include +#include "../../BenchTimer.h" +using namespace Eigen; + +#ifndef SCALAR +#error SCALAR must be defined +#endif + +typedef SCALAR Scalar; + +template +EIGEN_DONT_INLINE +void lazy_gemm(const MatA &A, const MatB &B, MatC &C) +{ +// escape((void*)A.data()); +// escape((void*)B.data()); + C.noalias() += A.lazyProduct(B); +// escape((void*)C.data()); +} + +template +EIGEN_DONT_INLINE +double bench() +{ + typedef Matrix MatA; + typedef Matrix MatB; + typedef Matrix MatC; + + MatA A(m,k); + MatB B(k,n); + MatC C(m,n); + A.setRandom(); + B.setRandom(); + C.setZero(); + + BenchTimer t; + + double up = 1e7*4/sizeof(Scalar); + double tm0 = 10, tm1 = 20; + + double flops = 2. * m * n * k; + long rep = std::max(10., std::min(10000., up/flops) ); + long tries = std::max(tm0, std::min(tm1, up/flops) ); + + BENCH(t, tries, rep, lazy_gemm(A,B,C)); + + return 1e-9 * rep * flops / t.best(); +} + +template +double bench_t(int t) +{ + if(t) + return bench(); + else + return bench(); +} + +EIGEN_DONT_INLINE +double bench_mnk(int m, int n, int k, int t) +{ + int id = m*10000 + n*100 + k; + switch(id) { + case 10101 : return bench_t< 1, 1, 1>(t); break; + case 20202 : return bench_t< 2, 2, 2>(t); break; + case 30303 : return bench_t< 3, 3, 3>(t); break; + case 40404 : return bench_t< 4, 4, 4>(t); break; + case 50505 : return bench_t< 5, 5, 5>(t); break; + case 60606 : return bench_t< 6, 6, 6>(t); break; + case 70707 : return bench_t< 7, 7, 7>(t); break; + case 80808 : return bench_t< 8, 8, 8>(t); break; + case 90909 : return bench_t< 9, 9, 9>(t); break; + case 101010 : return bench_t<10,10,10>(t); break; + case 111111 : return bench_t<11,11,11>(t); break; + case 121212 : return bench_t<12,12,12>(t); break; + } + return 0; +} + +int main(int argc, char **argv) +{ + std::vector results; + + std::ifstream settings("lazy_gemm_settings.txt"); + long m, n, k, t; + while(settings >> m >> n >> k >> t) + { + //std::cerr << " Testing " << m << " " << n << " " << k << std::endl; + results.push_back( bench_mnk(m, n, k, t) ); + } + + std::cout << RowVectorXd::Map(results.data(), results.size()); + + return 0; +} diff --git a/external/eigen3/bench/perf_monitoring/gemm/lazy_gemm_settings.txt b/external/eigen3/bench/perf_monitoring/gemm/lazy_gemm_settings.txt new file mode 100644 index 0000000..407d5d4 --- /dev/null +++ b/external/eigen3/bench/perf_monitoring/gemm/lazy_gemm_settings.txt @@ -0,0 +1,15 @@ +1 1 1 0 +2 2 2 0 +3 3 3 0 +4 4 4 0 +4 4 4 1 +5 5 5 0 +6 6 6 0 +7 7 7 0 +7 7 7 1 +8 8 8 0 +9 9 9 0 +10 10 10 0 +11 11 11 0 +12 12 12 0 +12 12 12 1 diff --git a/external/eigen3/bench/perf_monitoring/gemm/make_plot.sh b/external/eigen3/bench/perf_monitoring/gemm/make_plot.sh new file mode 100755 index 0000000..cd3214a --- /dev/null +++ b/external/eigen3/bench/perf_monitoring/gemm/make_plot.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# base name of the bench +# it reads $1.out +# and generates $1.pdf +WHAT=$1 +bench=$2 + +header="rev " +while read line +do + if [ ! -z '$line' ]; then + header="$header \"$line\"" + fi +done < $bench"_settings.txt" + +echo $header > $WHAT.out.header +cat $WHAT.out >> $WHAT.out.header + + +echo "set title '$WHAT'" > $WHAT.gnuplot +echo "set key autotitle columnhead outside " >> $WHAT.gnuplot +echo "set xtics rotate 1" >> $WHAT.gnuplot + +echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in" >> $WHAT.gnuplot +echo set output "'"$WHAT.pdf"'" >> $WHAT.gnuplot + +col=`cat $bench"_settings.txt" | wc -l` +echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines" >> $WHAT.gnuplot +echo " " >> $WHAT.gnuplot + +gnuplot -persist < $WHAT.gnuplot + +# generate a png file +# convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 $WHAT.ps -background white -flatten .$WHAT.png + +# clean +rm $WHAT.out.header $WHAT.gnuplot \ No newline at end of file diff --git a/external/eigen3/bench/perf_monitoring/gemm/run.sh b/external/eigen3/bench/perf_monitoring/gemm/run.sh new file mode 100755 index 0000000..9d6ee40 --- /dev/null +++ b/external/eigen3/bench/perf_monitoring/gemm/run.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# ./run.sh gemm +# ./run.sh lazy_gemm + +# Examples of environment variables to be set: +# PREFIX="haswell-fma-" +# CXX_FLAGS="-mfma" + +# Options: +# -up : enforce the recomputation of existing data, and keep best results as a merging strategy +# -s : recompute selected changesets only and keep bests + +bench=$1 + +if echo "$*" | grep '\-up' > /dev/null; then + update=true +else + update=false +fi + +if echo "$*" | grep '\-s' > /dev/null; then + selected=true +else + selected=false +fi + +global_args="$*" + +if [ $selected == true ]; then + echo "Recompute selected changesets only and keep bests" +elif [ $update == true ]; then + echo "(Re-)Compute all changesets and keep bests" +else + echo "Skip previously computed changesets" +fi + + + +if [ ! -d "eigen_src" ]; then + hg clone https://bitbucket.org/eigen/eigen eigen_src +else + cd eigen_src + hg pull -u + cd .. +fi + +if [ ! -z '$CXX' ]; then + CXX=g++ +fi + +function make_backup +{ + if [ -f "$1.out" ]; then + mv "$1.out" "$1.backup" + fi +} + +function merge +{ + count1=`echo $1 | wc -w` + count2=`echo $2 | wc -w` + + if [ $count1 == $count2 ]; then + a=( $1 ); b=( $2 ) + res="" + for (( i=0 ; i<$count1 ; i++ )); do + ai=${a[$i]}; bi=${b[$i]} + tmp=`echo "if ($ai > $bi) $ai else $bi " | bc -l` + res="$res $tmp" + done + echo $res + + else + echo $1 + fi +} + +function test_current +{ + rev=$1 + scalar=$2 + name=$3 + + prev="" + if [ -e "$name.backup" ]; then + prev=`grep $rev "$name.backup" | cut -c 14-` + fi + res=$prev + count_rev=`echo $prev | wc -w` + count_ref=`cat $bench"_settings.txt" | wc -l` + if echo "$global_args" | grep "$rev" > /dev/null; then + rev_found=true + else + rev_found=false + fi +# echo $update et $selected et $rev_found because $rev et "$global_args" +# echo $count_rev et $count_ref + if [ $update == true ] || [ $count_rev != $count_ref ] || ([ $selected == true ] && [ $rev_found == true ]); then + if $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src $bench.cpp -DSCALAR=$scalar -o $name; then + curr=`./$name` + if [ $count_rev == $count_ref ]; then + echo "merge previous $prev" + echo "with new $curr" + else + echo "got $curr" + fi + res=`merge "$curr" "$prev"` +# echo $res + echo "$rev $res" >> $name.out + else + echo "Compilation failed, skip rev $rev" + fi + else + echo "Skip existing results for $rev / $name" + echo "$rev $res" >> $name.out + fi +} + +make_backup $PREFIX"s"$bench +make_backup $PREFIX"d"$bench +make_backup $PREFIX"c"$bench + +cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read rev +do + if [ ! -z '$rev' ]; then + echo "Testing rev $rev" + cd eigen_src + hg up -C $rev > /dev/null + actual_rev=`hg identify | cut -f1 -d' '` + cd .. + + test_current $actual_rev float $PREFIX"s"$bench + test_current $actual_rev double $PREFIX"d"$bench + test_current $actual_rev "std::complex" $PREFIX"c"$bench + fi + +done + +echo "Float:" +cat $PREFIX"s""$bench.out" +echo " " + +echo "Double:" +cat $PREFIX"d""$bench.out" +echo "" + +echo "Complex:" +cat $PREFIX"c""$bench.out" +echo "" + +./make_plot.sh $PREFIX"s"$bench $bench +./make_plot.sh $PREFIX"d"$bench $bench +./make_plot.sh $PREFIX"c"$bench $bench + + diff --git a/external/eigen3/bench/spbench/CMakeLists.txt b/external/eigen3/bench/spbench/CMakeLists.txt index 6e0e1b1..9327356 100644 --- a/external/eigen3/bench/spbench/CMakeLists.txt +++ b/external/eigen3/bench/spbench/CMakeLists.txt @@ -29,7 +29,7 @@ if(UMFPACK_FOUND AND BLAS_FOUND) set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${BLAS_LIBRARIES}) endif() -find_package(SuperLU) +find_package(SuperLU 4.0) if(SUPERLU_FOUND AND BLAS_FOUND) add_definitions("-DEIGEN_SUPERLU_SUPPORT") include_directories(${SUPERLU_INCLUDES}) @@ -38,25 +38,32 @@ if(SUPERLU_FOUND AND BLAS_FOUND) endif() -find_package(Pastix) -find_package(Scotch) -find_package(Metis) -if(PASTIX_FOUND AND BLAS_FOUND) +find_package(PASTIX QUIET COMPONENTS METIS SCOTCH) +# check that the PASTIX found is a version without MPI +find_path(PASTIX_pastix_nompi.h_INCLUDE_DIRS + NAMES pastix_nompi.h + HINTS ${PASTIX_INCLUDE_DIRS} +) +if (NOT PASTIX_pastix_nompi.h_INCLUDE_DIRS) + message(STATUS "A version of Pastix has been found but pastix_nompi.h does not exist in the include directory." + " Because Eigen tests require a version without MPI, we disable the Pastix backend.") +endif() +if(PASTIX_FOUND AND PASTIX_pastix_nompi.h_INCLUDE_DIRS AND BLAS_FOUND) add_definitions("-DEIGEN_PASTIX_SUPPORT") - include_directories(${PASTIX_INCLUDES}) + include_directories(${PASTIX_INCLUDE_DIRS_DEP}) if(SCOTCH_FOUND) - include_directories(${SCOTCH_INCLUDES}) + include_directories(${SCOTCH_INCLUDE_DIRS}) set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${SCOTCH_LIBRARIES}) elseif(METIS_FOUND) - include_directories(${METIS_INCLUDES}) + include_directories(${METIS_INCLUDE_DIRS}) set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${METIS_LIBRARIES}) endif(SCOTCH_FOUND) - set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES} ${ORDERING_LIBRARIES} ${BLAS_LIBRARIES}) - set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES} ${BLAS_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES_DEP} ${ORDERING_LIBRARIES}) + set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES_DEP}) endif(PASTIX_FOUND AND BLAS_FOUND) if(METIS_FOUND) - include_directories(${METIS_INCLUDES}) + include_directories(${METIS_INCLUDE_DIRS}) set (SPARSE_LIBS ${SPARSE_LIBS} ${METIS_LIBRARIES}) add_definitions("-DEIGEN_METIS_SUPPORT") endif(METIS_FOUND) diff --git a/external/eigen3/bench/spbench/spbenchstyle.h b/external/eigen3/bench/spbench/spbenchstyle.h index 17a05ce..f6a9817 100644 --- a/external/eigen3/bench/spbench/spbenchstyle.h +++ b/external/eigen3/bench/spbench/spbenchstyle.h @@ -91,4 +91,5 @@ void printBenchStyle(std::ofstream& out) \n\n"; } -#endif \ No newline at end of file + +#endif diff --git a/external/eigen3/bench/tensors/README b/external/eigen3/bench/tensors/README new file mode 100644 index 0000000..3a5fdbe --- /dev/null +++ b/external/eigen3/bench/tensors/README @@ -0,0 +1,21 @@ +The tensor benchmark suite is made of several parts. + +The first part is a generic suite, in which each benchmark comes in 2 flavors: one that runs on CPU, and one that runs on GPU. + +To compile the floating point CPU benchmarks, simply call: +g++ tensor_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu + +To compile the floating point GPU benchmarks, simply call: +nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_35 -o benchmarks_gpu + +We also provide a version of the generic GPU tensor benchmarks that uses half floats (aka fp16) instead of regular floats. To compile these benchmarks, simply call the command line below. You'll need a recent GPU that supports compute capability 5.3 or higher to run them and nvcc 7.5 or higher to compile the code. +nvcc tensor_benchmarks_fp16_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_53 -o benchmarks_fp16_gpu + +last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call +g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu + +To compile the benchmark for SYCL, using ComputeCpp you currently need 2 passes (only for translation units containing device code): +1. The device compilation pass that generates the device code (SYCL kernels and referenced device functions) and glue code needed by the host compiler to reference the device code from host code. +{ComputeCpp_ROOT}/bin/compute++ -I ../../ -I {ComputeCpp_ROOT}/include/ -std=c++11 -mllvm -inline-threshold=1000 -Wno-ignored-attributes -sycl -intelspirmetadata -emit-llvm -no-serial-memop -sycl-compress-name -DBUILD_PLATFORM_SPIR -DNDBUG -O3 -c tensor_benchmarks_sycl.cc +2. The host compilation pass that generates the final host binary. +clang++-3.7 -include tensor_benchmarks_sycl.sycl benchmark_main.cc tensor_benchmarks_sycl.cc -pthread -I ../../ -I {ComputeCpp_ROOT}/include/ -L {ComputeCpp_ROOT}/lib/ -lComputeCpp -lOpenCL -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -o tensor_benchmark_sycl diff --git a/external/eigen3/bench/tensors/benchmark.h b/external/eigen3/bench/tensors/benchmark.h new file mode 100644 index 0000000..f115b54 --- /dev/null +++ b/external/eigen3/bench/tensors/benchmark.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +namespace testing { +class Benchmark { + public: + Benchmark(const char* name, void (*fn)(int)) { + Register(name, fn, NULL); + } + Benchmark(const char* name, void (*fn_range)(int, int)) { + Register(name, NULL, fn_range); + } + Benchmark* Arg(int x); + Benchmark* Range(int lo, int hi); + const char* Name(); + bool ShouldRun(int argc, char* argv[]); + void Run(); + private: + const char* name_; + void (*fn_)(int); + void (*fn_range_)(int, int); + std::vector args_; + void Register(const char* name, void (*fn)(int), void (*fn_range)(int, int)); + void RunRepeatedlyWithArg(int iterations, int arg); + void RunWithArg(int arg); +}; +} // namespace testing +void SetBenchmarkFlopsProcessed(int64_t); +void StopBenchmarkTiming(); +void StartBenchmarkTiming(); +#define BENCHMARK(f) \ + static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \ + (new ::testing::Benchmark(#f, f)) diff --git a/external/eigen3/bench/tensors/benchmark_main.cc b/external/eigen3/bench/tensors/benchmark_main.cc new file mode 100644 index 0000000..1efa0db --- /dev/null +++ b/external/eigen3/bench/tensors/benchmark_main.cc @@ -0,0 +1,237 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "benchmark.h" +#include +#include +#include +#include +#include +#include +#include +#include + +static int64_t g_flops_processed; +static int64_t g_benchmark_total_time_ns; +static int64_t g_benchmark_start_time_ns; +typedef std::map BenchmarkMap; +typedef BenchmarkMap::iterator BenchmarkMapIt; + +BenchmarkMap& gBenchmarks() { + static BenchmarkMap g_benchmarks; + return g_benchmarks; +} + +static int g_name_column_width = 20; + +static int Round(int n) { + int base = 1; + while (base*10 < n) { + base *= 10; + } + if (n < 2*base) { + return 2*base; + } + if (n < 5*base) { + return 5*base; + } + return 10*base; +} + +#ifdef __APPLE__ + #include + static mach_timebase_info_data_t g_time_info; + static void __attribute__((constructor)) init_info() { + mach_timebase_info(&g_time_info); + } +#endif + +static int64_t NanoTime() { +#if defined(__APPLE__) + uint64_t t = mach_absolute_time(); + return t * g_time_info.numer / g_time_info.denom; +#else + struct timespec t; + t.tv_sec = t.tv_nsec = 0; + clock_gettime(CLOCK_MONOTONIC, &t); + return static_cast(t.tv_sec) * 1000000000LL + t.tv_nsec; +#endif +} + +namespace testing { +Benchmark* Benchmark::Arg(int arg) { + args_.push_back(arg); + return this; +} + +Benchmark* Benchmark::Range(int lo, int hi) { + const int kRangeMultiplier = 8; + if (hi < lo) { + int temp = hi; + hi = lo; + lo = temp; + } + while (lo < hi) { + args_.push_back(lo); + lo *= kRangeMultiplier; + } + // We always run the hi number. + args_.push_back(hi); + return this; +} + +const char* Benchmark::Name() { + return name_; +} +bool Benchmark::ShouldRun(int argc, char* argv[]) { + if (argc == 1) { + return true; // With no arguments, we run all benchmarks. + } + // Otherwise, we interpret each argument as a regular expression and + // see if any of our benchmarks match. + for (int i = 1; i < argc; i++) { + regex_t re; + if (regcomp(&re, argv[i], 0) != 0) { + fprintf(stderr, "couldn't compile \"%s\" as a regular expression!\n", argv[i]); + exit(EXIT_FAILURE); + } + int match = regexec(&re, name_, 0, NULL, 0); + regfree(&re); + if (match != REG_NOMATCH) { + return true; + } + } + return false; +} +void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int, int)) { + name_ = name; + fn_ = fn; + fn_range_ = fn_range; + if (fn_ == NULL && fn_range_ == NULL) { + fprintf(stderr, "%s: missing function\n", name_); + exit(EXIT_FAILURE); + } + gBenchmarks().insert(std::make_pair(name, this)); +} +void Benchmark::Run() { + if (fn_ != NULL) { + RunWithArg(0); + } else { + if (args_.empty()) { + fprintf(stderr, "%s: no args!\n", name_); + exit(EXIT_FAILURE); + } + for (size_t i = 0; i < args_.size(); ++i) { + RunWithArg(args_[i]); + } + } +} +void Benchmark::RunRepeatedlyWithArg(int iterations, int arg) { + g_flops_processed = 0; + g_benchmark_total_time_ns = 0; + g_benchmark_start_time_ns = NanoTime(); + if (fn_ != NULL) { + fn_(iterations); + } else { + fn_range_(iterations, arg); + } + if (g_benchmark_start_time_ns != 0) { + g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns; + } +} +void Benchmark::RunWithArg(int arg) { + // run once in case it's expensive + int iterations = 1; + RunRepeatedlyWithArg(iterations, arg); + while (g_benchmark_total_time_ns < 1e9 && iterations < 1e9) { + int last = iterations; + if (g_benchmark_total_time_ns/iterations == 0) { + iterations = 1e9; + } else { + iterations = 1e9 / (g_benchmark_total_time_ns/iterations); + } + iterations = std::max(last + 1, std::min(iterations + iterations/2, 100*last)); + iterations = Round(iterations); + RunRepeatedlyWithArg(iterations, arg); + } + char throughput[100]; + throughput[0] = '\0'; + if (g_benchmark_total_time_ns > 0 && g_flops_processed > 0) { + double mflops_processed = static_cast(g_flops_processed)/1e6; + double seconds = static_cast(g_benchmark_total_time_ns)/1e9; + snprintf(throughput, sizeof(throughput), " %8.2f MFlops/s", mflops_processed/seconds); + } + char full_name[100]; + if (fn_range_ != NULL) { + if (arg >= (1<<20)) { + snprintf(full_name, sizeof(full_name), "%s/%dM", name_, arg/(1<<20)); + } else if (arg >= (1<<10)) { + snprintf(full_name, sizeof(full_name), "%s/%dK", name_, arg/(1<<10)); + } else { + snprintf(full_name, sizeof(full_name), "%s/%d", name_, arg); + } + } else { + snprintf(full_name, sizeof(full_name), "%s", name_); + } + printf("%-*s %10d %10" PRId64 "%s\n", g_name_column_width, full_name, + iterations, g_benchmark_total_time_ns/iterations, throughput); + fflush(stdout); +} +} // namespace testing +void SetBenchmarkFlopsProcessed(int64_t x) { + g_flops_processed = x; +} +void StopBenchmarkTiming() { + if (g_benchmark_start_time_ns != 0) { + g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns; + } + g_benchmark_start_time_ns = 0; +} +void StartBenchmarkTiming() { + if (g_benchmark_start_time_ns == 0) { + g_benchmark_start_time_ns = NanoTime(); + } +} +int main(int argc, char* argv[]) { + if (gBenchmarks().empty()) { + fprintf(stderr, "No benchmarks registered!\n"); + exit(EXIT_FAILURE); + } + for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) { + int name_width = static_cast(strlen(it->second->Name())); + g_name_column_width = std::max(g_name_column_width, name_width); + } + bool need_header = true; + for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) { + ::testing::Benchmark* b = it->second; + if (b->ShouldRun(argc, argv)) { + if (need_header) { + printf("%-*s %10s %10s\n", g_name_column_width, "", "iterations", "ns/op"); + fflush(stdout); + need_header = false; + } + b->Run(); + } + } + if (need_header) { + fprintf(stderr, "No matching benchmarks!\n"); + fprintf(stderr, "Available benchmarks:\n"); + for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) { + fprintf(stderr, " %s\n", it->second->Name()); + } + exit(EXIT_FAILURE); + } + return 0; +} diff --git a/external/eigen3/bench/tensors/contraction_benchmarks_cpu.cc b/external/eigen3/bench/tensors/contraction_benchmarks_cpu.cc new file mode 100644 index 0000000..f9e57ad --- /dev/null +++ b/external/eigen3/bench/tensors/contraction_benchmarks_cpu.cc @@ -0,0 +1,39 @@ +#define EIGEN_USE_THREADS + +#include + +#include "tensor_benchmarks.h" + +#define CREATE_THREAD_POOL(threads) \ +Eigen::ThreadPool pool(threads); \ +Eigen::ThreadPoolDevice device(&pool, threads); + + +// Contractions for number of threads ranging from 1 to 32 +// Dimensions are Rows, Cols, Depth +#define BM_ContractionCPU(D1, D2, D3) \ + static void BM_##Contraction##_##D1##x##D2##x##D3(int iters, int Threads) { \ + StopBenchmarkTiming(); \ + CREATE_THREAD_POOL(Threads); \ + BenchmarkSuite suite(device, D1, D2, D3); \ + suite.contraction(iters); \ + } \ + BENCHMARK_RANGE(BM_##Contraction##_##D1##x##D2##x##D3, 1, 32); + + +// Vector Matrix and Matrix Vector products +BM_ContractionCPU(1, 2000, 500); +BM_ContractionCPU(2000, 1, 500); + +// Various skinny matrices +BM_ContractionCPU(250, 3, 512); +BM_ContractionCPU(1500, 3, 512); + +BM_ContractionCPU(512, 800, 4); +BM_ContractionCPU(512, 80, 800); +BM_ContractionCPU(512, 80, 13522); +BM_ContractionCPU(1, 80, 13522); + +BM_ContractionCPU(3200, 512, 4); +BM_ContractionCPU(3200, 512, 80); +BM_ContractionCPU(3200, 80, 512); diff --git a/external/eigen3/bench/tensors/tensor_benchmarks.h b/external/eigen3/bench/tensors/tensor_benchmarks.h new file mode 100644 index 0000000..c2fb3de --- /dev/null +++ b/external/eigen3/bench/tensors/tensor_benchmarks.h @@ -0,0 +1,478 @@ +#ifndef THIRD_PARTY_EIGEN3_TENSOR_BENCHMARKS_H_ +#define THIRD_PARTY_EIGEN3_TENSOR_BENCHMARKS_H_ + +typedef int TensorIndex; +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int + +#include "unsupported/Eigen/CXX11/Tensor" +#include "benchmark.h" + +#define BENCHMARK_RANGE(bench, lo, hi) \ + BENCHMARK(bench)->Range(lo, hi) + +using Eigen::Tensor; +using Eigen::TensorMap; + +// TODO(bsteiner): also templatize on the input type since we have users +// for int8 as well as floats. +template class BenchmarkSuite { + public: + BenchmarkSuite(const Device& device, size_t m, size_t k, size_t n) + : m_(m), k_(k), n_(n), device_(device) { + initialize(); + } + + BenchmarkSuite(const Device& device, size_t m) + : m_(m), k_(m), n_(m), device_(device) { + initialize(); + } + + ~BenchmarkSuite() { + device_.deallocate(a_); + device_.deallocate(b_); + device_.deallocate(c_); + } + + void memcpy(int num_iters) { + eigen_assert(m_ == k_ && k_ == n_); + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + device_.memcpy(c_, a_, m_ * m_ * sizeof(T)); + } + // Record the number of values copied per second + finalizeBenchmark(static_cast(m_) * m_ * num_iters); + } + + void typeCasting(int num_iters) { + eigen_assert(m_ == n_); + Eigen::array sizes; + if (sizeof(T) >= sizeof(int)) { + sizes[0] = m_; + sizes[1] = k_; + } else { + sizes[0] = m_ * sizeof(T) / sizeof(int); + sizes[1] = k_ * sizeof(T) / sizeof(int); + } + const TensorMap, Eigen::Aligned> A((int*)a_, sizes); + TensorMap, Eigen::Aligned> B(b_, sizes); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + B.device(device_) = A.template cast(); + } + // Record the number of values copied per second + finalizeBenchmark(static_cast(m_) * k_ * num_iters); + } + + void random(int num_iters) { + eigen_assert(m_ == k_ && k_ == n_); + Eigen::array sizes; + sizes[0] = m_; + sizes[1] = m_; + TensorMap, Eigen::Aligned> C(c_, sizes); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = C.random(); + } + // Record the number of random numbers generated per second + finalizeBenchmark(static_cast(m_) * m_ * num_iters); + } + + void slicing(int num_iters) { + eigen_assert(m_ == k_ && k_ == n_); + Eigen::array sizes; + sizes[0] = m_; + sizes[1] = m_; + const TensorMap, Eigen::Aligned> A(a_, sizes); + const TensorMap, Eigen::Aligned> B(b_, sizes); + TensorMap, Eigen::Aligned> C(c_, sizes); + + const Eigen::DSizes quarter_sizes(m_/2, m_/2); + const Eigen::DSizes first_quadrant(0, 0); + const Eigen::DSizes second_quadrant(0, m_/2); + const Eigen::DSizes third_quadrant(m_/2, 0); + const Eigen::DSizes fourth_quadrant(m_/2, m_/2); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.slice(first_quadrant, quarter_sizes).device(device_) = + A.slice(first_quadrant, quarter_sizes); + C.slice(second_quadrant, quarter_sizes).device(device_) = + B.slice(second_quadrant, quarter_sizes); + C.slice(third_quadrant, quarter_sizes).device(device_) = + A.slice(third_quadrant, quarter_sizes); + C.slice(fourth_quadrant, quarter_sizes).device(device_) = + B.slice(fourth_quadrant, quarter_sizes); + } + // Record the number of values copied from the rhs slice to the lhs slice + // each second + finalizeBenchmark(static_cast(m_) * m_ * num_iters); + } + + void rowChip(int num_iters) { + Eigen::array input_size; + input_size[0] = k_; + input_size[1] = n_; + const TensorMap, Eigen::Aligned> B(b_, input_size); + Eigen::array output_size; + output_size[0] = n_; + TensorMap, Eigen::Aligned> C(c_, output_size); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = B.chip(iter % k_, 0); + } + // Record the number of values copied from the rhs chip to the lhs. + finalizeBenchmark(static_cast(n_) * num_iters); + } + + void colChip(int num_iters) { + Eigen::array input_size; + input_size[0] = k_; + input_size[1] = n_; + const TensorMap, Eigen::Aligned> B(b_, input_size); + Eigen::array output_size; + output_size[0] = n_; + TensorMap, Eigen::Aligned> C(c_, output_size); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = B.chip(iter % n_, 1); + } + // Record the number of values copied from the rhs chip to the lhs. + finalizeBenchmark(static_cast(n_) * num_iters); + } + + void shuffling(int num_iters) { + eigen_assert(m_ == n_); + Eigen::array size_a; + size_a[0] = m_; + size_a[1] = k_; + const TensorMap, Eigen::Aligned> A(a_, size_a); + Eigen::array size_b; + size_b[0] = k_; + size_b[1] = m_; + TensorMap, Eigen::Aligned> B(b_, size_b); + + Eigen::array shuffle; + shuffle[0] = 1; + shuffle[1] = 0; + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + B.device(device_) = A.shuffle(shuffle); + } + // Record the number of values shuffled from A and copied to B each second + finalizeBenchmark(static_cast(m_) * k_ * num_iters); + } + + void padding(int num_iters) { + eigen_assert(m_ == k_); + Eigen::array size_a; + size_a[0] = m_; + size_a[1] = k_-3; + const TensorMap, Eigen::Aligned> A(a_, size_a); + Eigen::array size_b; + size_b[0] = k_; + size_b[1] = m_; + TensorMap, Eigen::Aligned> B(b_, size_b); + +#if defined(EIGEN_HAS_INDEX_LIST) + Eigen::IndexPairList, + Eigen::type2indexpair<2, 1> > paddings; +#else + Eigen::array, 2> paddings; + paddings[0] = Eigen::IndexPair(0, 0); + paddings[1] = Eigen::IndexPair(2, 1); +#endif + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + B.device(device_) = A.pad(paddings); + } + // Record the number of values copied from the padded tensor A each second + finalizeBenchmark(static_cast(m_) * k_ * num_iters); + } + + void striding(int num_iters) { + eigen_assert(m_ == k_); + Eigen::array size_a; + size_a[0] = m_; + size_a[1] = k_; + const TensorMap, Eigen::Aligned> A(a_, size_a); + Eigen::array size_b; + size_b[0] = m_; + size_b[1] = k_/2; + TensorMap, Eigen::Aligned> B(b_, size_b); + +#ifndef EIGEN_HAS_INDEX_LIST + Eigen::array strides; + strides[0] = 1; + strides[1] = 2; +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. + Eigen::IndexList, Eigen::type2index<2> > strides; +#endif + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + B.device(device_) = A.stride(strides); + } + // Record the number of values copied from the padded tensor A each second + finalizeBenchmark(static_cast(m_) * k_ * num_iters); + } + + void broadcasting(int num_iters) { + Eigen::array size_a; + size_a[0] = m_; + size_a[1] = 1; + const TensorMap, Eigen::Aligned> A(a_, size_a); + Eigen::array size_c; + size_c[0] = m_; + size_c[1] = n_; + TensorMap, Eigen::Aligned> C(c_, size_c); + +#ifndef EIGEN_HAS_INDEX_LIST + Eigen::array broadcast; + broadcast[0] = 1; + broadcast[1] = n_; +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. + Eigen::IndexList, int> broadcast; + broadcast.set(1, n_); +#endif + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = A.broadcast(broadcast); + } + // Record the number of values broadcasted from A and copied to C each second + finalizeBenchmark(static_cast(m_) * n_ * num_iters); + } + + void coeffWiseOp(int num_iters) { + eigen_assert(m_ == k_ && k_ == n_); + Eigen::array sizes; + sizes[0] = m_; + sizes[1] = m_; + const TensorMap, Eigen::Aligned> A(a_, sizes); + const TensorMap, Eigen::Aligned> B(b_, sizes); + TensorMap, Eigen::Aligned> C(c_, sizes); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = A * A.constant(static_cast(3.14)) + B * B.constant(static_cast(2.7)); + } + // Record the number of FLOP executed per second (2 multiplications and + // 1 addition per value) + finalizeBenchmark(static_cast(3) * m_ * m_ * num_iters); + } + + void algebraicFunc(int num_iters) { + eigen_assert(m_ == k_ && k_ == n_); + Eigen::array sizes; + sizes[0] = m_; + sizes[1] = m_; + const TensorMap, Eigen::Aligned> A(a_, sizes); + const TensorMap, Eigen::Aligned> B(b_, sizes); + TensorMap, Eigen::Aligned> C(c_, sizes); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = A.rsqrt() + B.sqrt() * B.square(); + } + // Record the number of FLOP executed per second (assuming one operation + // per value) + finalizeBenchmark(static_cast(m_) * m_ * num_iters); + } + + void transcendentalFunc(int num_iters) { + eigen_assert(m_ == k_ && k_ == n_); + Eigen::array sizes; + sizes[0] = m_; + sizes[1] = m_; + const TensorMap, Eigen::Aligned> A(a_, sizes); + const TensorMap, Eigen::Aligned> B(b_, sizes); + TensorMap, Eigen::Aligned> C(c_, sizes); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = A.exp() + B.log(); + } + // Record the number of FLOP executed per second (assuming one operation + // per value) + finalizeBenchmark(static_cast(m_) * m_ * num_iters); + } + + // Row reduction + void rowReduction(int num_iters) { + Eigen::array input_size; + input_size[0] = k_; + input_size[1] = n_; + const TensorMap, Eigen::Aligned> B(b_, input_size); + Eigen::array output_size; + output_size[0] = n_; + TensorMap, Eigen::Aligned> C(c_, output_size); + +#ifndef EIGEN_HAS_INDEX_LIST + Eigen::array sum_along_dim; + sum_along_dim[0] = 0; +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. + Eigen::IndexList> sum_along_dim; +#endif + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = B.sum(sum_along_dim); + } + // Record the number of FLOP executed per second (assuming one operation + // per value) + finalizeBenchmark(static_cast(k_) * n_ * num_iters); + } + + // Column reduction + void colReduction(int num_iters) { + Eigen::array input_size; + input_size[0] = k_; + input_size[1] = n_; + const TensorMap, Eigen::Aligned> B( + b_, input_size); + Eigen::array output_size; + output_size[0] = k_; + TensorMap, Eigen::Aligned> C( + c_, output_size); + +#ifndef EIGEN_HAS_INDEX_LIST + Eigen::array sum_along_dim; + sum_along_dim[0] = 1; +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. + Eigen::IndexList> sum_along_dim; +#endif + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = B.sum(sum_along_dim); + } + // Record the number of FLOP executed per second (assuming one operation + // per value) + finalizeBenchmark(static_cast(k_) * n_ * num_iters); + } + + // Full reduction + void fullReduction(int num_iters) { + Eigen::array input_size; + input_size[0] = k_; + input_size[1] = n_; + const TensorMap, Eigen::Aligned> B( + b_, input_size); + Eigen::array output_size; + TensorMap, Eigen::Aligned> C( + c_, output_size); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = B.sum(); + } + // Record the number of FLOP executed per second (assuming one operation + // per value) + finalizeBenchmark(static_cast(k_) * n_ * num_iters); + } + + // do a contraction which is equivalent to a matrix multiplication + void contraction(int num_iters) { + Eigen::array sizeA; + sizeA[0] = m_; + sizeA[1] = k_; + Eigen::array sizeB; + sizeB[0] = k_; + sizeB[1] = n_; + Eigen::array sizeC; + sizeC[0] = m_; + sizeC[1] = n_; + + const TensorMap, Eigen::Aligned> A(a_, sizeA); + const TensorMap, Eigen::Aligned> B(b_, sizeB); + TensorMap, Eigen::Aligned> C(c_, sizeC); + + typedef typename Tensor::DimensionPair DimPair; + Eigen::array dims; + dims[0] = DimPair(1, 0); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = A.contract(B, dims); + } + // Record the number of FLOP executed per second (size_ multiplications and + // additions for each value in the resulting tensor) + finalizeBenchmark(static_cast(2) * m_ * n_ * k_ * num_iters); + } + + void convolution(int num_iters, int kernel_x, int kernel_y) { + Eigen::array input_sizes; + input_sizes[0] = m_; + input_sizes[1] = n_; + TensorMap, Eigen::Aligned> A(a_, input_sizes); + Eigen::array kernel_sizes; + kernel_sizes[0] = kernel_x; + kernel_sizes[1] = kernel_y; + TensorMap, Eigen::Aligned> B(b_, kernel_sizes); + Eigen::array result_sizes; + result_sizes[0] = m_ - kernel_x + 1; + result_sizes[1] = n_ - kernel_y + 1; + TensorMap, Eigen::Aligned> C(c_, result_sizes); + Eigen::array dims; + dims[0] = 0; + dims[1] = 1; + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = A.convolve(B, dims); + } + // Record the number of FLOP executed per second (kernel_size + // multiplications and additions for each value in the resulting tensor) + finalizeBenchmark(static_cast(2) * + (m_ - kernel_x + 1) * (n_ - kernel_y + 1) * kernel_x * kernel_y * num_iters); + } + + private: + void initialize() { + a_ = (T *) device_.allocate(m_ * k_ * sizeof(T)); + b_ = (T *) device_.allocate(k_ * n_ * sizeof(T)); + c_ = (T *) device_.allocate(m_ * n_ * sizeof(T)); + + // Initialize the content of the memory pools to prevent asan from + // complaining. + device_.memset(a_, 12, m_ * k_ * sizeof(T)); + device_.memset(b_, 23, k_ * n_ * sizeof(T)); + device_.memset(c_, 31, m_ * n_ * sizeof(T)); + + //BenchmarkUseRealTime(); + } + + inline void finalizeBenchmark(int64_t num_items) { +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + if (Eigen::internal::is_same::value) { + device_.synchronize(); + } +#endif + StopBenchmarkTiming(); + SetBenchmarkFlopsProcessed(num_items); + } + + + TensorIndex m_; + TensorIndex k_; + TensorIndex n_; + T* a_; + T* b_; + T* c_; + Device device_; +}; +#endif // THIRD_PARTY_EIGEN3_TENSOR_BENCHMARKS_H_ diff --git a/external/eigen3/bench/tensors/tensor_benchmarks_cpu.cc b/external/eigen3/bench/tensors/tensor_benchmarks_cpu.cc new file mode 100644 index 0000000..8947f4b --- /dev/null +++ b/external/eigen3/bench/tensors/tensor_benchmarks_cpu.cc @@ -0,0 +1,168 @@ +#define EIGEN_USE_THREADS + +#include + +#include "tensor_benchmarks.h" + +#define CREATE_THREAD_POOL(threads) \ +Eigen::ThreadPool pool(threads); \ +Eigen::ThreadPoolDevice device(&pool, threads); + +// Simple functions +#define BM_FuncCPU(FUNC, THREADS) \ + static void BM_##FUNC##_##THREADS##T(int iters, int N) { \ + StopBenchmarkTiming(); \ + CREATE_THREAD_POOL(THREADS); \ + BenchmarkSuite suite(device, N); \ + suite.FUNC(iters); \ + } \ + BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000); + +BM_FuncCPU(memcpy, 4); +BM_FuncCPU(memcpy, 8); +BM_FuncCPU(memcpy, 12); + +BM_FuncCPU(typeCasting, 4); +BM_FuncCPU(typeCasting, 8); +BM_FuncCPU(typeCasting, 12); + +BM_FuncCPU(random, 4); +BM_FuncCPU(random, 8); +BM_FuncCPU(random, 12); + +BM_FuncCPU(slicing, 4); +BM_FuncCPU(slicing, 8); +BM_FuncCPU(slicing, 12); + +BM_FuncCPU(rowChip, 4); +BM_FuncCPU(rowChip, 8); +BM_FuncCPU(rowChip, 12); + +BM_FuncCPU(colChip, 4); +BM_FuncCPU(colChip, 8); +BM_FuncCPU(colChip, 12); + +BM_FuncCPU(shuffling, 4); +BM_FuncCPU(shuffling, 8); +BM_FuncCPU(shuffling, 12); + +BM_FuncCPU(padding, 4); +BM_FuncCPU(padding, 8); +BM_FuncCPU(padding, 12); + +BM_FuncCPU(striding, 4); +BM_FuncCPU(striding, 8); +BM_FuncCPU(striding, 12); + +BM_FuncCPU(broadcasting, 4); +BM_FuncCPU(broadcasting, 8); +BM_FuncCPU(broadcasting, 12); + +BM_FuncCPU(coeffWiseOp, 4); +BM_FuncCPU(coeffWiseOp, 8); +BM_FuncCPU(coeffWiseOp, 12); + +BM_FuncCPU(algebraicFunc, 4); +BM_FuncCPU(algebraicFunc, 8); +BM_FuncCPU(algebraicFunc, 12); + +BM_FuncCPU(transcendentalFunc, 4); +BM_FuncCPU(transcendentalFunc, 8); +BM_FuncCPU(transcendentalFunc, 12); + +BM_FuncCPU(rowReduction, 4); +BM_FuncCPU(rowReduction, 8); +BM_FuncCPU(rowReduction, 12); + +BM_FuncCPU(colReduction, 4); +BM_FuncCPU(colReduction, 8); +BM_FuncCPU(colReduction, 12); + + +// Contractions +#define BM_FuncWithInputDimsCPU(FUNC, D1, D2, D3, THREADS) \ + static void BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T(int iters, int N) { \ + StopBenchmarkTiming(); \ + if (THREADS == 1) { \ + Eigen::DefaultDevice device; \ + BenchmarkSuite suite(device, D1, D2, D3); \ + suite.FUNC(iters); \ + } else { \ + CREATE_THREAD_POOL(THREADS); \ + BenchmarkSuite suite(device, D1, D2, D3); \ + suite.FUNC(iters); \ + } \ + } \ + BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000); + + +BM_FuncWithInputDimsCPU(contraction, N, N, N, 1); +BM_FuncWithInputDimsCPU(contraction, N, N, N, 4); +BM_FuncWithInputDimsCPU(contraction, N, N, N, 8); +BM_FuncWithInputDimsCPU(contraction, N, N, N, 12); +BM_FuncWithInputDimsCPU(contraction, N, N, N, 16); + +BM_FuncWithInputDimsCPU(contraction, 64, N, N, 1); +BM_FuncWithInputDimsCPU(contraction, 64, N, N, 4); +BM_FuncWithInputDimsCPU(contraction, 64, N, N, 8); +BM_FuncWithInputDimsCPU(contraction, 64, N, N, 12); +BM_FuncWithInputDimsCPU(contraction, 64, N, N, 16); + +BM_FuncWithInputDimsCPU(contraction, N, 64, N, 1); +BM_FuncWithInputDimsCPU(contraction, N, 64, N, 4); +BM_FuncWithInputDimsCPU(contraction, N, 64, N, 8); +BM_FuncWithInputDimsCPU(contraction, N, 64, N, 12); +BM_FuncWithInputDimsCPU(contraction, N, 64, N, 16); + +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 1); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 4); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 8); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 12); +BM_FuncWithInputDimsCPU(contraction, N, N, 64, 16); + +BM_FuncWithInputDimsCPU(contraction, 1, N, N, 1); +BM_FuncWithInputDimsCPU(contraction, 1, N, N, 4); +BM_FuncWithInputDimsCPU(contraction, 1, N, N, 8); +BM_FuncWithInputDimsCPU(contraction, 1, N, N, 12); +BM_FuncWithInputDimsCPU(contraction, 1, N, N, 16); + +BM_FuncWithInputDimsCPU(contraction, N, N, 1, 1); +BM_FuncWithInputDimsCPU(contraction, N, N, 1, 4); +BM_FuncWithInputDimsCPU(contraction, N, N, 1, 8); +BM_FuncWithInputDimsCPU(contraction, N, N, 1, 12); +BM_FuncWithInputDimsCPU(contraction, N, N, 1, 16); + + +// Convolutions +#define BM_FuncWithKernelDimsCPU(FUNC, DIM1, DIM2, THREADS) \ + static void BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T(int iters, int N) { \ + StopBenchmarkTiming(); \ + CREATE_THREAD_POOL(THREADS); \ + BenchmarkSuite suite(device, N); \ + suite.FUNC(iters, DIM1, DIM2); \ + } \ + BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000); + +BM_FuncWithKernelDimsCPU(convolution, 7, 1, 4); +BM_FuncWithKernelDimsCPU(convolution, 7, 1, 8); +BM_FuncWithKernelDimsCPU(convolution, 7, 1, 12); + +BM_FuncWithKernelDimsCPU(convolution, 1, 7, 4); +BM_FuncWithKernelDimsCPU(convolution, 1, 7, 8); +BM_FuncWithKernelDimsCPU(convolution, 1, 7, 12); + +BM_FuncWithKernelDimsCPU(convolution, 7, 4, 4); +BM_FuncWithKernelDimsCPU(convolution, 7, 4, 8); +BM_FuncWithKernelDimsCPU(convolution, 7, 4, 12); + +BM_FuncWithKernelDimsCPU(convolution, 4, 7, 4); +BM_FuncWithKernelDimsCPU(convolution, 4, 7, 8); +BM_FuncWithKernelDimsCPU(convolution, 4, 7, 12); + +BM_FuncWithKernelDimsCPU(convolution, 7, 64, 4); +BM_FuncWithKernelDimsCPU(convolution, 7, 64, 8); +BM_FuncWithKernelDimsCPU(convolution, 7, 64, 12); + +BM_FuncWithKernelDimsCPU(convolution, 64, 7, 4); +BM_FuncWithKernelDimsCPU(convolution, 64, 7, 8); +BM_FuncWithKernelDimsCPU(convolution, 64, 7, 12); diff --git a/external/eigen3/bench/tensors/tensor_benchmarks_fp16_gpu.cu b/external/eigen3/bench/tensors/tensor_benchmarks_fp16_gpu.cu new file mode 100644 index 0000000..65784d0 --- /dev/null +++ b/external/eigen3/bench/tensors/tensor_benchmarks_fp16_gpu.cu @@ -0,0 +1,77 @@ +#define EIGEN_USE_GPU + +#include +#include +#include + +#include "tensor_benchmarks.h" + +// Simple functions +#define BM_FuncGPU(FUNC) \ + static void BM_##FUNC(int iters, int N) { \ + StopBenchmarkTiming(); \ + Eigen::CudaStreamDevice stream; \ + Eigen::GpuDevice device(&stream); \ + BenchmarkSuite suite(device, N); \ + cudaDeviceSynchronize(); \ + suite.FUNC(iters); \ + } \ + BENCHMARK_RANGE(BM_##FUNC, 10, 5000); + +BM_FuncGPU(memcpy); +BM_FuncGPU(typeCasting); +//BM_FuncGPU(random); +BM_FuncGPU(slicing); +BM_FuncGPU(rowChip); +BM_FuncGPU(colChip); +BM_FuncGPU(shuffling); +BM_FuncGPU(padding); +BM_FuncGPU(striding); +BM_FuncGPU(broadcasting); +BM_FuncGPU(coeffWiseOp); +BM_FuncGPU(algebraicFunc); +BM_FuncGPU(transcendentalFunc); +BM_FuncGPU(rowReduction); +BM_FuncGPU(colReduction); +BM_FuncGPU(fullReduction); + + +// Contractions +#define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3) \ + static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) { \ + StopBenchmarkTiming(); \ + Eigen::CudaStreamDevice stream; \ + Eigen::GpuDevice device(&stream); \ + BenchmarkSuite suite(device, D1, D2, D3); \ + cudaDeviceSynchronize(); \ + suite.FUNC(iters); \ + } \ + BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000); + + +BM_FuncWithInputDimsGPU(contraction, N, N, N); +BM_FuncWithInputDimsGPU(contraction, 64, N, N); +BM_FuncWithInputDimsGPU(contraction, N, 64, N); +BM_FuncWithInputDimsGPU(contraction, N, N, 64); + + +// Convolutions +#define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \ + static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) { \ + StopBenchmarkTiming(); \ + Eigen::CudaStreamDevice stream; \ + Eigen::GpuDevice device(&stream); \ + BenchmarkSuite suite(device, N); \ + cudaDeviceSynchronize(); \ + suite.FUNC(iters, DIM1, DIM2); \ + } \ + BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000); + +/* +BM_FuncWithKernelDimsGPU(convolution, 7, 1); +BM_FuncWithKernelDimsGPU(convolution, 1, 7); +BM_FuncWithKernelDimsGPU(convolution, 7, 4); +BM_FuncWithKernelDimsGPU(convolution, 4, 7); +BM_FuncWithKernelDimsGPU(convolution, 7, 64); +BM_FuncWithKernelDimsGPU(convolution, 64, 7); +*/ diff --git a/external/eigen3/bench/tensors/tensor_benchmarks_gpu.cu b/external/eigen3/bench/tensors/tensor_benchmarks_gpu.cu new file mode 100644 index 0000000..76d68c5 --- /dev/null +++ b/external/eigen3/bench/tensors/tensor_benchmarks_gpu.cu @@ -0,0 +1,75 @@ +#define EIGEN_USE_GPU + +#include +#include +#include + +#include "tensor_benchmarks.h" + +// Simple functions +#define BM_FuncGPU(FUNC) \ + static void BM_##FUNC(int iters, int N) { \ + StopBenchmarkTiming(); \ + Eigen::CudaStreamDevice stream; \ + Eigen::GpuDevice device(&stream); \ + BenchmarkSuite suite(device, N); \ + cudaDeviceSynchronize(); \ + suite.FUNC(iters); \ + } \ + BENCHMARK_RANGE(BM_##FUNC, 10, 5000); + +BM_FuncGPU(memcpy); +BM_FuncGPU(typeCasting); +BM_FuncGPU(random); +BM_FuncGPU(slicing); +BM_FuncGPU(rowChip); +BM_FuncGPU(colChip); +BM_FuncGPU(shuffling); +BM_FuncGPU(padding); +BM_FuncGPU(striding); +BM_FuncGPU(broadcasting); +BM_FuncGPU(coeffWiseOp); +BM_FuncGPU(algebraicFunc); +BM_FuncGPU(transcendentalFunc); +BM_FuncGPU(rowReduction); +BM_FuncGPU(colReduction); +BM_FuncGPU(fullReduction); + + +// Contractions +#define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3) \ + static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) { \ + StopBenchmarkTiming(); \ + Eigen::CudaStreamDevice stream; \ + Eigen::GpuDevice device(&stream); \ + BenchmarkSuite suite(device, D1, D2, D3); \ + cudaDeviceSynchronize(); \ + suite.FUNC(iters); \ + } \ + BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000); + + +BM_FuncWithInputDimsGPU(contraction, N, N, N); +BM_FuncWithInputDimsGPU(contraction, 64, N, N); +BM_FuncWithInputDimsGPU(contraction, N, 64, N); +BM_FuncWithInputDimsGPU(contraction, N, N, 64); + + +// Convolutions +#define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \ + static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) { \ + StopBenchmarkTiming(); \ + Eigen::CudaStreamDevice stream; \ + Eigen::GpuDevice device(&stream); \ + BenchmarkSuite suite(device, N); \ + cudaDeviceSynchronize(); \ + suite.FUNC(iters, DIM1, DIM2); \ + } \ + BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000); + +BM_FuncWithKernelDimsGPU(convolution, 7, 1); +BM_FuncWithKernelDimsGPU(convolution, 1, 7); +BM_FuncWithKernelDimsGPU(convolution, 7, 4); +BM_FuncWithKernelDimsGPU(convolution, 4, 7); +BM_FuncWithKernelDimsGPU(convolution, 7, 64); +BM_FuncWithKernelDimsGPU(convolution, 64, 7); diff --git a/external/eigen3/bench/tensors/tensor_benchmarks_sycl.cc b/external/eigen3/bench/tensors/tensor_benchmarks_sycl.cc new file mode 100644 index 0000000..7eca4d9 --- /dev/null +++ b/external/eigen3/bench/tensors/tensor_benchmarks_sycl.cc @@ -0,0 +1,37 @@ +#define EIGEN_USE_SYCL + +#include +#include + +#include "tensor_benchmarks.h" + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; +// Simple functions +template +cl::sycl::queue sycl_queue() { + return cl::sycl::queue(device_selector(), [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); +} + +#define BM_FuncGPU(FUNC) \ + static void BM_##FUNC(int iters, int N) { \ + StopBenchmarkTiming(); \ + cl::sycl::queue q = sycl_queue(); \ + Eigen::SyclDevice device(q); \ + BenchmarkSuite suite(device, N); \ + suite.FUNC(iters); \ + } \ + BENCHMARK_RANGE(BM_##FUNC, 10, 5000); + +BM_FuncGPU(broadcasting); +BM_FuncGPU(coeffWiseOp); diff --git a/external/eigen3/blas/CMakeLists.txt b/external/eigen3/blas/CMakeLists.txt index a9bc051..d0efb41 100644 --- a/external/eigen3/blas/CMakeLists.txt +++ b/external/eigen3/blas/CMakeLists.txt @@ -14,23 +14,18 @@ endif() add_custom_target(blas) -set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp xerbla.cpp) - -if(EIGEN_Fortran_COMPILER_WORKS) - -set(EigenBlas_SRCS ${EigenBlas_SRCS} - complexdots.f - srotm.f srotmg.f drotm.f drotmg.f - lsame.f dspmv.f ssbmv.f - chbmv.f sspmv.f - zhbmv.f chpmv.f dsbmv.f - zhpmv.f - dtbmv.f stbmv.f ctbmv.f ztbmv.f -) +set(EigenBlas_SRCS single.cpp double.cpp complex_single.cpp complex_double.cpp xerbla.cpp + f2c/srotm.c f2c/srotmg.c f2c/drotm.c f2c/drotmg.c + f2c/lsame.c f2c/dspmv.c f2c/ssbmv.c f2c/chbmv.c + f2c/sspmv.c f2c/zhbmv.c f2c/chpmv.c f2c/dsbmv.c + f2c/zhpmv.c f2c/dtbmv.c f2c/stbmv.c f2c/ctbmv.c + f2c/ztbmv.c f2c/d_cnjg.c f2c/r_cnjg.c + ) + +if (EIGEN_Fortran_COMPILER_WORKS) + set(EigenBlas_SRCS ${EigenBlas_SRCS} fortran/complexdots.f) else() - -message(WARNING " No fortran compiler has been detected, the blas build will be incomplete.") - + set(EigenBlas_SRCS ${EigenBlas_SRCS} f2c/complexdots.c) endif() add_library(eigen_blas_static ${EigenBlas_SRCS}) diff --git a/external/eigen3/blas/PackedTriangularMatrixVector.h b/external/eigen3/blas/PackedTriangularMatrixVector.h index e9886d5..0039536 100644 --- a/external/eigen3/blas/PackedTriangularMatrixVector.h +++ b/external/eigen3/blas/PackedTriangularMatrixVector.h @@ -18,7 +18,7 @@ struct packed_triangular_matrix_vector_product; template struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, @@ -47,7 +47,7 @@ struct packed_triangular_matrix_vector_product struct packed_triangular_matrix_vector_product { - typedef typename scalar_product_traits::ReturnType ResScalar; + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; enum { IsLower = (Mode & Lower) ==Lower, HasUnitDiag = (Mode & UnitDiag)==UnitDiag, diff --git a/external/eigen3/blas/chbmv.f b/external/eigen3/blas/chbmv.f deleted file mode 100644 index 1b1c330..0000000 --- a/external/eigen3/blas/chbmv.f +++ /dev/null @@ -1,310 +0,0 @@ - SUBROUTINE CHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) -* .. Scalar Arguments .. - COMPLEX ALPHA,BETA - INTEGER INCX,INCY,K,LDA,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - COMPLEX A(LDA,*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* CHBMV performs the matrix-vector operation -* -* y := alpha*A*x + beta*y, -* -* where alpha and beta are scalars, x and y are n element vectors and -* A is an n by n hermitian band matrix, with k super-diagonals. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the band matrix A is being supplied as -* follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* being supplied. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* being supplied. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* K - INTEGER. -* On entry, K specifies the number of super-diagonals of the -* matrix A. K must satisfy 0 .le. K. -* Unchanged on exit. -* -* ALPHA - COMPLEX . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* A - COMPLEX array of DIMENSION ( LDA, n ). -* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) -* by n part of the array A must contain the upper triangular -* band part of the hermitian matrix, supplied column by -* column, with the leading diagonal of the matrix in row -* ( k + 1 ) of the array, the first super-diagonal starting at -* position 2 in row k, and so on. The top left k by k triangle -* of the array A is not referenced. -* The following program segment will transfer the upper -* triangular part of a hermitian band matrix from conventional -* full matrix storage to band storage: -* -* DO 20, J = 1, N -* M = K + 1 - J -* DO 10, I = MAX( 1, J - K ), J -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) -* by n part of the array A must contain the lower triangular -* band part of the hermitian matrix, supplied column by -* column, with the leading diagonal of the matrix in row 1 of -* the array, the first sub-diagonal starting at position 1 in -* row 2, and so on. The bottom right k by k triangle of the -* array A is not referenced. -* The following program segment will transfer the lower -* triangular part of a hermitian band matrix from conventional -* full matrix storage to band storage: -* -* DO 20, J = 1, N -* M = 1 - J -* DO 10, I = J, MIN( N, J + K ) -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Note that the imaginary parts of the diagonal elements need -* not be set and are assumed to be zero. -* Unchanged on exit. -* -* LDA - INTEGER. -* On entry, LDA specifies the first dimension of A as declared -* in the calling (sub) program. LDA must be at least -* ( k + 1 ). -* Unchanged on exit. -* -* X - COMPLEX array of DIMENSION at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the -* vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* BETA - COMPLEX . -* On entry, BETA specifies the scalar beta. -* Unchanged on exit. -* -* Y - COMPLEX array of DIMENSION at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the -* vector y. On exit, Y is overwritten by the updated vector y. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX ONE - PARAMETER (ONE= (1.0E+0,0.0E+0)) - COMPLEX ZERO - PARAMETER (ZERO= (0.0E+0,0.0E+0)) -* .. -* .. Local Scalars .. - COMPLEX TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC CONJG,MAX,MIN,REAL -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (K.LT.0) THEN - INFO = 3 - ELSE IF (LDA.LT. (K+1)) THEN - INFO = 6 - ELSE IF (INCX.EQ.0) THEN - INFO = 8 - ELSE IF (INCY.EQ.0) THEN - INFO = 11 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('CHBMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN -* -* Set up the start points in X and Y. -* - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF -* -* Start the operations. In this version the elements of the array A -* are accessed sequentially with one pass through A. -* -* First form y := beta*y. -* - IF (BETA.NE.ONE) THEN - IF (INCY.EQ.1) THEN - IF (BETA.EQ.ZERO) THEN - DO 10 I = 1,N - Y(I) = ZERO - 10 CONTINUE - ELSE - DO 20 I = 1,N - Y(I) = BETA*Y(I) - 20 CONTINUE - END IF - ELSE - IY = KY - IF (BETA.EQ.ZERO) THEN - DO 30 I = 1,N - Y(IY) = ZERO - IY = IY + INCY - 30 CONTINUE - ELSE - DO 40 I = 1,N - Y(IY) = BETA*Y(IY) - IY = IY + INCY - 40 CONTINUE - END IF - END IF - END IF - IF (ALPHA.EQ.ZERO) RETURN - IF (LSAME(UPLO,'U')) THEN -* -* Form y when upper triangle of A is stored. -* - KPLUS1 = K + 1 - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - L = KPLUS1 - J - DO 50 I = MAX(1,J-K),J - 1 - Y(I) = Y(I) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(I) - 50 CONTINUE - Y(J) = Y(J) + TEMP1*REAL(A(KPLUS1,J)) + ALPHA*TEMP2 - 60 CONTINUE - ELSE - JX = KX - JY = KY - DO 80 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - IX = KX - IY = KY - L = KPLUS1 - J - DO 70 I = MAX(1,J-K),J - 1 - Y(IY) = Y(IY) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(IX) - IX = IX + INCX - IY = IY + INCY - 70 CONTINUE - Y(JY) = Y(JY) + TEMP1*REAL(A(KPLUS1,J)) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - IF (J.GT.K) THEN - KX = KX + INCX - KY = KY + INCY - END IF - 80 CONTINUE - END IF - ELSE -* -* Form y when lower triangle of A is stored. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 100 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - Y(J) = Y(J) + TEMP1*REAL(A(1,J)) - L = 1 - J - DO 90 I = J + 1,MIN(N,J+K) - Y(I) = Y(I) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(I) - 90 CONTINUE - Y(J) = Y(J) + ALPHA*TEMP2 - 100 CONTINUE - ELSE - JX = KX - JY = KY - DO 120 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - Y(JY) = Y(JY) + TEMP1*REAL(A(1,J)) - L = 1 - J - IX = JX - IY = JY - DO 110 I = J + 1,MIN(N,J+K) - IX = IX + INCX - IY = IY + INCY - Y(IY) = Y(IY) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(IX) - 110 CONTINUE - Y(JY) = Y(JY) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - 120 CONTINUE - END IF - END IF -* - RETURN -* -* End of CHBMV . -* - END diff --git a/external/eigen3/blas/chpmv.f b/external/eigen3/blas/chpmv.f deleted file mode 100644 index 158be5a..0000000 --- a/external/eigen3/blas/chpmv.f +++ /dev/null @@ -1,272 +0,0 @@ - SUBROUTINE CHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY) -* .. Scalar Arguments .. - COMPLEX ALPHA,BETA - INTEGER INCX,INCY,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - COMPLEX AP(*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* CHPMV performs the matrix-vector operation -* -* y := alpha*A*x + beta*y, -* -* where alpha and beta are scalars, x and y are n element vectors and -* A is an n by n hermitian matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - COMPLEX . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* AP - COMPLEX array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. -* Note that the imaginary parts of the diagonal elements need -* not be set and are assumed to be zero. -* Unchanged on exit. -* -* X - COMPLEX array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* BETA - COMPLEX . -* On entry, BETA specifies the scalar beta. When BETA is -* supplied as zero then Y need not be set on input. -* Unchanged on exit. -* -* Y - COMPLEX array of dimension at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the n -* element vector y. On exit, Y is overwritten by the updated -* vector y. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX ONE - PARAMETER (ONE= (1.0E+0,0.0E+0)) - COMPLEX ZERO - PARAMETER (ZERO= (0.0E+0,0.0E+0)) -* .. -* .. Local Scalars .. - COMPLEX TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC CONJG,REAL -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 6 - ELSE IF (INCY.EQ.0) THEN - INFO = 9 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('CHPMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN -* -* Set up the start points in X and Y. -* - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* -* First form y := beta*y. -* - IF (BETA.NE.ONE) THEN - IF (INCY.EQ.1) THEN - IF (BETA.EQ.ZERO) THEN - DO 10 I = 1,N - Y(I) = ZERO - 10 CONTINUE - ELSE - DO 20 I = 1,N - Y(I) = BETA*Y(I) - 20 CONTINUE - END IF - ELSE - IY = KY - IF (BETA.EQ.ZERO) THEN - DO 30 I = 1,N - Y(IY) = ZERO - IY = IY + INCY - 30 CONTINUE - ELSE - DO 40 I = 1,N - Y(IY) = BETA*Y(IY) - IY = IY + INCY - 40 CONTINUE - END IF - END IF - END IF - IF (ALPHA.EQ.ZERO) RETURN - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form y when AP contains the upper triangle. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - K = KK - DO 50 I = 1,J - 1 - Y(I) = Y(I) + TEMP1*AP(K) - TEMP2 = TEMP2 + CONJG(AP(K))*X(I) - K = K + 1 - 50 CONTINUE - Y(J) = Y(J) + TEMP1*REAL(AP(KK+J-1)) + ALPHA*TEMP2 - KK = KK + J - 60 CONTINUE - ELSE - JX = KX - JY = KY - DO 80 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - IX = KX - IY = KY - DO 70 K = KK,KK + J - 2 - Y(IY) = Y(IY) + TEMP1*AP(K) - TEMP2 = TEMP2 + CONJG(AP(K))*X(IX) - IX = IX + INCX - IY = IY + INCY - 70 CONTINUE - Y(JY) = Y(JY) + TEMP1*REAL(AP(KK+J-1)) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - KK = KK + J - 80 CONTINUE - END IF - ELSE -* -* Form y when AP contains the lower triangle. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 100 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - Y(J) = Y(J) + TEMP1*REAL(AP(KK)) - K = KK + 1 - DO 90 I = J + 1,N - Y(I) = Y(I) + TEMP1*AP(K) - TEMP2 = TEMP2 + CONJG(AP(K))*X(I) - K = K + 1 - 90 CONTINUE - Y(J) = Y(J) + ALPHA*TEMP2 - KK = KK + (N-J+1) - 100 CONTINUE - ELSE - JX = KX - JY = KY - DO 120 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - Y(JY) = Y(JY) + TEMP1*REAL(AP(KK)) - IX = JX - IY = JY - DO 110 K = KK + 1,KK + N - J - IX = IX + INCX - IY = IY + INCY - Y(IY) = Y(IY) + TEMP1*AP(K) - TEMP2 = TEMP2 + CONJG(AP(K))*X(IX) - 110 CONTINUE - Y(JY) = Y(JY) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - KK = KK + (N-J+1) - 120 CONTINUE - END IF - END IF -* - RETURN -* -* End of CHPMV . -* - END diff --git a/external/eigen3/blas/common.h b/external/eigen3/blas/common.h index 2bf642c..61d8344 100644 --- a/external/eigen3/blas/common.h +++ b/external/eigen3/blas/common.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2010 Gael Guennebaud +// Copyright (C) 2009-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -10,18 +10,16 @@ #ifndef EIGEN_BLAS_COMMON_H #define EIGEN_BLAS_COMMON_H -#include -#include +#include "../Eigen/Core" +#include "../Eigen/Jacobi" -#include #include #ifndef SCALAR #error the token SCALAR must be defined to compile this file #endif -#include - +#include "../Eigen/src/misc/blas.h" #define NOTR 0 #define TR 1 @@ -95,6 +93,7 @@ enum typedef Matrix PlainMatrixType; typedef Map, 0, OuterStride<> > MatrixType; +typedef Map, 0, OuterStride<> > ConstMatrixType; typedef Map, 0, InnerStride > StridedVectorType; typedef Map > CompactVectorType; @@ -106,26 +105,45 @@ matrix(T* data, int rows, int cols, int stride) } template -Map, 0, InnerStride > vector(T* data, int size, int incr) +Map, 0, OuterStride<> > +matrix(const T* data, int rows, int cols, int stride) +{ + return Map, 0, OuterStride<> >(data, rows, cols, OuterStride<>(stride)); +} + +template +Map, 0, InnerStride > make_vector(T* data, int size, int incr) { return Map, 0, InnerStride >(data, size, InnerStride(incr)); } template -Map > vector(T* data, int size) +Map, 0, InnerStride > make_vector(const T* data, int size, int incr) +{ + return Map, 0, InnerStride >(data, size, InnerStride(incr)); +} + +template +Map > make_vector(T* data, int size) { return Map >(data, size); } +template +Map > make_vector(const T* data, int size) +{ + return Map >(data, size); +} + template T* get_compact_vector(T* x, int n, int incx) { if(incx==1) return x; - T* ret = new Scalar[n]; - if(incx<0) vector(ret,n) = vector(x,n,-incx).reverse(); - else vector(ret,n) = vector(x,n, incx); + typename Eigen::internal::remove_const::type* ret = new Scalar[n]; + if(incx<0) make_vector(ret,n) = make_vector(x,n,-incx).reverse(); + else make_vector(ret,n) = make_vector(x,n, incx); return ret; } @@ -135,8 +153,8 @@ T* copy_back(T* x_cpy, T* x, int n, int incx) if(x_cpy==x) return 0; - if(incx<0) vector(x,n,-incx).reverse() = vector(x_cpy,n); - else vector(x,n, incx) = vector(x_cpy,n); + if(incx<0) make_vector(x,n,-incx).reverse() = make_vector(x_cpy,n); + else make_vector(x,n, incx) = make_vector(x_cpy,n); return x_cpy; } diff --git a/external/eigen3/blas/ctbmv.f b/external/eigen3/blas/ctbmv.f deleted file mode 100644 index 5a879fa..0000000 --- a/external/eigen3/blas/ctbmv.f +++ /dev/null @@ -1,366 +0,0 @@ - SUBROUTINE CTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,K,LDA,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - COMPLEX A(LDA,*),X(*) -* .. -* -* Purpose -* ======= -* -* CTBMV performs one of the matrix-vector operations -* -* x := A*x, or x := A'*x, or x := conjg( A' )*x, -* -* where x is an n element vector and A is an n by n unit, or non-unit, -* upper or lower triangular band matrix, with ( k + 1 ) diagonals. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the operation to be performed as -* follows: -* -* TRANS = 'N' or 'n' x := A*x. -* -* TRANS = 'T' or 't' x := A'*x. -* -* TRANS = 'C' or 'c' x := conjg( A' )*x. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* K - INTEGER. -* On entry with UPLO = 'U' or 'u', K specifies the number of -* super-diagonals of the matrix A. -* On entry with UPLO = 'L' or 'l', K specifies the number of -* sub-diagonals of the matrix A. -* K must satisfy 0 .le. K. -* Unchanged on exit. -* -* A - COMPLEX array of DIMENSION ( LDA, n ). -* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) -* by n part of the array A must contain the upper triangular -* band part of the matrix of coefficients, supplied column by -* column, with the leading diagonal of the matrix in row -* ( k + 1 ) of the array, the first super-diagonal starting at -* position 2 in row k, and so on. The top left k by k triangle -* of the array A is not referenced. -* The following program segment will transfer an upper -* triangular band matrix from conventional full matrix storage -* to band storage: -* -* DO 20, J = 1, N -* M = K + 1 - J -* DO 10, I = MAX( 1, J - K ), J -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) -* by n part of the array A must contain the lower triangular -* band part of the matrix of coefficients, supplied column by -* column, with the leading diagonal of the matrix in row 1 of -* the array, the first sub-diagonal starting at position 1 in -* row 2, and so on. The bottom right k by k triangle of the -* array A is not referenced. -* The following program segment will transfer a lower -* triangular band matrix from conventional full matrix storage -* to band storage: -* -* DO 20, J = 1, N -* M = 1 - J -* DO 10, I = J, MIN( N, J + K ) -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Note that when DIAG = 'U' or 'u' the elements of the array A -* corresponding to the diagonal elements of the matrix are not -* referenced, but are assumed to be unity. -* Unchanged on exit. -* -* LDA - INTEGER. -* On entry, LDA specifies the first dimension of A as declared -* in the calling (sub) program. LDA must be at least -* ( k + 1 ). -* Unchanged on exit. -* -* X - COMPLEX array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. On exit, X is overwritten with the -* tranformed vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX ZERO - PARAMETER (ZERO= (0.0E+0,0.0E+0)) -* .. -* .. Local Scalars .. - COMPLEX TEMP - INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L - LOGICAL NOCONJ,NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC CONJG,MAX,MIN -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (K.LT.0) THEN - INFO = 5 - ELSE IF (LDA.LT. (K+1)) THEN - INFO = 7 - ELSE IF (INCX.EQ.0) THEN - INFO = 9 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('CTBMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOCONJ = LSAME(TRANS,'T') - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of A are -* accessed sequentially with one pass through A. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x := A*x. -* - IF (LSAME(UPLO,'U')) THEN - KPLUS1 = K + 1 - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - L = KPLUS1 - J - DO 10 I = MAX(1,J-K),J - 1 - X(I) = X(I) + TEMP*A(L+I,J) - 10 CONTINUE - IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J) - END IF - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - L = KPLUS1 - J - DO 30 I = MAX(1,J-K),J - 1 - X(IX) = X(IX) + TEMP*A(L+I,J) - IX = IX + INCX - 30 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J) - END IF - JX = JX + INCX - IF (J.GT.K) KX = KX + INCX - 40 CONTINUE - END IF - ELSE - IF (INCX.EQ.1) THEN - DO 60 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - L = 1 - J - DO 50 I = MIN(N,J+K),J + 1,-1 - X(I) = X(I) + TEMP*A(L+I,J) - 50 CONTINUE - IF (NOUNIT) X(J) = X(J)*A(1,J) - END IF - 60 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 80 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - L = 1 - J - DO 70 I = MIN(N,J+K),J + 1,-1 - X(IX) = X(IX) + TEMP*A(L+I,J) - IX = IX - INCX - 70 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*A(1,J) - END IF - JX = JX - INCX - IF ((N-J).GE.K) KX = KX - INCX - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := A'*x or x := conjg( A' )*x. -* - IF (LSAME(UPLO,'U')) THEN - KPLUS1 = K + 1 - IF (INCX.EQ.1) THEN - DO 110 J = N,1,-1 - TEMP = X(J) - L = KPLUS1 - J - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J) - DO 90 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + A(L+I,J)*X(I) - 90 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*CONJG(A(KPLUS1,J)) - DO 100 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + CONJG(A(L+I,J))*X(I) - 100 CONTINUE - END IF - X(J) = TEMP - 110 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 140 J = N,1,-1 - TEMP = X(JX) - KX = KX - INCX - IX = KX - L = KPLUS1 - J - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J) - DO 120 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + A(L+I,J)*X(IX) - IX = IX - INCX - 120 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*CONJG(A(KPLUS1,J)) - DO 130 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + CONJG(A(L+I,J))*X(IX) - IX = IX - INCX - 130 CONTINUE - END IF - X(JX) = TEMP - JX = JX - INCX - 140 CONTINUE - END IF - ELSE - IF (INCX.EQ.1) THEN - DO 170 J = 1,N - TEMP = X(J) - L = 1 - J - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*A(1,J) - DO 150 I = J + 1,MIN(N,J+K) - TEMP = TEMP + A(L+I,J)*X(I) - 150 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*CONJG(A(1,J)) - DO 160 I = J + 1,MIN(N,J+K) - TEMP = TEMP + CONJG(A(L+I,J))*X(I) - 160 CONTINUE - END IF - X(J) = TEMP - 170 CONTINUE - ELSE - JX = KX - DO 200 J = 1,N - TEMP = X(JX) - KX = KX + INCX - IX = KX - L = 1 - J - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*A(1,J) - DO 180 I = J + 1,MIN(N,J+K) - TEMP = TEMP + A(L+I,J)*X(IX) - IX = IX + INCX - 180 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*CONJG(A(1,J)) - DO 190 I = J + 1,MIN(N,J+K) - TEMP = TEMP + CONJG(A(L+I,J))*X(IX) - IX = IX + INCX - 190 CONTINUE - END IF - X(JX) = TEMP - JX = JX + INCX - 200 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of CTBMV . -* - END diff --git a/external/eigen3/blas/double.cpp b/external/eigen3/blas/double.cpp index 8fd0709..295b1d1 100644 --- a/external/eigen3/blas/double.cpp +++ b/external/eigen3/blas/double.cpp @@ -23,11 +23,10 @@ double BLASFUNC(dsdot)(int* n, float* x, int* incx, float* y, int* incy) { if(*n<=0) return 0; - if(*incx==1 && *incy==1) return (vector(x,*n).cast().cwiseProduct(vector(y,*n).cast())).sum(); - else if(*incx>0 && *incy>0) return (vector(x,*n,*incx).cast().cwiseProduct(vector(y,*n,*incy).cast())).sum(); - else if(*incx<0 && *incy>0) return (vector(x,*n,-*incx).reverse().cast().cwiseProduct(vector(y,*n,*incy).cast())).sum(); - else if(*incx>0 && *incy<0) return (vector(x,*n,*incx).cast().cwiseProduct(vector(y,*n,-*incy).reverse().cast())).sum(); - else if(*incx<0 && *incy<0) return (vector(x,*n,-*incx).reverse().cast().cwiseProduct(vector(y,*n,-*incy).reverse().cast())).sum(); + if(*incx==1 && *incy==1) return (make_vector(x,*n).cast().cwiseProduct(make_vector(y,*n).cast())).sum(); + else if(*incx>0 && *incy>0) return (make_vector(x,*n,*incx).cast().cwiseProduct(make_vector(y,*n,*incy).cast())).sum(); + else if(*incx<0 && *incy>0) return (make_vector(x,*n,-*incx).reverse().cast().cwiseProduct(make_vector(y,*n,*incy).cast())).sum(); + else if(*incx>0 && *incy<0) return (make_vector(x,*n,*incx).cast().cwiseProduct(make_vector(y,*n,-*incy).reverse().cast())).sum(); + else if(*incx<0 && *incy<0) return (make_vector(x,*n,-*incx).reverse().cast().cwiseProduct(make_vector(y,*n,-*incy).reverse().cast())).sum(); else return 0; } - diff --git a/external/eigen3/blas/drotm.f b/external/eigen3/blas/drotm.f deleted file mode 100644 index 63a3b11..0000000 --- a/external/eigen3/blas/drotm.f +++ /dev/null @@ -1,147 +0,0 @@ - SUBROUTINE DROTM(N,DX,INCX,DY,INCY,DPARAM) -* .. Scalar Arguments .. - INTEGER INCX,INCY,N -* .. -* .. Array Arguments .. - DOUBLE PRECISION DPARAM(5),DX(*),DY(*) -* .. -* -* Purpose -* ======= -* -* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX -* -* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN -* (DY**T) -* -* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE -* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. -* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. -* -* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 -* -* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) -* H=( ) ( ) ( ) ( ) -* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). -* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. -* -* Arguments -* ========= -* -* N (input) INTEGER -* number of elements in input vector(s) -* -* DX (input/output) DOUBLE PRECISION array, dimension N -* double precision vector with N elements -* -* INCX (input) INTEGER -* storage spacing between elements of DX -* -* DY (input/output) DOUBLE PRECISION array, dimension N -* double precision vector with N elements -* -* INCY (input) INTEGER -* storage spacing between elements of DY -* -* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 -* DPARAM(1)=DFLAG -* DPARAM(2)=DH11 -* DPARAM(3)=DH21 -* DPARAM(4)=DH12 -* DPARAM(5)=DH22 -* -* ===================================================================== -* -* .. Local Scalars .. - DOUBLE PRECISION DFLAG,DH11,DH12,DH21,DH22,TWO,W,Z,ZERO - INTEGER I,KX,KY,NSTEPS -* .. -* .. Data statements .. - DATA ZERO,TWO/0.D0,2.D0/ -* .. -* - DFLAG = DPARAM(1) - IF (N.LE.0 .OR. (DFLAG+TWO.EQ.ZERO)) GO TO 140 - IF (.NOT. (INCX.EQ.INCY.AND.INCX.GT.0)) GO TO 70 -* - NSTEPS = N*INCX - IF (DFLAG) 50,10,30 - 10 CONTINUE - DH12 = DPARAM(4) - DH21 = DPARAM(3) - DO 20 I = 1,NSTEPS,INCX - W = DX(I) - Z = DY(I) - DX(I) = W + Z*DH12 - DY(I) = W*DH21 + Z - 20 CONTINUE - GO TO 140 - 30 CONTINUE - DH11 = DPARAM(2) - DH22 = DPARAM(5) - DO 40 I = 1,NSTEPS,INCX - W = DX(I) - Z = DY(I) - DX(I) = W*DH11 + Z - DY(I) = -W + DH22*Z - 40 CONTINUE - GO TO 140 - 50 CONTINUE - DH11 = DPARAM(2) - DH12 = DPARAM(4) - DH21 = DPARAM(3) - DH22 = DPARAM(5) - DO 60 I = 1,NSTEPS,INCX - W = DX(I) - Z = DY(I) - DX(I) = W*DH11 + Z*DH12 - DY(I) = W*DH21 + Z*DH22 - 60 CONTINUE - GO TO 140 - 70 CONTINUE - KX = 1 - KY = 1 - IF (INCX.LT.0) KX = 1 + (1-N)*INCX - IF (INCY.LT.0) KY = 1 + (1-N)*INCY -* - IF (DFLAG) 120,80,100 - 80 CONTINUE - DH12 = DPARAM(4) - DH21 = DPARAM(3) - DO 90 I = 1,N - W = DX(KX) - Z = DY(KY) - DX(KX) = W + Z*DH12 - DY(KY) = W*DH21 + Z - KX = KX + INCX - KY = KY + INCY - 90 CONTINUE - GO TO 140 - 100 CONTINUE - DH11 = DPARAM(2) - DH22 = DPARAM(5) - DO 110 I = 1,N - W = DX(KX) - Z = DY(KY) - DX(KX) = W*DH11 + Z - DY(KY) = -W + DH22*Z - KX = KX + INCX - KY = KY + INCY - 110 CONTINUE - GO TO 140 - 120 CONTINUE - DH11 = DPARAM(2) - DH12 = DPARAM(4) - DH21 = DPARAM(3) - DH22 = DPARAM(5) - DO 130 I = 1,N - W = DX(KX) - Z = DY(KY) - DX(KX) = W*DH11 + Z*DH12 - DY(KY) = W*DH21 + Z*DH22 - KX = KX + INCX - KY = KY + INCY - 130 CONTINUE - 140 CONTINUE - RETURN - END diff --git a/external/eigen3/blas/drotmg.f b/external/eigen3/blas/drotmg.f deleted file mode 100644 index 3ae647b..0000000 --- a/external/eigen3/blas/drotmg.f +++ /dev/null @@ -1,206 +0,0 @@ - SUBROUTINE DROTMG(DD1,DD2,DX1,DY1,DPARAM) -* .. Scalar Arguments .. - DOUBLE PRECISION DD1,DD2,DX1,DY1 -* .. -* .. Array Arguments .. - DOUBLE PRECISION DPARAM(5) -* .. -* -* Purpose -* ======= -* -* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS -* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* -* DY2)**T. -* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. -* -* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 -* -* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) -* H=( ) ( ) ( ) ( ) -* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). -* LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 -* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE -* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) -* -* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE -* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE -* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. -* -* -* Arguments -* ========= -* -* DD1 (input/output) DOUBLE PRECISION -* -* DD2 (input/output) DOUBLE PRECISION -* -* DX1 (input/output) DOUBLE PRECISION -* -* DY1 (input) DOUBLE PRECISION -* -* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 -* DPARAM(1)=DFLAG -* DPARAM(2)=DH11 -* DPARAM(3)=DH21 -* DPARAM(4)=DH12 -* DPARAM(5)=DH22 -* -* ===================================================================== -* -* .. Local Scalars .. - DOUBLE PRECISION DFLAG,DH11,DH12,DH21,DH22,DP1,DP2,DQ1,DQ2,DTEMP, - + DU,GAM,GAMSQ,ONE,RGAMSQ,TWO,ZERO - INTEGER IGO -* .. -* .. Intrinsic Functions .. - INTRINSIC DABS -* .. -* .. Data statements .. -* - DATA ZERO,ONE,TWO/0.D0,1.D0,2.D0/ - DATA GAM,GAMSQ,RGAMSQ/4096.D0,16777216.D0,5.9604645D-8/ -* .. - - IF (.NOT.DD1.LT.ZERO) GO TO 10 -* GO ZERO-H-D-AND-DX1.. - GO TO 60 - 10 CONTINUE -* CASE-DD1-NONNEGATIVE - DP2 = DD2*DY1 - IF (.NOT.DP2.EQ.ZERO) GO TO 20 - DFLAG = -TWO - GO TO 260 -* REGULAR-CASE.. - 20 CONTINUE - DP1 = DD1*DX1 - DQ2 = DP2*DY1 - DQ1 = DP1*DX1 -* - IF (.NOT.DABS(DQ1).GT.DABS(DQ2)) GO TO 40 - DH21 = -DY1/DX1 - DH12 = DP2/DP1 -* - DU = ONE - DH12*DH21 -* - IF (.NOT.DU.LE.ZERO) GO TO 30 -* GO ZERO-H-D-AND-DX1.. - GO TO 60 - 30 CONTINUE - DFLAG = ZERO - DD1 = DD1/DU - DD2 = DD2/DU - DX1 = DX1*DU -* GO SCALE-CHECK.. - GO TO 100 - 40 CONTINUE - IF (.NOT.DQ2.LT.ZERO) GO TO 50 -* GO ZERO-H-D-AND-DX1.. - GO TO 60 - 50 CONTINUE - DFLAG = ONE - DH11 = DP1/DP2 - DH22 = DX1/DY1 - DU = ONE + DH11*DH22 - DTEMP = DD2/DU - DD2 = DD1/DU - DD1 = DTEMP - DX1 = DY1*DU -* GO SCALE-CHECK - GO TO 100 -* PROCEDURE..ZERO-H-D-AND-DX1.. - 60 CONTINUE - DFLAG = -ONE - DH11 = ZERO - DH12 = ZERO - DH21 = ZERO - DH22 = ZERO -* - DD1 = ZERO - DD2 = ZERO - DX1 = ZERO -* RETURN.. - GO TO 220 -* PROCEDURE..FIX-H.. - 70 CONTINUE - IF (.NOT.DFLAG.GE.ZERO) GO TO 90 -* - IF (.NOT.DFLAG.EQ.ZERO) GO TO 80 - DH11 = ONE - DH22 = ONE - DFLAG = -ONE - GO TO 90 - 80 CONTINUE - DH21 = -ONE - DH12 = ONE - DFLAG = -ONE - 90 CONTINUE - GO TO IGO(120,150,180,210) -* PROCEDURE..SCALE-CHECK - 100 CONTINUE - 110 CONTINUE - IF (.NOT.DD1.LE.RGAMSQ) GO TO 130 - IF (DD1.EQ.ZERO) GO TO 160 - ASSIGN 120 TO IGO -* FIX-H.. - GO TO 70 - 120 CONTINUE - DD1 = DD1*GAM**2 - DX1 = DX1/GAM - DH11 = DH11/GAM - DH12 = DH12/GAM - GO TO 110 - 130 CONTINUE - 140 CONTINUE - IF (.NOT.DD1.GE.GAMSQ) GO TO 160 - ASSIGN 150 TO IGO -* FIX-H.. - GO TO 70 - 150 CONTINUE - DD1 = DD1/GAM**2 - DX1 = DX1*GAM - DH11 = DH11*GAM - DH12 = DH12*GAM - GO TO 140 - 160 CONTINUE - 170 CONTINUE - IF (.NOT.DABS(DD2).LE.RGAMSQ) GO TO 190 - IF (DD2.EQ.ZERO) GO TO 220 - ASSIGN 180 TO IGO -* FIX-H.. - GO TO 70 - 180 CONTINUE - DD2 = DD2*GAM**2 - DH21 = DH21/GAM - DH22 = DH22/GAM - GO TO 170 - 190 CONTINUE - 200 CONTINUE - IF (.NOT.DABS(DD2).GE.GAMSQ) GO TO 220 - ASSIGN 210 TO IGO -* FIX-H.. - GO TO 70 - 210 CONTINUE - DD2 = DD2/GAM**2 - DH21 = DH21*GAM - DH22 = DH22*GAM - GO TO 200 - 220 CONTINUE - IF (DFLAG) 250,230,240 - 230 CONTINUE - DPARAM(3) = DH21 - DPARAM(4) = DH12 - GO TO 260 - 240 CONTINUE - DPARAM(2) = DH11 - DPARAM(5) = DH22 - GO TO 260 - 250 CONTINUE - DPARAM(2) = DH11 - DPARAM(3) = DH21 - DPARAM(4) = DH12 - DPARAM(5) = DH22 - 260 CONTINUE - DPARAM(1) = DFLAG - RETURN - END diff --git a/external/eigen3/blas/dsbmv.f b/external/eigen3/blas/dsbmv.f deleted file mode 100644 index 8c82d1f..0000000 --- a/external/eigen3/blas/dsbmv.f +++ /dev/null @@ -1,304 +0,0 @@ - SUBROUTINE DSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) -* .. Scalar Arguments .. - DOUBLE PRECISION ALPHA,BETA - INTEGER INCX,INCY,K,LDA,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - DOUBLE PRECISION A(LDA,*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* DSBMV performs the matrix-vector operation -* -* y := alpha*A*x + beta*y, -* -* where alpha and beta are scalars, x and y are n element vectors and -* A is an n by n symmetric band matrix, with k super-diagonals. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the band matrix A is being supplied as -* follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* being supplied. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* being supplied. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* K - INTEGER. -* On entry, K specifies the number of super-diagonals of the -* matrix A. K must satisfy 0 .le. K. -* Unchanged on exit. -* -* ALPHA - DOUBLE PRECISION. -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). -* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) -* by n part of the array A must contain the upper triangular -* band part of the symmetric matrix, supplied column by -* column, with the leading diagonal of the matrix in row -* ( k + 1 ) of the array, the first super-diagonal starting at -* position 2 in row k, and so on. The top left k by k triangle -* of the array A is not referenced. -* The following program segment will transfer the upper -* triangular part of a symmetric band matrix from conventional -* full matrix storage to band storage: -* -* DO 20, J = 1, N -* M = K + 1 - J -* DO 10, I = MAX( 1, J - K ), J -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) -* by n part of the array A must contain the lower triangular -* band part of the symmetric matrix, supplied column by -* column, with the leading diagonal of the matrix in row 1 of -* the array, the first sub-diagonal starting at position 1 in -* row 2, and so on. The bottom right k by k triangle of the -* array A is not referenced. -* The following program segment will transfer the lower -* triangular part of a symmetric band matrix from conventional -* full matrix storage to band storage: -* -* DO 20, J = 1, N -* M = 1 - J -* DO 10, I = J, MIN( N, J + K ) -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Unchanged on exit. -* -* LDA - INTEGER. -* On entry, LDA specifies the first dimension of A as declared -* in the calling (sub) program. LDA must be at least -* ( k + 1 ). -* Unchanged on exit. -* -* X - DOUBLE PRECISION array of DIMENSION at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the -* vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* BETA - DOUBLE PRECISION. -* On entry, BETA specifies the scalar beta. -* Unchanged on exit. -* -* Y - DOUBLE PRECISION array of DIMENSION at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the -* vector y. On exit, Y is overwritten by the updated vector y. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ONE,ZERO - PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) -* .. -* .. Local Scalars .. - DOUBLE PRECISION TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC MAX,MIN -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (K.LT.0) THEN - INFO = 3 - ELSE IF (LDA.LT. (K+1)) THEN - INFO = 6 - ELSE IF (INCX.EQ.0) THEN - INFO = 8 - ELSE IF (INCY.EQ.0) THEN - INFO = 11 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('DSBMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN -* -* Set up the start points in X and Y. -* - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF -* -* Start the operations. In this version the elements of the array A -* are accessed sequentially with one pass through A. -* -* First form y := beta*y. -* - IF (BETA.NE.ONE) THEN - IF (INCY.EQ.1) THEN - IF (BETA.EQ.ZERO) THEN - DO 10 I = 1,N - Y(I) = ZERO - 10 CONTINUE - ELSE - DO 20 I = 1,N - Y(I) = BETA*Y(I) - 20 CONTINUE - END IF - ELSE - IY = KY - IF (BETA.EQ.ZERO) THEN - DO 30 I = 1,N - Y(IY) = ZERO - IY = IY + INCY - 30 CONTINUE - ELSE - DO 40 I = 1,N - Y(IY) = BETA*Y(IY) - IY = IY + INCY - 40 CONTINUE - END IF - END IF - END IF - IF (ALPHA.EQ.ZERO) RETURN - IF (LSAME(UPLO,'U')) THEN -* -* Form y when upper triangle of A is stored. -* - KPLUS1 = K + 1 - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - L = KPLUS1 - J - DO 50 I = MAX(1,J-K),J - 1 - Y(I) = Y(I) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + A(L+I,J)*X(I) - 50 CONTINUE - Y(J) = Y(J) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2 - 60 CONTINUE - ELSE - JX = KX - JY = KY - DO 80 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - IX = KX - IY = KY - L = KPLUS1 - J - DO 70 I = MAX(1,J-K),J - 1 - Y(IY) = Y(IY) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + A(L+I,J)*X(IX) - IX = IX + INCX - IY = IY + INCY - 70 CONTINUE - Y(JY) = Y(JY) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - IF (J.GT.K) THEN - KX = KX + INCX - KY = KY + INCY - END IF - 80 CONTINUE - END IF - ELSE -* -* Form y when lower triangle of A is stored. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 100 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - Y(J) = Y(J) + TEMP1*A(1,J) - L = 1 - J - DO 90 I = J + 1,MIN(N,J+K) - Y(I) = Y(I) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + A(L+I,J)*X(I) - 90 CONTINUE - Y(J) = Y(J) + ALPHA*TEMP2 - 100 CONTINUE - ELSE - JX = KX - JY = KY - DO 120 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - Y(JY) = Y(JY) + TEMP1*A(1,J) - L = 1 - J - IX = JX - IY = JY - DO 110 I = J + 1,MIN(N,J+K) - IX = IX + INCX - IY = IY + INCY - Y(IY) = Y(IY) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + A(L+I,J)*X(IX) - 110 CONTINUE - Y(JY) = Y(JY) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - 120 CONTINUE - END IF - END IF -* - RETURN -* -* End of DSBMV . -* - END diff --git a/external/eigen3/blas/dspmv.f b/external/eigen3/blas/dspmv.f deleted file mode 100644 index f6e121e..0000000 --- a/external/eigen3/blas/dspmv.f +++ /dev/null @@ -1,265 +0,0 @@ - SUBROUTINE DSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY) -* .. Scalar Arguments .. - DOUBLE PRECISION ALPHA,BETA - INTEGER INCX,INCY,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - DOUBLE PRECISION AP(*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* DSPMV performs the matrix-vector operation -* -* y := alpha*A*x + beta*y, -* -* where alpha and beta are scalars, x and y are n element vectors and -* A is an n by n symmetric matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - DOUBLE PRECISION. -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* AP - DOUBLE PRECISION array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the symmetric matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. -* Unchanged on exit. -* -* X - DOUBLE PRECISION array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* BETA - DOUBLE PRECISION. -* On entry, BETA specifies the scalar beta. When BETA is -* supplied as zero then Y need not be set on input. -* Unchanged on exit. -* -* Y - DOUBLE PRECISION array of dimension at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the n -* element vector y. On exit, Y is overwritten by the updated -* vector y. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ONE,ZERO - PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) -* .. -* .. Local Scalars .. - DOUBLE PRECISION TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 6 - ELSE IF (INCY.EQ.0) THEN - INFO = 9 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('DSPMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN -* -* Set up the start points in X and Y. -* - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* -* First form y := beta*y. -* - IF (BETA.NE.ONE) THEN - IF (INCY.EQ.1) THEN - IF (BETA.EQ.ZERO) THEN - DO 10 I = 1,N - Y(I) = ZERO - 10 CONTINUE - ELSE - DO 20 I = 1,N - Y(I) = BETA*Y(I) - 20 CONTINUE - END IF - ELSE - IY = KY - IF (BETA.EQ.ZERO) THEN - DO 30 I = 1,N - Y(IY) = ZERO - IY = IY + INCY - 30 CONTINUE - ELSE - DO 40 I = 1,N - Y(IY) = BETA*Y(IY) - IY = IY + INCY - 40 CONTINUE - END IF - END IF - END IF - IF (ALPHA.EQ.ZERO) RETURN - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form y when AP contains the upper triangle. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - K = KK - DO 50 I = 1,J - 1 - Y(I) = Y(I) + TEMP1*AP(K) - TEMP2 = TEMP2 + AP(K)*X(I) - K = K + 1 - 50 CONTINUE - Y(J) = Y(J) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2 - KK = KK + J - 60 CONTINUE - ELSE - JX = KX - JY = KY - DO 80 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - IX = KX - IY = KY - DO 70 K = KK,KK + J - 2 - Y(IY) = Y(IY) + TEMP1*AP(K) - TEMP2 = TEMP2 + AP(K)*X(IX) - IX = IX + INCX - IY = IY + INCY - 70 CONTINUE - Y(JY) = Y(JY) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - KK = KK + J - 80 CONTINUE - END IF - ELSE -* -* Form y when AP contains the lower triangle. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 100 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - Y(J) = Y(J) + TEMP1*AP(KK) - K = KK + 1 - DO 90 I = J + 1,N - Y(I) = Y(I) + TEMP1*AP(K) - TEMP2 = TEMP2 + AP(K)*X(I) - K = K + 1 - 90 CONTINUE - Y(J) = Y(J) + ALPHA*TEMP2 - KK = KK + (N-J+1) - 100 CONTINUE - ELSE - JX = KX - JY = KY - DO 120 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - Y(JY) = Y(JY) + TEMP1*AP(KK) - IX = JX - IY = JY - DO 110 K = KK + 1,KK + N - J - IX = IX + INCX - IY = IY + INCY - Y(IY) = Y(IY) + TEMP1*AP(K) - TEMP2 = TEMP2 + AP(K)*X(IX) - 110 CONTINUE - Y(JY) = Y(JY) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - KK = KK + (N-J+1) - 120 CONTINUE - END IF - END IF -* - RETURN -* -* End of DSPMV . -* - END diff --git a/external/eigen3/blas/dtbmv.f b/external/eigen3/blas/dtbmv.f deleted file mode 100644 index a87ffde..0000000 --- a/external/eigen3/blas/dtbmv.f +++ /dev/null @@ -1,335 +0,0 @@ - SUBROUTINE DTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,K,LDA,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - DOUBLE PRECISION A(LDA,*),X(*) -* .. -* -* Purpose -* ======= -* -* DTBMV performs one of the matrix-vector operations -* -* x := A*x, or x := A'*x, -* -* where x is an n element vector and A is an n by n unit, or non-unit, -* upper or lower triangular band matrix, with ( k + 1 ) diagonals. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the operation to be performed as -* follows: -* -* TRANS = 'N' or 'n' x := A*x. -* -* TRANS = 'T' or 't' x := A'*x. -* -* TRANS = 'C' or 'c' x := A'*x. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* K - INTEGER. -* On entry with UPLO = 'U' or 'u', K specifies the number of -* super-diagonals of the matrix A. -* On entry with UPLO = 'L' or 'l', K specifies the number of -* sub-diagonals of the matrix A. -* K must satisfy 0 .le. K. -* Unchanged on exit. -* -* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). -* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) -* by n part of the array A must contain the upper triangular -* band part of the matrix of coefficients, supplied column by -* column, with the leading diagonal of the matrix in row -* ( k + 1 ) of the array, the first super-diagonal starting at -* position 2 in row k, and so on. The top left k by k triangle -* of the array A is not referenced. -* The following program segment will transfer an upper -* triangular band matrix from conventional full matrix storage -* to band storage: -* -* DO 20, J = 1, N -* M = K + 1 - J -* DO 10, I = MAX( 1, J - K ), J -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) -* by n part of the array A must contain the lower triangular -* band part of the matrix of coefficients, supplied column by -* column, with the leading diagonal of the matrix in row 1 of -* the array, the first sub-diagonal starting at position 1 in -* row 2, and so on. The bottom right k by k triangle of the -* array A is not referenced. -* The following program segment will transfer a lower -* triangular band matrix from conventional full matrix storage -* to band storage: -* -* DO 20, J = 1, N -* M = 1 - J -* DO 10, I = J, MIN( N, J + K ) -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Note that when DIAG = 'U' or 'u' the elements of the array A -* corresponding to the diagonal elements of the matrix are not -* referenced, but are assumed to be unity. -* Unchanged on exit. -* -* LDA - INTEGER. -* On entry, LDA specifies the first dimension of A as declared -* in the calling (sub) program. LDA must be at least -* ( k + 1 ). -* Unchanged on exit. -* -* X - DOUBLE PRECISION array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. On exit, X is overwritten with the -* tranformed vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ZERO - PARAMETER (ZERO=0.0D+0) -* .. -* .. Local Scalars .. - DOUBLE PRECISION TEMP - INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L - LOGICAL NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC MAX,MIN -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (K.LT.0) THEN - INFO = 5 - ELSE IF (LDA.LT. (K+1)) THEN - INFO = 7 - ELSE IF (INCX.EQ.0) THEN - INFO = 9 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('DTBMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of A are -* accessed sequentially with one pass through A. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x := A*x. -* - IF (LSAME(UPLO,'U')) THEN - KPLUS1 = K + 1 - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - L = KPLUS1 - J - DO 10 I = MAX(1,J-K),J - 1 - X(I) = X(I) + TEMP*A(L+I,J) - 10 CONTINUE - IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J) - END IF - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - L = KPLUS1 - J - DO 30 I = MAX(1,J-K),J - 1 - X(IX) = X(IX) + TEMP*A(L+I,J) - IX = IX + INCX - 30 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J) - END IF - JX = JX + INCX - IF (J.GT.K) KX = KX + INCX - 40 CONTINUE - END IF - ELSE - IF (INCX.EQ.1) THEN - DO 60 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - L = 1 - J - DO 50 I = MIN(N,J+K),J + 1,-1 - X(I) = X(I) + TEMP*A(L+I,J) - 50 CONTINUE - IF (NOUNIT) X(J) = X(J)*A(1,J) - END IF - 60 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 80 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - L = 1 - J - DO 70 I = MIN(N,J+K),J + 1,-1 - X(IX) = X(IX) + TEMP*A(L+I,J) - IX = IX - INCX - 70 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*A(1,J) - END IF - JX = JX - INCX - IF ((N-J).GE.K) KX = KX - INCX - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := A'*x. -* - IF (LSAME(UPLO,'U')) THEN - KPLUS1 = K + 1 - IF (INCX.EQ.1) THEN - DO 100 J = N,1,-1 - TEMP = X(J) - L = KPLUS1 - J - IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J) - DO 90 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + A(L+I,J)*X(I) - 90 CONTINUE - X(J) = TEMP - 100 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 120 J = N,1,-1 - TEMP = X(JX) - KX = KX - INCX - IX = KX - L = KPLUS1 - J - IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J) - DO 110 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + A(L+I,J)*X(IX) - IX = IX - INCX - 110 CONTINUE - X(JX) = TEMP - JX = JX - INCX - 120 CONTINUE - END IF - ELSE - IF (INCX.EQ.1) THEN - DO 140 J = 1,N - TEMP = X(J) - L = 1 - J - IF (NOUNIT) TEMP = TEMP*A(1,J) - DO 130 I = J + 1,MIN(N,J+K) - TEMP = TEMP + A(L+I,J)*X(I) - 130 CONTINUE - X(J) = TEMP - 140 CONTINUE - ELSE - JX = KX - DO 160 J = 1,N - TEMP = X(JX) - KX = KX + INCX - IX = KX - L = 1 - J - IF (NOUNIT) TEMP = TEMP*A(1,J) - DO 150 I = J + 1,MIN(N,J+K) - TEMP = TEMP + A(L+I,J)*X(IX) - IX = IX + INCX - 150 CONTINUE - X(JX) = TEMP - JX = JX + INCX - 160 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of DTBMV . -* - END diff --git a/external/eigen3/blas/f2c/chbmv.c b/external/eigen3/blas/f2c/chbmv.c new file mode 100644 index 0000000..f218fe3 --- /dev/null +++ b/external/eigen3/blas/f2c/chbmv.c @@ -0,0 +1,487 @@ +/* chbmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int chbmv_(char *uplo, integer *n, integer *k, complex * + alpha, complex *a, integer *lda, complex *x, integer *incx, complex * + beta, complex *y, integer *incy, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + complex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kplus1; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* CHBMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n hermitian band matrix, with k super-diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the band matrix A is being supplied as */ +/* follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* being supplied. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* being supplied. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry, K specifies the number of super-diagonals of the */ +/* matrix A. K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* ALPHA - COMPLEX . */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - COMPLEX array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the hermitian matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer the upper */ +/* triangular part of a hermitian band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the hermitian matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer the lower */ +/* triangular part of a hermitian band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Note that the imaginary parts of the diagonal elements need */ +/* not be set and are assumed to be zero. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - COMPLEX array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - COMPLEX . */ +/* On entry, BETA specifies the scalar beta. */ +/* Unchanged on exit. */ + +/* Y - COMPLEX array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the */ +/* vector y. On exit, Y is overwritten by the updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("CHBMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && + beta->i == 0.f))) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array A */ +/* are accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (beta->r != 1.f || beta->i != 0.f) { + if (*incy == 1) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + i__2 = i__; + i__3 = i__; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__2 = i__; + q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i = + q__3.r * x[i__2].i + q__3.i * x[i__2].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L50: */ + } + i__4 = j; + i__2 = j; + i__3 = kplus1 + j * a_dim1; + r__1 = a[i__3].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__4].r = q__1.r, y[i__4].i = q__1.i; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__4 = jx; + q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i = + alpha->r * x[i__4].i + alpha->i * x[i__4].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + ix = kx; + iy = ky; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + i__4 = iy; + i__2 = iy; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; + y[i__4].r = q__1.r, y[i__4].i = q__1.i; + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = + q__3.r * x[i__4].i + q__3.i * x[i__4].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__3 = jy; + i__4 = jy; + i__2 = kplus1 + j * a_dim1; + r__1 = a[i__2].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } +/* L80: */ + } + } + } else { + +/* Form y when lower triangle of A is stored. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__3 = j; + q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = + alpha->r * x[i__3].i + alpha->i * x[i__3].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__3 = j; + i__4 = j; + i__2 = j * a_dim1 + 1; + r__1 = a[i__2].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__; + i__2 = i__; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; + y[i__4].r = q__1.r, y[i__4].i = q__1.i; + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = + q__3.r * x[i__4].i + q__3.i * x[i__4].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L90: */ + } + i__3 = j; + i__4 = j; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__3 = jx; + q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i = + alpha->r * x[i__3].i + alpha->i * x[i__3].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__3 = jy; + i__4 = jy; + i__2 = j * a_dim1 + 1; + r__1 = a[i__2].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + l = 1 - j; + ix = jx; + iy = jy; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + i__4 = iy; + i__2 = iy; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i; + y[i__4].r = q__1.r, y[i__4].i = q__1.i; + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i = + q__3.r * x[i__4].i + q__3.i * x[i__4].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L110: */ + } + i__3 = jy; + i__4 = jy; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of CHBMV . */ + +} /* chbmv_ */ + diff --git a/external/eigen3/blas/f2c/chpmv.c b/external/eigen3/blas/f2c/chpmv.c new file mode 100644 index 0000000..65bab1c --- /dev/null +++ b/external/eigen3/blas/f2c/chpmv.c @@ -0,0 +1,438 @@ +/* chpmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int chpmv_(char *uplo, integer *n, complex *alpha, complex * + ap, complex *x, integer *incx, complex *beta, complex *y, integer * + incy, ftnlen uplo_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4, i__5; + real r__1; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + complex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* CHPMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n hermitian matrix, supplied in packed form. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the matrix A is supplied in the packed */ +/* array AP as follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* supplied in AP. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* supplied in AP. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - COMPLEX . */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* AP - COMPLEX array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular part of the hermitian matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ +/* and a( 2, 2 ) respectively, and so on. */ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular part of the hermitian matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ +/* and a( 3, 1 ) respectively, and so on. */ +/* Note that the imaginary parts of the diagonal elements need */ +/* not be set and are assumed to be zero. */ +/* Unchanged on exit. */ + +/* X - COMPLEX array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - COMPLEX . */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - COMPLEX array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. On exit, Y is overwritten by the updated */ +/* vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --y; + --x; + --ap; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + xerbla_("CHPMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && + beta->i == 0.f))) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array AP */ +/* are accessed sequentially with one pass through AP. */ + +/* First form y := beta*y. */ + + if (beta->r != 1.f || beta->i != 0.f) { + if (*incy == 1) { + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0.f, y[i__2].i = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0.f && beta->i == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0.f, y[i__2].i = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + q__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0.f && alpha->i == 0.f) { + return 0; + } + kk = 1; + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, + q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &ap[k]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ++k; +/* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = kk + j - 1; + r__1 = ap[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + kk += j; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + i__3 = iy; + i__4 = iy; + i__5 = k; + q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, + q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &ap[k]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = kk + j - 1; + r__1 = ap[i__4].r; + q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i; + q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i; + q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jx += *incx; + jy += *incy; + kk += j; +/* L80: */ + } + } + } else { + +/* Form y when AP contains the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = j; + i__3 = j; + i__4 = kk; + r__1 = ap[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, + q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &ap[k]); + i__3 = i__; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; + ++k; +/* L90: */ + } + i__2 = j; + i__3 = j; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + kk += *n - j + 1; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = q__1.r, temp1.i = q__1.i; + temp2.r = 0.f, temp2.i = 0.f; + i__2 = jy; + i__3 = jy; + i__4 = kk; + r__1 = ap[i__4].r; + q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = k; + q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, + q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] + .r; + q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i; + y[i__3].r = q__1.r, y[i__3].i = q__1.i; + r_cnjg(&q__3, &ap[k]); + i__3 = ix; + q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i = + q__3.r * x[i__3].i + q__3.i * x[i__3].r; + q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i; + temp2.r = q__1.r, temp2.i = q__1.i; +/* L110: */ + } + i__2 = jy; + i__3 = jy; + q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i; + y[i__2].r = q__1.r, y[i__2].i = q__1.i; + jx += *incx; + jy += *incy; + kk += *n - j + 1; +/* L120: */ + } + } + } + + return 0; + +/* End of CHPMV . */ + +} /* chpmv_ */ + diff --git a/external/eigen3/blas/f2c/complexdots.c b/external/eigen3/blas/f2c/complexdots.c new file mode 100644 index 0000000..a856a23 --- /dev/null +++ b/external/eigen3/blas/f2c/complexdots.c @@ -0,0 +1,84 @@ +/* This file has been modified to use the standard gfortran calling + convention, rather than the f2c calling convention. + + It does not require -ff2c when compiled with gfortran. +*/ + +/* complexdots.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +complex cdotc_(integer *n, complex *cx, integer + *incx, complex *cy, integer *incy) +{ + complex res; + extern /* Subroutine */ int cdotcw_(integer *, complex *, integer *, + complex *, integer *, complex *); + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + cdotcw_(n, &cx[1], incx, &cy[1], incy, &res); + return res; +} /* cdotc_ */ + +complex cdotu_(integer *n, complex *cx, integer + *incx, complex *cy, integer *incy) +{ + complex res; + extern /* Subroutine */ int cdotuw_(integer *, complex *, integer *, + complex *, integer *, complex *); + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + cdotuw_(n, &cx[1], incx, &cy[1], incy, &res); + return res; +} /* cdotu_ */ + +doublecomplex zdotc_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy) +{ + doublecomplex res; + extern /* Subroutine */ int zdotcw_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *); + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + zdotcw_(n, &cx[1], incx, &cy[1], incy, &res); + return res; +} /* zdotc_ */ + +doublecomplex zdotu_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy) +{ + doublecomplex res; + extern /* Subroutine */ int zdotuw_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *); + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + zdotuw_(n, &cx[1], incx, &cy[1], incy, &res); + return res; +} /* zdotu_ */ + diff --git a/external/eigen3/blas/f2c/ctbmv.c b/external/eigen3/blas/f2c/ctbmv.c new file mode 100644 index 0000000..790fd58 --- /dev/null +++ b/external/eigen3/blas/f2c/ctbmv.c @@ -0,0 +1,647 @@ +/* ctbmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int ctbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, complex *a, integer *lda, complex *x, integer *incx, + ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + complex q__1, q__2, q__3; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + complex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kplus1; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* CTBMV performs one of the matrix-vector operations */ + +/* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ + +/* where x is an n element vector and A is an n by n unit, or non-unit, */ +/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' x := A*x. */ + +/* TRANS = 'T' or 't' x := A'*x. */ + +/* TRANS = 'C' or 'c' x := conjg( A' )*x. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry with UPLO = 'U' or 'u', K specifies the number of */ +/* super-diagonals of the matrix A. */ +/* On entry with UPLO = 'L' or 'l', K specifies the number of */ +/* sub-diagonals of the matrix A. */ +/* K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* A - COMPLEX array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer an upper */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer a lower */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Note that when DIAG = 'U' or 'u' the elements of the array A */ +/* corresponding to the diagonal elements of the matrix are not */ +/* referenced, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - COMPLEX array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. On exit, X is overwritten with the */ +/* tranformed vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + "N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + xerbla_("CTBMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0.f || x[i__2].i != 0.f) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + i__2 = i__; + i__3 = i__; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + + q__2.i; + x[i__2].r = q__1.r, x[i__2].i = q__1.i; +/* L10: */ + } + if (nounit) { + i__4 = j; + i__2 = j; + i__3 = kplus1 + j * a_dim1; + q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, q__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__4].r = q__1.r, x[i__4].i = q__1.i; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__4 = jx; + if (x[i__4].r != 0.f || x[i__4].i != 0.f) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + ix = kx; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + i__4 = ix; + i__2 = ix; + i__5 = l + i__ + j * a_dim1; + q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + q__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + + q__2.i; + x[i__4].r = q__1.r, x[i__4].i = q__1.i; + ix += *incx; +/* L30: */ + } + if (nounit) { + i__3 = jx; + i__4 = jx; + i__2 = kplus1 + j * a_dim1; + q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[ + i__2].i, q__1.i = x[i__4].r * a[i__2].i + + x[i__4].i * a[i__2].r; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0.f || x[i__1].i != 0.f) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + l = 1 - j; +/* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { + i__1 = i__; + i__3 = i__; + i__2 = l + i__ + j * a_dim1; + q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__2.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + + q__2.i; + x[i__1].r = q__1.r, x[i__1].i = q__1.i; +/* L50: */ + } + if (nounit) { + i__4 = j; + i__1 = j; + i__3 = j * a_dim1 + 1; + q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[ + i__3].i, q__1.i = x[i__1].r * a[i__3].i + + x[i__1].i * a[i__3].r; + x[i__4].r = q__1.r, x[i__4].i = q__1.i; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__4 = jx; + if (x[i__4].r != 0.f || x[i__4].i != 0.f) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + ix = kx; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { + i__4 = ix; + i__1 = ix; + i__2 = l + i__ + j * a_dim1; + q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + q__2.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + + q__2.i; + x[i__4].r = q__1.r, x[i__4].i = q__1.i; + ix -= *incx; +/* L70: */ + } + if (nounit) { + i__3 = jx; + i__4 = jx; + i__1 = j * a_dim1 + 1; + q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[ + i__1].i, q__1.i = x[i__4].r * a[i__1].i + + x[i__4].i * a[i__1].r; + x[i__3].r = q__1.r, x[i__3].i = q__1.i; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x or x := conjg( A' )*x. */ + + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__3 = j; + temp.r = x[i__3].r, temp.i = x[i__3].i; + l = kplus1 - j; + if (noconj) { + if (nounit) { + i__3 = kplus1 + j * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + q__1.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + i__4 = l + i__ + j * a_dim1; + i__1 = i__; + q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[ + i__1].i, q__2.i = a[i__4].r * x[i__1].i + + a[i__4].i * x[i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L90: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = i__; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, + q__2.i = q__3.r * x[i__4].i + q__3.i * x[ + i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L100: */ + } + } + i__3 = j; + x[i__3].r = temp.r, x[i__3].i = temp.i; +/* L110: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__3 = jx; + temp.r = x[i__3].r, temp.i = x[i__3].i; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (noconj) { + if (nounit) { + i__3 = kplus1 + j * a_dim1; + q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + q__1.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + temp.r = q__1.r, temp.i = q__1.i; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + i__4 = l + i__ + j * a_dim1; + i__1 = ix; + q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[ + i__1].i, q__2.i = a[i__4].r * x[i__1].i + + a[i__4].i * x[i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; +/* L120: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, + q__2.i = q__3.r * x[i__4].i + q__3.i * x[ + i__4].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix -= *incx; +/* L130: */ + } + } + i__3 = jx; + x[i__3].r = temp.r, x[i__3].i = temp.i; + jx -= *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = j; + temp.r = x[i__4].r, temp.i = x[i__4].i; + l = 1 - j; + if (noconj) { + if (nounit) { + i__4 = j * a_dim1 + 1; + q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + q__1.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + temp.r = q__1.r, temp.i = q__1.i; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + i__1 = l + i__ + j * a_dim1; + i__2 = i__; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, q__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L150: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j * a_dim1 + 1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__1 = i__; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, + q__2.i = q__3.r * x[i__1].i + q__3.i * x[ + i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; +/* L160: */ + } + } + i__4 = j; + x[i__4].r = temp.r, x[i__4].i = temp.i; +/* L170: */ + } + } else { + jx = kx; + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + kx += *incx; + ix = kx; + l = 1 - j; + if (noconj) { + if (nounit) { + i__4 = j * a_dim1 + 1; + q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + q__1.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + temp.r = q__1.r, temp.i = q__1.i; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + i__1 = l + i__ + j * a_dim1; + i__2 = ix; + q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, q__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L180: */ + } + } else { + if (nounit) { + r_cnjg(&q__2, &a[j * a_dim1 + 1]); + q__1.r = temp.r * q__2.r - temp.i * q__2.i, + q__1.i = temp.r * q__2.i + temp.i * + q__2.r; + temp.r = q__1.r, temp.i = q__1.i; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); + i__1 = ix; + q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, + q__2.i = q__3.r * x[i__1].i + q__3.i * x[ + i__1].r; + q__1.r = temp.r + q__2.r, q__1.i = temp.i + + q__2.i; + temp.r = q__1.r, temp.i = q__1.i; + ix += *incx; +/* L190: */ + } + } + i__4 = jx; + x[i__4].r = temp.r, x[i__4].i = temp.i; + jx += *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of CTBMV . */ + +} /* ctbmv_ */ + diff --git a/external/eigen3/blas/f2c/d_cnjg.c b/external/eigen3/blas/f2c/d_cnjg.c new file mode 100644 index 0000000..623090c --- /dev/null +++ b/external/eigen3/blas/f2c/d_cnjg.c @@ -0,0 +1,6 @@ +#include "datatypes.h" + +void d_cnjg(doublecomplex *r, doublecomplex *z) { + r->r = z->r; + r->i = -(z->i); +} diff --git a/external/eigen3/blas/f2c/datatypes.h b/external/eigen3/blas/f2c/datatypes.h new file mode 100644 index 0000000..63232b2 --- /dev/null +++ b/external/eigen3/blas/f2c/datatypes.h @@ -0,0 +1,24 @@ +/* This contains a limited subset of the typedefs exposed by f2c + for use by the Eigen BLAS C-only implementation. +*/ + +#ifndef __EIGEN_DATATYPES_H__ +#define __EIGEN_DATATYPES_H__ + +typedef int integer; +typedef unsigned int uinteger; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef int ftnlen; +typedef int logical; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (doublereal)abs(x) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (doublereal)min(a,b) +#define dmax(a,b) (doublereal)max(a,b) + +#endif diff --git a/external/eigen3/blas/f2c/drotm.c b/external/eigen3/blas/f2c/drotm.c new file mode 100644 index 0000000..17a779b --- /dev/null +++ b/external/eigen3/blas/f2c/drotm.c @@ -0,0 +1,215 @@ +/* drotm.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int drotm_(integer *n, doublereal *dx, integer *incx, + doublereal *dy, integer *incy, doublereal *dparam) +{ + /* Initialized data */ + + static doublereal zero = 0.; + static doublereal two = 2.; + + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__; + doublereal w, z__; + integer kx, ky; + doublereal dh11, dh12, dh21, dh22, dflag; + integer nsteps; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ + +/* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */ +/* (DY**T) */ + +/* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ +/* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */ +/* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + +/* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ + +/* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ +/* H=( ) ( ) ( ) ( ) */ +/* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ +/* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */ + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* number of elements in input vector(s) */ + +/* DX (input/output) DOUBLE PRECISION array, dimension N */ +/* double precision vector with N elements */ + +/* INCX (input) INTEGER */ +/* storage spacing between elements of DX */ + +/* DY (input/output) DOUBLE PRECISION array, dimension N */ +/* double precision vector with N elements */ + +/* INCY (input) INTEGER */ +/* storage spacing between elements of DY */ + +/* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ +/* DPARAM(1)=DFLAG */ +/* DPARAM(2)=DH11 */ +/* DPARAM(3)=DH21 */ +/* DPARAM(4)=DH12 */ +/* DPARAM(5)=DH22 */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Data statements .. */ + /* Parameter adjustments */ + --dparam; + --dy; + --dx; + + /* Function Body */ +/* .. */ + + dflag = dparam[1]; + if (*n <= 0 || dflag + two == zero) { + goto L140; + } + if (! (*incx == *incy && *incx > 0)) { + goto L70; + } + + nsteps = *n * *incx; + if (dflag < 0.) { + goto L50; + } else if (dflag == 0) { + goto L10; + } else { + goto L30; + } +L10: + dh12 = dparam[4]; + dh21 = dparam[3]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w + z__ * dh12; + dy[i__] = w * dh21 + z__; +/* L20: */ + } + goto L140; +L30: + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = nsteps; + i__1 = *incx; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w * dh11 + z__; + dy[i__] = -w + dh22 * z__; +/* L40: */ + } + goto L140; +L50: + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = dx[i__]; + z__ = dy[i__]; + dx[i__] = w * dh11 + z__ * dh12; + dy[i__] = w * dh21 + z__ * dh22; +/* L60: */ + } + goto L140; +L70: + kx = 1; + ky = 1; + if (*incx < 0) { + kx = (1 - *n) * *incx + 1; + } + if (*incy < 0) { + ky = (1 - *n) * *incy + 1; + } + + if (dflag < 0.) { + goto L120; + } else if (dflag == 0) { + goto L80; + } else { + goto L100; + } +L80: + dh12 = dparam[4]; + dh21 = dparam[3]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w + z__ * dh12; + dy[ky] = w * dh21 + z__; + kx += *incx; + ky += *incy; +/* L90: */ + } + goto L140; +L100: + dh11 = dparam[2]; + dh22 = dparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w * dh11 + z__; + dy[ky] = -w + dh22 * z__; + kx += *incx; + ky += *incy; +/* L110: */ + } + goto L140; +L120: + dh11 = dparam[2]; + dh12 = dparam[4]; + dh21 = dparam[3]; + dh22 = dparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = dx[kx]; + z__ = dy[ky]; + dx[kx] = w * dh11 + z__ * dh12; + dy[ky] = w * dh21 + z__ * dh22; + kx += *incx; + ky += *incy; +/* L130: */ + } +L140: + return 0; +} /* drotm_ */ + diff --git a/external/eigen3/blas/f2c/drotmg.c b/external/eigen3/blas/f2c/drotmg.c new file mode 100644 index 0000000..a63eb10 --- /dev/null +++ b/external/eigen3/blas/f2c/drotmg.c @@ -0,0 +1,293 @@ +/* drotmg.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int drotmg_(doublereal *dd1, doublereal *dd2, doublereal * + dx1, doublereal *dy1, doublereal *dparam) +{ + /* Initialized data */ + + static doublereal zero = 0.; + static doublereal one = 1.; + static doublereal two = 2.; + static doublereal gam = 4096.; + static doublereal gamsq = 16777216.; + static doublereal rgamsq = 5.9604645e-8; + + /* Format strings */ + static char fmt_120[] = ""; + static char fmt_150[] = ""; + static char fmt_180[] = ""; + static char fmt_210[] = ""; + + /* System generated locals */ + doublereal d__1; + + /* Local variables */ + doublereal du, dp1, dp2, dq1, dq2, dh11, dh12, dh21, dh22; + integer igo; + doublereal dflag, dtemp; + + /* Assigned format variables */ + static char *igo_fmt; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ +/* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* */ +/* DY2)**T. */ +/* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + +/* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ + +/* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ +/* H=( ) ( ) ( ) ( ) */ +/* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ +/* LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */ +/* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */ +/* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */ + +/* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ +/* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ +/* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ + + +/* Arguments */ +/* ========= */ + +/* DD1 (input/output) DOUBLE PRECISION */ + +/* DD2 (input/output) DOUBLE PRECISION */ + +/* DX1 (input/output) DOUBLE PRECISION */ + +/* DY1 (input) DOUBLE PRECISION */ + +/* DPARAM (input/output) DOUBLE PRECISION array, dimension 5 */ +/* DPARAM(1)=DFLAG */ +/* DPARAM(2)=DH11 */ +/* DPARAM(3)=DH21 */ +/* DPARAM(4)=DH12 */ +/* DPARAM(5)=DH22 */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Data statements .. */ + + /* Parameter adjustments */ + --dparam; + + /* Function Body */ +/* .. */ + if (! (*dd1 < zero)) { + goto L10; + } +/* GO ZERO-H-D-AND-DX1.. */ + goto L60; +L10: +/* CASE-DD1-NONNEGATIVE */ + dp2 = *dd2 * *dy1; + if (! (dp2 == zero)) { + goto L20; + } + dflag = -two; + goto L260; +/* REGULAR-CASE.. */ +L20: + dp1 = *dd1 * *dx1; + dq2 = dp2 * *dy1; + dq1 = dp1 * *dx1; + + if (! (abs(dq1) > abs(dq2))) { + goto L40; + } + dh21 = -(*dy1) / *dx1; + dh12 = dp2 / dp1; + + du = one - dh12 * dh21; + + if (! (du <= zero)) { + goto L30; + } +/* GO ZERO-H-D-AND-DX1.. */ + goto L60; +L30: + dflag = zero; + *dd1 /= du; + *dd2 /= du; + *dx1 *= du; +/* GO SCALE-CHECK.. */ + goto L100; +L40: + if (! (dq2 < zero)) { + goto L50; + } +/* GO ZERO-H-D-AND-DX1.. */ + goto L60; +L50: + dflag = one; + dh11 = dp1 / dp2; + dh22 = *dx1 / *dy1; + du = one + dh11 * dh22; + dtemp = *dd2 / du; + *dd2 = *dd1 / du; + *dd1 = dtemp; + *dx1 = *dy1 * du; +/* GO SCALE-CHECK */ + goto L100; +/* PROCEDURE..ZERO-H-D-AND-DX1.. */ +L60: + dflag = -one; + dh11 = zero; + dh12 = zero; + dh21 = zero; + dh22 = zero; + + *dd1 = zero; + *dd2 = zero; + *dx1 = zero; +/* RETURN.. */ + goto L220; +/* PROCEDURE..FIX-H.. */ +L70: + if (! (dflag >= zero)) { + goto L90; + } + + if (! (dflag == zero)) { + goto L80; + } + dh11 = one; + dh22 = one; + dflag = -one; + goto L90; +L80: + dh21 = -one; + dh12 = one; + dflag = -one; +L90: + switch (igo) { + case 0: goto L120; + case 1: goto L150; + case 2: goto L180; + case 3: goto L210; + } +/* PROCEDURE..SCALE-CHECK */ +L100: +L110: + if (! (*dd1 <= rgamsq)) { + goto L130; + } + if (*dd1 == zero) { + goto L160; + } + igo = 0; + igo_fmt = fmt_120; +/* FIX-H.. */ + goto L70; +L120: +/* Computing 2nd power */ + d__1 = gam; + *dd1 *= d__1 * d__1; + *dx1 /= gam; + dh11 /= gam; + dh12 /= gam; + goto L110; +L130: +L140: + if (! (*dd1 >= gamsq)) { + goto L160; + } + igo = 1; + igo_fmt = fmt_150; +/* FIX-H.. */ + goto L70; +L150: +/* Computing 2nd power */ + d__1 = gam; + *dd1 /= d__1 * d__1; + *dx1 *= gam; + dh11 *= gam; + dh12 *= gam; + goto L140; +L160: +L170: + if (! (abs(*dd2) <= rgamsq)) { + goto L190; + } + if (*dd2 == zero) { + goto L220; + } + igo = 2; + igo_fmt = fmt_180; +/* FIX-H.. */ + goto L70; +L180: +/* Computing 2nd power */ + d__1 = gam; + *dd2 *= d__1 * d__1; + dh21 /= gam; + dh22 /= gam; + goto L170; +L190: +L200: + if (! (abs(*dd2) >= gamsq)) { + goto L220; + } + igo = 3; + igo_fmt = fmt_210; +/* FIX-H.. */ + goto L70; +L210: +/* Computing 2nd power */ + d__1 = gam; + *dd2 /= d__1 * d__1; + dh21 *= gam; + dh22 *= gam; + goto L200; +L220: + if (dflag < 0.) { + goto L250; + } else if (dflag == 0) { + goto L230; + } else { + goto L240; + } +L230: + dparam[3] = dh21; + dparam[4] = dh12; + goto L260; +L240: + dparam[2] = dh11; + dparam[5] = dh22; + goto L260; +L250: + dparam[2] = dh11; + dparam[3] = dh21; + dparam[4] = dh12; + dparam[5] = dh22; +L260: + dparam[1] = dflag; + return 0; +} /* drotmg_ */ + diff --git a/external/eigen3/blas/f2c/dsbmv.c b/external/eigen3/blas/f2c/dsbmv.c new file mode 100644 index 0000000..c6b4b21 --- /dev/null +++ b/external/eigen3/blas/f2c/dsbmv.c @@ -0,0 +1,366 @@ +/* dsbmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int dsbmv_(char *uplo, integer *n, integer *k, doublereal * + alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, + doublereal *beta, doublereal *y, integer *incy, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kplus1; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSBMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n symmetric band matrix, with k super-diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the band matrix A is being supplied as */ +/* follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* being supplied. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* being supplied. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry, K specifies the number of super-diagonals of the */ +/* matrix A. K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the symmetric matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer the upper */ +/* triangular part of a symmetric band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the symmetric matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer the lower */ +/* triangular part of a symmetric band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the */ +/* vector y. On exit, Y is overwritten by the updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("DSBMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. && *beta == 1.)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array A */ +/* are accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * + temp2; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } +/* L80: */ + } + } + } else { + +/* Form y when lower triangle of A is stored. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; + ix = jx; + iy = jy; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of DSBMV . */ + +} /* dsbmv_ */ + diff --git a/external/eigen3/blas/f2c/dspmv.c b/external/eigen3/blas/f2c/dspmv.c new file mode 100644 index 0000000..0b4e92d --- /dev/null +++ b/external/eigen3/blas/f2c/dspmv.c @@ -0,0 +1,316 @@ +/* dspmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int dspmv_(char *uplo, integer *n, doublereal *alpha, + doublereal *ap, doublereal *x, integer *incx, doublereal *beta, + doublereal *y, integer *incy, ftnlen uplo_len) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + doublereal temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DSPMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n symmetric matrix, supplied in packed form. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the matrix A is supplied in the packed */ +/* array AP as follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* supplied in AP. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* supplied in AP. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - DOUBLE PRECISION. */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* AP - DOUBLE PRECISION array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ +/* and a( 2, 2 ) respectively, and so on. */ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ +/* and a( 3, 1 ) respectively, and so on. */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - DOUBLE PRECISION. */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. On exit, Y is overwritten by the updated */ +/* vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --y; + --x; + --ap; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + xerbla_("DSPMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0. && *beta == 1.)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array AP */ +/* are accessed sequentially with one pass through AP. */ + +/* First form y := beta*y. */ + + if (*beta != 1.) { + if (*incy == 1) { + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.) { + return 0; + } + kk = 1; + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; +/* L50: */ + } + y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; + kk += j; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; + jx += *incx; + jy += *incy; + kk += j; +/* L80: */ + } + } + } else { + +/* Form y when AP contains the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.; + y[j] += temp1 * ap[kk]; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; +/* L90: */ + } + y[j] += *alpha * temp2; + kk += *n - j + 1; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.; + y[jy] += temp1 * ap[kk]; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; + kk += *n - j + 1; +/* L120: */ + } + } + } + + return 0; + +/* End of DSPMV . */ + +} /* dspmv_ */ + diff --git a/external/eigen3/blas/f2c/dtbmv.c b/external/eigen3/blas/f2c/dtbmv.c new file mode 100644 index 0000000..fdf73eb --- /dev/null +++ b/external/eigen3/blas/f2c/dtbmv.c @@ -0,0 +1,428 @@ +/* dtbmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int dtbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx, + ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + doublereal temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kplus1; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* DTBMV performs one of the matrix-vector operations */ + +/* x := A*x, or x := A'*x, */ + +/* where x is an n element vector and A is an n by n unit, or non-unit, */ +/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' x := A*x. */ + +/* TRANS = 'T' or 't' x := A'*x. */ + +/* TRANS = 'C' or 'c' x := A'*x. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry with UPLO = 'U' or 'u', K specifies the number of */ +/* super-diagonals of the matrix A. */ +/* On entry with UPLO = 'L' or 'l', K specifies the number of */ +/* sub-diagonals of the matrix A. */ +/* K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer an upper */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer a lower */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Note that when DIAG = 'U' or 'u' the elements of the array A */ +/* corresponding to the diagonal elements of the matrix are not */ +/* referenced, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - DOUBLE PRECISION array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. On exit, X is overwritten with the */ +/* tranformed vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + "N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + xerbla_("DTBMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.) { + temp = x[j]; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[kplus1 + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[kplus1 + j * a_dim1]; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.) { + temp = x[j]; + l = 1 - j; +/* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j * a_dim1 + 1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.) { + temp = x[jx]; + ix = kx; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j * a_dim1 + 1]; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix -= *incx; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[j]; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[jx]; + kx += *incx; + ix = kx; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of DTBMV . */ + +} /* dtbmv_ */ + diff --git a/external/eigen3/blas/f2c/lsame.c b/external/eigen3/blas/f2c/lsame.c new file mode 100644 index 0000000..46324d9 --- /dev/null +++ b/external/eigen3/blas/f2c/lsame.c @@ -0,0 +1,117 @@ +/* lsame.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +logical lsame_(char *ca, char *cb, ftnlen ca_len, ftnlen cb_len) +{ + /* System generated locals */ + logical ret_val; + + /* Local variables */ + integer inta, intb, zcode; + + +/* -- LAPACK auxiliary routine (version 3.1) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ +/* November 2006 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ +/* case. */ + +/* Arguments */ +/* ========= */ + +/* CA (input) CHARACTER*1 */ + +/* CB (input) CHARACTER*1 */ +/* CA and CB specify the single characters to be compared. */ + +/* ===================================================================== */ + +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ + +/* Test if the characters are equal */ + + ret_val = *(unsigned char *)ca == *(unsigned char *)cb; + if (ret_val) { + return ret_val; + } + +/* Now test for equivalence if both characters are alphabetic. */ + + zcode = 'Z'; + +/* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ +/* machines, on which ICHAR returns a value with bit 8 set. */ +/* ICHAR('A') on Prime machines returns 193 which is the same as */ +/* ICHAR('A') on an EBCDIC machine. */ + + inta = *(unsigned char *)ca; + intb = *(unsigned char *)cb; + + if (zcode == 90 || zcode == 122) { + +/* ASCII is assumed - ZCODE is the ASCII code of either lower or */ +/* upper case 'Z'. */ + + if (inta >= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + +/* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ +/* upper case 'Z'. */ + + if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || + (inta >= 162 && inta <= 169)) { + inta += 64; + } + if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || + (intb >= 162 && intb <= 169)) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + +/* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ +/* plus 128 of either lower or upper case 'Z'. */ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + +/* RETURN */ + +/* End of LSAME */ + + return ret_val; +} /* lsame_ */ + diff --git a/external/eigen3/blas/f2c/r_cnjg.c b/external/eigen3/blas/f2c/r_cnjg.c new file mode 100644 index 0000000..c08182f --- /dev/null +++ b/external/eigen3/blas/f2c/r_cnjg.c @@ -0,0 +1,6 @@ +#include "datatypes.h" + +void r_cnjg(complex *r, complex *z) { + r->r = z->r; + r->i = -(z->i); +} diff --git a/external/eigen3/blas/f2c/srotm.c b/external/eigen3/blas/f2c/srotm.c new file mode 100644 index 0000000..bd5944a --- /dev/null +++ b/external/eigen3/blas/f2c/srotm.c @@ -0,0 +1,216 @@ +/* srotm.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int srotm_(integer *n, real *sx, integer *incx, real *sy, + integer *incy, real *sparam) +{ + /* Initialized data */ + + static real zero = 0.f; + static real two = 2.f; + + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__; + real w, z__; + integer kx, ky; + real sh11, sh12, sh21, sh22, sflag; + integer nsteps; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ + +/* (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */ +/* (DX**T) */ + +/* SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ +/* LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */ +/* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + +/* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ + +/* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ +/* H=( ) ( ) ( ) ( ) */ +/* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ +/* SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */ + + +/* Arguments */ +/* ========= */ + +/* N (input) INTEGER */ +/* number of elements in input vector(s) */ + +/* SX (input/output) REAL array, dimension N */ +/* double precision vector with N elements */ + +/* INCX (input) INTEGER */ +/* storage spacing between elements of SX */ + +/* SY (input/output) REAL array, dimension N */ +/* double precision vector with N elements */ + +/* INCY (input) INTEGER */ +/* storage spacing between elements of SY */ + +/* SPARAM (input/output) REAL array, dimension 5 */ +/* SPARAM(1)=SFLAG */ +/* SPARAM(2)=SH11 */ +/* SPARAM(3)=SH21 */ +/* SPARAM(4)=SH12 */ +/* SPARAM(5)=SH22 */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Data statements .. */ + /* Parameter adjustments */ + --sparam; + --sy; + --sx; + + /* Function Body */ +/* .. */ + + sflag = sparam[1]; + if (*n <= 0 || sflag + two == zero) { + goto L140; + } + if (! (*incx == *incy && *incx > 0)) { + goto L70; + } + + nsteps = *n * *incx; + if (sflag < 0.f) { + goto L50; + } else if (sflag == 0) { + goto L10; + } else { + goto L30; + } +L10: + sh12 = sparam[4]; + sh21 = sparam[3]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = sx[i__]; + z__ = sy[i__]; + sx[i__] = w + z__ * sh12; + sy[i__] = w * sh21 + z__; +/* L20: */ + } + goto L140; +L30: + sh11 = sparam[2]; + sh22 = sparam[5]; + i__2 = nsteps; + i__1 = *incx; + for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { + w = sx[i__]; + z__ = sy[i__]; + sx[i__] = w * sh11 + z__; + sy[i__] = -w + sh22 * z__; +/* L40: */ + } + goto L140; +L50: + sh11 = sparam[2]; + sh12 = sparam[4]; + sh21 = sparam[3]; + sh22 = sparam[5]; + i__1 = nsteps; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { + w = sx[i__]; + z__ = sy[i__]; + sx[i__] = w * sh11 + z__ * sh12; + sy[i__] = w * sh21 + z__ * sh22; +/* L60: */ + } + goto L140; +L70: + kx = 1; + ky = 1; + if (*incx < 0) { + kx = (1 - *n) * *incx + 1; + } + if (*incy < 0) { + ky = (1 - *n) * *incy + 1; + } + + if (sflag < 0.f) { + goto L120; + } else if (sflag == 0) { + goto L80; + } else { + goto L100; + } +L80: + sh12 = sparam[4]; + sh21 = sparam[3]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = sx[kx]; + z__ = sy[ky]; + sx[kx] = w + z__ * sh12; + sy[ky] = w * sh21 + z__; + kx += *incx; + ky += *incy; +/* L90: */ + } + goto L140; +L100: + sh11 = sparam[2]; + sh22 = sparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = sx[kx]; + z__ = sy[ky]; + sx[kx] = w * sh11 + z__; + sy[ky] = -w + sh22 * z__; + kx += *incx; + ky += *incy; +/* L110: */ + } + goto L140; +L120: + sh11 = sparam[2]; + sh12 = sparam[4]; + sh21 = sparam[3]; + sh22 = sparam[5]; + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + w = sx[kx]; + z__ = sy[ky]; + sx[kx] = w * sh11 + z__ * sh12; + sy[ky] = w * sh21 + z__ * sh22; + kx += *incx; + ky += *incy; +/* L130: */ + } +L140: + return 0; +} /* srotm_ */ + diff --git a/external/eigen3/blas/f2c/srotmg.c b/external/eigen3/blas/f2c/srotmg.c new file mode 100644 index 0000000..75f789f --- /dev/null +++ b/external/eigen3/blas/f2c/srotmg.c @@ -0,0 +1,295 @@ +/* srotmg.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real + *sparam) +{ + /* Initialized data */ + + static real zero = 0.f; + static real one = 1.f; + static real two = 2.f; + static real gam = 4096.f; + static real gamsq = 16777200.f; + static real rgamsq = 5.96046e-8f; + + /* Format strings */ + static char fmt_120[] = ""; + static char fmt_150[] = ""; + static char fmt_180[] = ""; + static char fmt_210[] = ""; + + /* System generated locals */ + real r__1; + + /* Local variables */ + real su, sp1, sp2, sq1, sq2, sh11, sh12, sh21, sh22; + integer igo; + real sflag, stemp; + + /* Assigned format variables */ + static char *igo_fmt; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ +/* THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2)* */ +/* SY2)**T. */ +/* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ + +/* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ + +/* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ +/* H=( ) ( ) ( ) ( ) */ +/* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ +/* LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */ +/* RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */ +/* VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */ + +/* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ +/* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ +/* OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ + + +/* Arguments */ +/* ========= */ + + +/* SD1 (input/output) REAL */ + +/* SD2 (input/output) REAL */ + +/* SX1 (input/output) REAL */ + +/* SY1 (input) REAL */ + + +/* SPARAM (input/output) REAL array, dimension 5 */ +/* SPARAM(1)=SFLAG */ +/* SPARAM(2)=SH11 */ +/* SPARAM(3)=SH21 */ +/* SPARAM(4)=SH12 */ +/* SPARAM(5)=SH22 */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Data statements .. */ + + /* Parameter adjustments */ + --sparam; + + /* Function Body */ +/* .. */ + if (! (*sd1 < zero)) { + goto L10; + } +/* GO ZERO-H-D-AND-SX1.. */ + goto L60; +L10: +/* CASE-SD1-NONNEGATIVE */ + sp2 = *sd2 * *sy1; + if (! (sp2 == zero)) { + goto L20; + } + sflag = -two; + goto L260; +/* REGULAR-CASE.. */ +L20: + sp1 = *sd1 * *sx1; + sq2 = sp2 * *sy1; + sq1 = sp1 * *sx1; + + if (! (dabs(sq1) > dabs(sq2))) { + goto L40; + } + sh21 = -(*sy1) / *sx1; + sh12 = sp2 / sp1; + + su = one - sh12 * sh21; + + if (! (su <= zero)) { + goto L30; + } +/* GO ZERO-H-D-AND-SX1.. */ + goto L60; +L30: + sflag = zero; + *sd1 /= su; + *sd2 /= su; + *sx1 *= su; +/* GO SCALE-CHECK.. */ + goto L100; +L40: + if (! (sq2 < zero)) { + goto L50; + } +/* GO ZERO-H-D-AND-SX1.. */ + goto L60; +L50: + sflag = one; + sh11 = sp1 / sp2; + sh22 = *sx1 / *sy1; + su = one + sh11 * sh22; + stemp = *sd2 / su; + *sd2 = *sd1 / su; + *sd1 = stemp; + *sx1 = *sy1 * su; +/* GO SCALE-CHECK */ + goto L100; +/* PROCEDURE..ZERO-H-D-AND-SX1.. */ +L60: + sflag = -one; + sh11 = zero; + sh12 = zero; + sh21 = zero; + sh22 = zero; + + *sd1 = zero; + *sd2 = zero; + *sx1 = zero; +/* RETURN.. */ + goto L220; +/* PROCEDURE..FIX-H.. */ +L70: + if (! (sflag >= zero)) { + goto L90; + } + + if (! (sflag == zero)) { + goto L80; + } + sh11 = one; + sh22 = one; + sflag = -one; + goto L90; +L80: + sh21 = -one; + sh12 = one; + sflag = -one; +L90: + switch (igo) { + case 0: goto L120; + case 1: goto L150; + case 2: goto L180; + case 3: goto L210; + } +/* PROCEDURE..SCALE-CHECK */ +L100: +L110: + if (! (*sd1 <= rgamsq)) { + goto L130; + } + if (*sd1 == zero) { + goto L160; + } + igo = 0; + igo_fmt = fmt_120; +/* FIX-H.. */ + goto L70; +L120: +/* Computing 2nd power */ + r__1 = gam; + *sd1 *= r__1 * r__1; + *sx1 /= gam; + sh11 /= gam; + sh12 /= gam; + goto L110; +L130: +L140: + if (! (*sd1 >= gamsq)) { + goto L160; + } + igo = 1; + igo_fmt = fmt_150; +/* FIX-H.. */ + goto L70; +L150: +/* Computing 2nd power */ + r__1 = gam; + *sd1 /= r__1 * r__1; + *sx1 *= gam; + sh11 *= gam; + sh12 *= gam; + goto L140; +L160: +L170: + if (! (dabs(*sd2) <= rgamsq)) { + goto L190; + } + if (*sd2 == zero) { + goto L220; + } + igo = 2; + igo_fmt = fmt_180; +/* FIX-H.. */ + goto L70; +L180: +/* Computing 2nd power */ + r__1 = gam; + *sd2 *= r__1 * r__1; + sh21 /= gam; + sh22 /= gam; + goto L170; +L190: +L200: + if (! (dabs(*sd2) >= gamsq)) { + goto L220; + } + igo = 3; + igo_fmt = fmt_210; +/* FIX-H.. */ + goto L70; +L210: +/* Computing 2nd power */ + r__1 = gam; + *sd2 /= r__1 * r__1; + sh21 *= gam; + sh22 *= gam; + goto L200; +L220: + if (sflag < 0.f) { + goto L250; + } else if (sflag == 0) { + goto L230; + } else { + goto L240; + } +L230: + sparam[3] = sh21; + sparam[4] = sh12; + goto L260; +L240: + sparam[2] = sh11; + sparam[5] = sh22; + goto L260; +L250: + sparam[2] = sh11; + sparam[3] = sh21; + sparam[4] = sh12; + sparam[5] = sh22; +L260: + sparam[1] = sflag; + return 0; +} /* srotmg_ */ + diff --git a/external/eigen3/blas/f2c/ssbmv.c b/external/eigen3/blas/f2c/ssbmv.c new file mode 100644 index 0000000..8599325 --- /dev/null +++ b/external/eigen3/blas/f2c/ssbmv.c @@ -0,0 +1,368 @@ +/* ssbmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int ssbmv_(char *uplo, integer *n, integer *k, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy, ftnlen uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + real temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kplus1; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SSBMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n symmetric band matrix, with k super-diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the band matrix A is being supplied as */ +/* follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* being supplied. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* being supplied. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry, K specifies the number of super-diagonals of the */ +/* matrix A. K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* ALPHA - REAL . */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - REAL array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the symmetric matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer the upper */ +/* triangular part of a symmetric band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the symmetric matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer the lower */ +/* triangular part of a symmetric band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - REAL array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - REAL . */ +/* On entry, BETA specifies the scalar beta. */ +/* Unchanged on exit. */ + +/* Y - REAL array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the */ +/* vector y. On exit, Y is overwritten by the updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SSBMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array A */ +/* are accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; +/* L50: */ + } + y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + ix = kx; + iy = ky; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * + temp2; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } +/* L80: */ + } + } + } else { + +/* Form y when lower triangle of A is stored. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + y[j] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + y[i__] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[j] += *alpha * temp2; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + y[jy] += temp1 * a[j * a_dim1 + 1]; + l = 1 - j; + ix = jx; + iy = jy; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * a[l + i__ + j * a_dim1]; + temp2 += a[l + i__ + j * a_dim1] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SSBMV . */ + +} /* ssbmv_ */ + diff --git a/external/eigen3/blas/f2c/sspmv.c b/external/eigen3/blas/f2c/sspmv.c new file mode 100644 index 0000000..47858ec --- /dev/null +++ b/external/eigen3/blas/f2c/sspmv.c @@ -0,0 +1,316 @@ +/* sspmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int sspmv_(char *uplo, integer *n, real *alpha, real *ap, + real *x, integer *incx, real *beta, real *y, integer *incy, ftnlen + uplo_len) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + real temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SSPMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n symmetric matrix, supplied in packed form. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the matrix A is supplied in the packed */ +/* array AP as follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* supplied in AP. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* supplied in AP. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - REAL . */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* AP - REAL array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ +/* and a( 2, 2 ) respectively, and so on. */ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular part of the symmetric matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ +/* and a( 3, 1 ) respectively, and so on. */ +/* Unchanged on exit. */ + +/* X - REAL array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - REAL . */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - REAL array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. On exit, Y is overwritten by the updated */ +/* vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --y; + --x; + --ap; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + xerbla_("SSPMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array AP */ +/* are accessed sequentially with one pass through AP. */ + +/* First form y := beta*y. */ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + kk = 1; + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; +/* L50: */ + } + y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; + kk += j; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; + ix += *incx; + iy += *incy; +/* L70: */ + } + y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; + jx += *incx; + jy += *incy; + kk += j; +/* L80: */ + } + } + } else { + +/* Form y when AP contains the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[j]; + temp2 = 0.f; + y[j] += temp1 * ap[kk]; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + y[i__] += temp1 * ap[k]; + temp2 += ap[k] * x[i__]; + ++k; +/* L90: */ + } + y[j] += *alpha * temp2; + kk += *n - j + 1; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * x[jx]; + temp2 = 0.f; + y[jy] += temp1 * ap[kk]; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + y[iy] += temp1 * ap[k]; + temp2 += ap[k] * x[ix]; +/* L110: */ + } + y[jy] += *alpha * temp2; + jx += *incx; + jy += *incy; + kk += *n - j + 1; +/* L120: */ + } + } + } + + return 0; + +/* End of SSPMV . */ + +} /* sspmv_ */ + diff --git a/external/eigen3/blas/f2c/stbmv.c b/external/eigen3/blas/f2c/stbmv.c new file mode 100644 index 0000000..fcf9ce3 --- /dev/null +++ b/external/eigen3/blas/f2c/stbmv.c @@ -0,0 +1,428 @@ +/* stbmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int stbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, real *a, integer *lda, real *x, integer *incx, ftnlen + uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + real temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kplus1; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* STBMV performs one of the matrix-vector operations */ + +/* x := A*x, or x := A'*x, */ + +/* where x is an n element vector and A is an n by n unit, or non-unit, */ +/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' x := A*x. */ + +/* TRANS = 'T' or 't' x := A'*x. */ + +/* TRANS = 'C' or 'c' x := A'*x. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry with UPLO = 'U' or 'u', K specifies the number of */ +/* super-diagonals of the matrix A. */ +/* On entry with UPLO = 'L' or 'l', K specifies the number of */ +/* sub-diagonals of the matrix A. */ +/* K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* A - REAL array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer an upper */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer a lower */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Note that when DIAG = 'U' or 'u' the elements of the array A */ +/* corresponding to the diagonal elements of the matrix are not */ +/* referenced, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - REAL array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. On exit, X is overwritten with the */ +/* tranformed vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + "N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + xerbla_("STBMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[j] != 0.f) { + temp = x[j]; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; +/* L10: */ + } + if (nounit) { + x[j] *= a[kplus1 + j * a_dim1]; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix += *incx; +/* L30: */ + } + if (nounit) { + x[jx] *= a[kplus1 + j * a_dim1]; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + if (x[j] != 0.f) { + temp = x[j]; + l = 1 - j; +/* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { + x[i__] += temp * a[l + i__ + j * a_dim1]; +/* L50: */ + } + if (nounit) { + x[j] *= a[j * a_dim1 + 1]; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + if (x[jx] != 0.f) { + temp = x[jx]; + ix = kx; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { + x[ix] += temp * a[l + i__ + j * a_dim1]; + ix -= *incx; +/* L70: */ + } + if (nounit) { + x[jx] *= a[j * a_dim1 + 1]; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x. */ + + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + temp = x[j]; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + x[j] = temp; +/* L100: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + temp = x[jx]; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (nounit) { + temp *= a[kplus1 + j * a_dim1]; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix -= *incx; +/* L110: */ + } + x[jx] = temp; + jx -= *incx; +/* L120: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[j]; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[i__]; +/* L130: */ + } + x[j] = temp; +/* L140: */ + } + } else { + jx = kx; + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + temp = x[jx]; + kx += *incx; + ix = kx; + l = 1 - j; + if (nounit) { + temp *= a[j * a_dim1 + 1]; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + temp += a[l + i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L150: */ + } + x[jx] = temp; + jx += *incx; +/* L160: */ + } + } + } + } + + return 0; + +/* End of STBMV . */ + +} /* stbmv_ */ + diff --git a/external/eigen3/blas/f2c/zhbmv.c b/external/eigen3/blas/f2c/zhbmv.c new file mode 100644 index 0000000..42da13d --- /dev/null +++ b/external/eigen3/blas/f2c/zhbmv.c @@ -0,0 +1,488 @@ +/* zhbmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int zhbmv_(char *uplo, integer *n, integer *k, doublecomplex + *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer * + incx, doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen + uplo_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, l, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kplus1; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ZHBMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n hermitian band matrix, with k super-diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the band matrix A is being supplied as */ +/* follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* being supplied. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* being supplied. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry, K specifies the number of super-diagonals of the */ +/* matrix A. K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* ALPHA - COMPLEX*16 . */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the hermitian matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer the upper */ +/* triangular part of a hermitian band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the hermitian matrix, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer the lower */ +/* triangular part of a hermitian band matrix from conventional */ +/* full matrix storage to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Note that the imaginary parts of the diagonal elements need */ +/* not be set and are assumed to be zero. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - COMPLEX*16 array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the */ +/* vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - COMPLEX*16 . */ +/* On entry, BETA specifies the scalar beta. */ +/* Unchanged on exit. */ + +/* Y - COMPLEX*16 array of DIMENSION at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the */ +/* vector y. On exit, Y is overwritten by the updated vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*k < 0) { + info = 3; + } else if (*lda < *k + 1) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("ZHBMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && + beta->i == 0.))) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array A */ +/* are accessed sequentially with one pass through A. */ + +/* First form y := beta*y. */ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when upper triangle of A is stored. */ + + kplus1 = *k + 1; + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + i__2 = i__; + i__3 = i__; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__2 = i__; + z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i = + z__3.r * x[i__2].i + z__3.i * x[i__2].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L50: */ + } + i__4 = j; + i__2 = j; + i__3 = kplus1 + j * a_dim1; + d__1 = a[i__3].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__4].r = z__1.r, y[i__4].i = z__1.i; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__4 = jx; + z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i = + alpha->r * x[i__4].i + alpha->i * x[i__4].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + i__4 = iy; + i__2 = iy; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; + y[i__4].r = z__1.r, y[i__4].i = z__1.i; + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = + z__3.r * x[i__4].i + z__3.i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__3 = jy; + i__4 = jy; + i__2 = kplus1 + j * a_dim1; + d__1 = a[i__2].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + jx += *incx; + jy += *incy; + if (j > *k) { + kx += *incx; + ky += *incy; + } +/* L80: */ + } + } + } else { + +/* Form y when lower triangle of A is stored. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__3 = j; + z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = + alpha->r * x[i__3].i + alpha->i * x[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__3 = j; + i__4 = j; + i__2 = j * a_dim1 + 1; + d__1 = a[i__2].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + i__4 = i__; + i__2 = i__; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; + y[i__4].r = z__1.r, y[i__4].i = z__1.i; + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = i__; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = + z__3.r * x[i__4].i + z__3.i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L90: */ + } + i__3 = j; + i__4 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__3 = jx; + z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i = + alpha->r * x[i__3].i + alpha->i * x[i__3].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__3 = jy; + i__4 = jy; + i__2 = j * a_dim1 + 1; + d__1 = a[i__2].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + l = 1 - j; + ix = jx; + iy = jy; +/* Computing MIN */ + i__4 = *n, i__2 = j + *k; + i__3 = min(i__4,i__2); + for (i__ = j + 1; i__ <= i__3; ++i__) { + ix += *incx; + iy += *incy; + i__4 = iy; + i__2 = iy; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, + z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5] + .r; + z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i; + y[i__4].r = z__1.r, y[i__4].i = z__1.i; + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i = + z__3.r * x[i__4].i + z__3.i * x[i__4].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L110: */ + } + i__3 = jy; + i__4 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + jx += *incx; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of ZHBMV . */ + +} /* zhbmv_ */ + diff --git a/external/eigen3/blas/f2c/zhpmv.c b/external/eigen3/blas/f2c/zhpmv.c new file mode 100644 index 0000000..fbe2f42 --- /dev/null +++ b/external/eigen3/blas/f2c/zhpmv.c @@ -0,0 +1,438 @@ +/* zhpmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int zhpmv_(char *uplo, integer *n, doublecomplex *alpha, + doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex * + beta, doublecomplex *y, integer *incy, ftnlen uplo_len) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info; + doublecomplex temp1, temp2; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ZHPMV performs the matrix-vector operation */ + +/* y := alpha*A*x + beta*y, */ + +/* where alpha and beta are scalars, x and y are n element vectors and */ +/* A is an n by n hermitian matrix, supplied in packed form. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the upper or lower */ +/* triangular part of the matrix A is supplied in the packed */ +/* array AP as follows: */ + +/* UPLO = 'U' or 'u' The upper triangular part of A is */ +/* supplied in AP. */ + +/* UPLO = 'L' or 'l' The lower triangular part of A is */ +/* supplied in AP. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* ALPHA - COMPLEX*16 . */ +/* On entry, ALPHA specifies the scalar alpha. */ +/* Unchanged on exit. */ + +/* AP - COMPLEX*16 array of DIMENSION at least */ +/* ( ( n*( n + 1 ) )/2 ). */ +/* Before entry with UPLO = 'U' or 'u', the array AP must */ +/* contain the upper triangular part of the hermitian matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ +/* and a( 2, 2 ) respectively, and so on. */ +/* Before entry with UPLO = 'L' or 'l', the array AP must */ +/* contain the lower triangular part of the hermitian matrix */ +/* packed sequentially, column by column, so that AP( 1 ) */ +/* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ +/* and a( 3, 1 ) respectively, and so on. */ +/* Note that the imaginary parts of the diagonal elements need */ +/* not be set and are assumed to be zero. */ +/* Unchanged on exit. */ + +/* X - COMPLEX*16 array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. */ +/* Unchanged on exit. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* BETA - COMPLEX*16 . */ +/* On entry, BETA specifies the scalar beta. When BETA is */ +/* supplied as zero then Y need not be set on input. */ +/* Unchanged on exit. */ + +/* Y - COMPLEX*16 array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCY ) ). */ +/* Before entry, the incremented array Y must contain the n */ +/* element vector y. On exit, Y is overwritten by the updated */ +/* vector y. */ + +/* INCY - INTEGER. */ +/* On entry, INCY specifies the increment for the elements of */ +/* Y. INCY must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + --y; + --x; + --ap; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (*n < 0) { + info = 2; + } else if (*incx == 0) { + info = 6; + } else if (*incy == 0) { + info = 9; + } + if (info != 0) { + xerbla_("ZHPMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && + beta->i == 0.))) { + return 0; + } + +/* Set up the start points in X and Y. */ + + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (*n - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (*n - 1) * *incy; + } + +/* Start the operations. In this version the elements of the array AP */ +/* are accessed sequentially with one pass through AP. */ + +/* First form y := beta*y. */ + + if (beta->r != 1. || beta->i != 0.) { + if (*incy == 1) { + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + y[i__2].r = 0., y[i__2].i = 0.; +/* L10: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + i__3 = i__; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; +/* L20: */ + } + } + } else { + iy = ky; + if (beta->r == 0. && beta->i == 0.) { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + y[i__2].r = 0., y[i__2].i = 0.; + iy += *incy; +/* L30: */ + } + } else { + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = iy; + i__3 = iy; + z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, + z__1.i = beta->r * y[i__3].i + beta->i * y[i__3] + .r; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + iy += *incy; +/* L40: */ + } + } + } + } + if (alpha->r == 0. && alpha->i == 0.) { + return 0; + } + kk = 1; + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + +/* Form y when AP contains the upper triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + k = kk; + i__2 = j - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, + z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &ap[k]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ++k; +/* L50: */ + } + i__2 = j; + i__3 = j; + i__4 = kk + j - 1; + d__1 = ap[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + kk += j; +/* L60: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + ix = kx; + iy = ky; + i__2 = kk + j - 2; + for (k = kk; k <= i__2; ++k) { + i__3 = iy; + i__4 = iy; + i__5 = k; + z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, + z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &ap[k]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ix += *incx; + iy += *incy; +/* L70: */ + } + i__2 = jy; + i__3 = jy; + i__4 = kk + j - 1; + d__1 = ap[i__4].r; + z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i; + z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i; + z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + kk += j; +/* L80: */ + } + } + } else { + +/* Form y when AP contains the lower triangle. */ + + if (*incx == 1 && *incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = j; + i__3 = j; + i__4 = kk; + d__1 = ap[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + k = kk + 1; + i__2 = *n; + for (i__ = j + 1; i__ <= i__2; ++i__) { + i__3 = i__; + i__4 = i__; + i__5 = k; + z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, + z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &ap[k]); + i__3 = i__; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; + ++k; +/* L90: */ + } + i__2 = j; + i__3 = j; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + kk += *n - j + 1; +/* L100: */ + } + } else { + jx = kx; + jy = ky; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = jx; + z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i = + alpha->r * x[i__2].i + alpha->i * x[i__2].r; + temp1.r = z__1.r, temp1.i = z__1.i; + temp2.r = 0., temp2.i = 0.; + i__2 = jy; + i__3 = jy; + i__4 = kk; + d__1 = ap[i__4].r; + z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + ix = jx; + iy = jy; + i__2 = kk + *n - j; + for (k = kk + 1; k <= i__2; ++k) { + ix += *incx; + iy += *incy; + i__3 = iy; + i__4 = iy; + i__5 = k; + z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, + z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5] + .r; + z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i; + y[i__3].r = z__1.r, y[i__3].i = z__1.i; + d_cnjg(&z__3, &ap[k]); + i__3 = ix; + z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i = + z__3.r * x[i__3].i + z__3.i * x[i__3].r; + z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i; + temp2.r = z__1.r, temp2.i = z__1.i; +/* L110: */ + } + i__2 = jy; + i__3 = jy; + z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = + alpha->r * temp2.i + alpha->i * temp2.r; + z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i; + y[i__2].r = z__1.r, y[i__2].i = z__1.i; + jx += *incx; + jy += *incy; + kk += *n - j + 1; +/* L120: */ + } + } + } + + return 0; + +/* End of ZHPMV . */ + +} /* zhpmv_ */ + diff --git a/external/eigen3/blas/f2c/ztbmv.c b/external/eigen3/blas/f2c/ztbmv.c new file mode 100644 index 0000000..4cdcd7f --- /dev/null +++ b/external/eigen3/blas/f2c/ztbmv.c @@ -0,0 +1,647 @@ +/* ztbmv.f -- translated by f2c (version 20100827). + You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "datatypes.h" + +/* Subroutine */ int ztbmv_(char *uplo, char *trans, char *diag, integer *n, + integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer + *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; + doublecomplex z__1, z__2, z__3; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + integer i__, j, l, ix, jx, kx, info; + doublecomplex temp; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + integer kplus1; + extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen); + logical noconj, nounit; + +/* .. Scalar Arguments .. */ +/* .. */ +/* .. Array Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* ZTBMV performs one of the matrix-vector operations */ + +/* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ + +/* where x is an n element vector and A is an n by n unit, or non-unit, */ +/* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ + +/* Arguments */ +/* ========== */ + +/* UPLO - CHARACTER*1. */ +/* On entry, UPLO specifies whether the matrix is an upper or */ +/* lower triangular matrix as follows: */ + +/* UPLO = 'U' or 'u' A is an upper triangular matrix. */ + +/* UPLO = 'L' or 'l' A is a lower triangular matrix. */ + +/* Unchanged on exit. */ + +/* TRANS - CHARACTER*1. */ +/* On entry, TRANS specifies the operation to be performed as */ +/* follows: */ + +/* TRANS = 'N' or 'n' x := A*x. */ + +/* TRANS = 'T' or 't' x := A'*x. */ + +/* TRANS = 'C' or 'c' x := conjg( A' )*x. */ + +/* Unchanged on exit. */ + +/* DIAG - CHARACTER*1. */ +/* On entry, DIAG specifies whether or not A is unit */ +/* triangular as follows: */ + +/* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ + +/* DIAG = 'N' or 'n' A is not assumed to be unit */ +/* triangular. */ + +/* Unchanged on exit. */ + +/* N - INTEGER. */ +/* On entry, N specifies the order of the matrix A. */ +/* N must be at least zero. */ +/* Unchanged on exit. */ + +/* K - INTEGER. */ +/* On entry with UPLO = 'U' or 'u', K specifies the number of */ +/* super-diagonals of the matrix A. */ +/* On entry with UPLO = 'L' or 'l', K specifies the number of */ +/* sub-diagonals of the matrix A. */ +/* K must satisfy 0 .le. K. */ +/* Unchanged on exit. */ + +/* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ +/* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ +/* by n part of the array A must contain the upper triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row */ +/* ( k + 1 ) of the array, the first super-diagonal starting at */ +/* position 2 in row k, and so on. The top left k by k triangle */ +/* of the array A is not referenced. */ +/* The following program segment will transfer an upper */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = K + 1 - J */ +/* DO 10, I = MAX( 1, J - K ), J */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ +/* by n part of the array A must contain the lower triangular */ +/* band part of the matrix of coefficients, supplied column by */ +/* column, with the leading diagonal of the matrix in row 1 of */ +/* the array, the first sub-diagonal starting at position 1 in */ +/* row 2, and so on. The bottom right k by k triangle of the */ +/* array A is not referenced. */ +/* The following program segment will transfer a lower */ +/* triangular band matrix from conventional full matrix storage */ +/* to band storage: */ + +/* DO 20, J = 1, N */ +/* M = 1 - J */ +/* DO 10, I = J, MIN( N, J + K ) */ +/* A( M + I, J ) = matrix( I, J ) */ +/* 10 CONTINUE */ +/* 20 CONTINUE */ + +/* Note that when DIAG = 'U' or 'u' the elements of the array A */ +/* corresponding to the diagonal elements of the matrix are not */ +/* referenced, but are assumed to be unity. */ +/* Unchanged on exit. */ + +/* LDA - INTEGER. */ +/* On entry, LDA specifies the first dimension of A as declared */ +/* in the calling (sub) program. LDA must be at least */ +/* ( k + 1 ). */ +/* Unchanged on exit. */ + +/* X - COMPLEX*16 array of dimension at least */ +/* ( 1 + ( n - 1 )*abs( INCX ) ). */ +/* Before entry, the incremented array X must contain the n */ +/* element vector x. On exit, X is overwritten with the */ +/* tranformed vector x. */ + +/* INCX - INTEGER. */ +/* On entry, INCX specifies the increment for the elements of */ +/* X. INCX must not be zero. */ +/* Unchanged on exit. */ + +/* Further Details */ +/* =============== */ + +/* Level 2 Blas routine. */ + +/* -- Written on 22-October-1986. */ +/* Jack Dongarra, Argonne National Lab. */ +/* Jeremy Du Croz, Nag Central Office. */ +/* Sven Hammarling, Nag Central Office. */ +/* Richard Hanson, Sandia National Labs. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ + +/* Test the input parameters. */ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + + /* Function Body */ + info = 0; + if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", ( + ftnlen)1, (ftnlen)1)) { + info = 1; + } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, + "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, ( + ftnlen)1)) { + info = 2; + } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, + "N", (ftnlen)1, (ftnlen)1)) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < *k + 1) { + info = 7; + } else if (*incx == 0) { + info = 9; + } + if (info != 0) { + xerbla_("ZTBMV ", &info, (ftnlen)6); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0) { + return 0; + } + + noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1); + nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); + +/* Set up the start point in X if the increment is not unity. This */ +/* will be ( N - 1 )*INCX too small for descending loops. */ + + if (*incx <= 0) { + kx = 1 - (*n - 1) * *incx; + } else if (*incx != 1) { + kx = 1; + } + +/* Start the operations. In this version the elements of A are */ +/* accessed sequentially with one pass through A. */ + + if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) { + +/* Form x := A*x. */ + + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + kplus1 = *k + 1; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + if (x[i__2].r != 0. || x[i__2].i != 0.) { + i__2 = j; + temp.r = x[i__2].r, temp.i = x[i__2].i; + l = kplus1 - j; +/* Computing MAX */ + i__2 = 1, i__3 = j - *k; + i__4 = j - 1; + for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) { + i__2 = i__; + i__3 = i__; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__2].r = z__1.r, x[i__2].i = z__1.i; +/* L10: */ + } + if (nounit) { + i__4 = j; + i__2 = j; + i__3 = kplus1 + j * a_dim1; + z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[ + i__3].i, z__1.i = x[i__2].r * a[i__3].i + + x[i__2].i * a[i__3].r; + x[i__4].r = z__1.r, x[i__4].i = z__1.i; + } + } +/* L20: */ + } + } else { + jx = kx; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__4 = jx; + if (x[i__4].r != 0. || x[i__4].i != 0.) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + ix = kx; + l = kplus1 - j; +/* Computing MAX */ + i__4 = 1, i__2 = j - *k; + i__3 = j - 1; + for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) { + i__4 = ix; + i__2 = ix; + i__5 = l + i__ + j * a_dim1; + z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, + z__2.i = temp.r * a[i__5].i + temp.i * a[ + i__5].r; + z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + + z__2.i; + x[i__4].r = z__1.r, x[i__4].i = z__1.i; + ix += *incx; +/* L30: */ + } + if (nounit) { + i__3 = jx; + i__4 = jx; + i__2 = kplus1 + j * a_dim1; + z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[ + i__2].i, z__1.i = x[i__4].r * a[i__2].i + + x[i__4].i * a[i__2].r; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + jx += *incx; + if (j > *k) { + kx += *incx; + } +/* L40: */ + } + } + } else { + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__1 = j; + if (x[i__1].r != 0. || x[i__1].i != 0.) { + i__1 = j; + temp.r = x[i__1].r, temp.i = x[i__1].i; + l = 1 - j; +/* Computing MIN */ + i__1 = *n, i__3 = j + *k; + i__4 = j + 1; + for (i__ = min(i__1,i__3); i__ >= i__4; --i__) { + i__1 = i__; + i__3 = i__; + i__2 = l + i__ + j * a_dim1; + z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__2.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + + z__2.i; + x[i__1].r = z__1.r, x[i__1].i = z__1.i; +/* L50: */ + } + if (nounit) { + i__4 = j; + i__1 = j; + i__3 = j * a_dim1 + 1; + z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[ + i__3].i, z__1.i = x[i__1].r * a[i__3].i + + x[i__1].i * a[i__3].r; + x[i__4].r = z__1.r, x[i__4].i = z__1.i; + } + } +/* L60: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__4 = jx; + if (x[i__4].r != 0. || x[i__4].i != 0.) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + ix = kx; + l = 1 - j; +/* Computing MIN */ + i__4 = *n, i__1 = j + *k; + i__3 = j + 1; + for (i__ = min(i__4,i__1); i__ >= i__3; --i__) { + i__4 = ix; + i__1 = ix; + i__2 = l + i__ + j * a_dim1; + z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, + z__2.i = temp.r * a[i__2].i + temp.i * a[ + i__2].r; + z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + + z__2.i; + x[i__4].r = z__1.r, x[i__4].i = z__1.i; + ix -= *incx; +/* L70: */ + } + if (nounit) { + i__3 = jx; + i__4 = jx; + i__1 = j * a_dim1 + 1; + z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[ + i__1].i, z__1.i = x[i__4].r * a[i__1].i + + x[i__4].i * a[i__1].r; + x[i__3].r = z__1.r, x[i__3].i = z__1.i; + } + } + jx -= *incx; + if (*n - j >= *k) { + kx -= *incx; + } +/* L80: */ + } + } + } + } else { + +/* Form x := A'*x or x := conjg( A' )*x. */ + + if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) { + kplus1 = *k + 1; + if (*incx == 1) { + for (j = *n; j >= 1; --j) { + i__3 = j; + temp.r = x[i__3].r, temp.i = x[i__3].i; + l = kplus1 - j; + if (noconj) { + if (nounit) { + i__3 = kplus1 + j * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__1.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + i__4 = l + i__ + j * a_dim1; + i__1 = i__; + z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[ + i__1].i, z__2.i = a[i__4].r * x[i__1].i + + a[i__4].i * x[i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L90: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = i__; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, + z__2.i = z__3.r * x[i__4].i + z__3.i * x[ + i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L100: */ + } + } + i__3 = j; + x[i__3].r = temp.r, x[i__3].i = temp.i; +/* L110: */ + } + } else { + kx += (*n - 1) * *incx; + jx = kx; + for (j = *n; j >= 1; --j) { + i__3 = jx; + temp.r = x[i__3].r, temp.i = x[i__3].i; + kx -= *incx; + ix = kx; + l = kplus1 - j; + if (noconj) { + if (nounit) { + i__3 = kplus1 + j * a_dim1; + z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, + z__1.i = temp.r * a[i__3].i + temp.i * a[ + i__3].r; + temp.r = z__1.r, temp.i = z__1.i; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + i__4 = l + i__ + j * a_dim1; + i__1 = ix; + z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[ + i__1].i, z__2.i = a[i__4].r * x[i__1].i + + a[i__4].i * x[i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; +/* L120: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } +/* Computing MAX */ + i__4 = 1, i__1 = j - *k; + i__3 = max(i__4,i__1); + for (i__ = j - 1; i__ >= i__3; --i__) { + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__4 = ix; + z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, + z__2.i = z__3.r * x[i__4].i + z__3.i * x[ + i__4].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix -= *incx; +/* L130: */ + } + } + i__3 = jx; + x[i__3].r = temp.r, x[i__3].i = temp.i; + jx -= *incx; +/* L140: */ + } + } + } else { + if (*incx == 1) { + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = j; + temp.r = x[i__4].r, temp.i = x[i__4].i; + l = 1 - j; + if (noconj) { + if (nounit) { + i__4 = j * a_dim1 + 1; + z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + z__1.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + temp.r = z__1.r, temp.i = z__1.i; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + i__1 = l + i__ + j * a_dim1; + i__2 = i__; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, z__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L150: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j * a_dim1 + 1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__1 = i__; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; +/* L160: */ + } + } + i__4 = j; + x[i__4].r = temp.r, x[i__4].i = temp.i; +/* L170: */ + } + } else { + jx = kx; + i__3 = *n; + for (j = 1; j <= i__3; ++j) { + i__4 = jx; + temp.r = x[i__4].r, temp.i = x[i__4].i; + kx += *incx; + ix = kx; + l = 1 - j; + if (noconj) { + if (nounit) { + i__4 = j * a_dim1 + 1; + z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, + z__1.i = temp.r * a[i__4].i + temp.i * a[ + i__4].r; + temp.r = z__1.r, temp.i = z__1.i; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + i__1 = l + i__ + j * a_dim1; + i__2 = ix; + z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[ + i__2].i, z__2.i = a[i__1].r * x[i__2].i + + a[i__1].i * x[i__2].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L180: */ + } + } else { + if (nounit) { + d_cnjg(&z__2, &a[j * a_dim1 + 1]); + z__1.r = temp.r * z__2.r - temp.i * z__2.i, + z__1.i = temp.r * z__2.i + temp.i * + z__2.r; + temp.r = z__1.r, temp.i = z__1.i; + } +/* Computing MIN */ + i__1 = *n, i__2 = j + *k; + i__4 = min(i__1,i__2); + for (i__ = j + 1; i__ <= i__4; ++i__) { + d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); + i__1 = ix; + z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, + z__2.i = z__3.r * x[i__1].i + z__3.i * x[ + i__1].r; + z__1.r = temp.r + z__2.r, z__1.i = temp.i + + z__2.i; + temp.r = z__1.r, temp.i = z__1.i; + ix += *incx; +/* L190: */ + } + } + i__4 = jx; + x[i__4].r = temp.r, x[i__4].i = temp.i; + jx += *incx; +/* L200: */ + } + } + } + } + + return 0; + +/* End of ZTBMV . */ + +} /* ztbmv_ */ + diff --git a/external/eigen3/blas/complexdots.f b/external/eigen3/blas/fortran/complexdots.f similarity index 100% rename from external/eigen3/blas/complexdots.f rename to external/eigen3/blas/fortran/complexdots.f diff --git a/external/eigen3/blas/level1_cplx_impl.h b/external/eigen3/blas/level1_cplx_impl.h index 283b9f8..719f5ba 100644 --- a/external/eigen3/blas/level1_cplx_impl.h +++ b/external/eigen3/blas/level1_cplx_impl.h @@ -32,45 +32,52 @@ RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),asum_)(int *n, if(*n<=0) return 0; - if(*incx==1) return vector(x,*n).unaryExpr().sum(); - else return vector(x,*n,std::abs(*incx)).unaryExpr().sum(); + if(*incx==1) return make_vector(x,*n).unaryExpr().sum(); + else return make_vector(x,*n,std::abs(*incx)).unaryExpr().sum(); } // computes a dot product of a conjugated vector with another vector. int EIGEN_BLAS_FUNC(dotcw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar* pres) { // std::cerr << "_dotc " << *n << " " << *incx << " " << *incy << "\n"; + Scalar* res = reinterpret_cast(pres); - if(*n<=0) return 0; + if(*n<=0) + { + *res = Scalar(0); + return 0; + } Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar* res = reinterpret_cast(pres); - if(*incx==1 && *incy==1) *res = (vector(x,*n).dot(vector(y,*n))); - else if(*incx>0 && *incy>0) *res = (vector(x,*n,*incx).dot(vector(y,*n,*incy))); - else if(*incx<0 && *incy>0) *res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,*incy))); - else if(*incx>0 && *incy<0) *res = (vector(x,*n,*incx).dot(vector(y,*n,-*incy).reverse())); - else if(*incx<0 && *incy<0) *res = (vector(x,*n,-*incx).reverse().dot(vector(y,*n,-*incy).reverse())); + if(*incx==1 && *incy==1) *res = (make_vector(x,*n).dot(make_vector(y,*n))); + else if(*incx>0 && *incy>0) *res = (make_vector(x,*n,*incx).dot(make_vector(y,*n,*incy))); + else if(*incx<0 && *incy>0) *res = (make_vector(x,*n,-*incx).reverse().dot(make_vector(y,*n,*incy))); + else if(*incx>0 && *incy<0) *res = (make_vector(x,*n,*incx).dot(make_vector(y,*n,-*incy).reverse())); + else if(*incx<0 && *incy<0) *res = (make_vector(x,*n,-*incx).reverse().dot(make_vector(y,*n,-*incy).reverse())); return 0; } // computes a vector-vector dot product without complex conjugation. int EIGEN_BLAS_FUNC(dotuw)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar* pres) { -// std::cerr << "_dotu " << *n << " " << *incx << " " << *incy << "\n"; + Scalar* res = reinterpret_cast(pres); - if(*n<=0) return 0; + if(*n<=0) + { + *res = Scalar(0); + return 0; + } Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar* res = reinterpret_cast(pres); - if(*incx==1 && *incy==1) *res = (vector(x,*n).cwiseProduct(vector(y,*n))).sum(); - else if(*incx>0 && *incy>0) *res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum(); - else if(*incx<0 && *incy>0) *res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum(); - else if(*incx>0 && *incy<0) *res = (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); - else if(*incx<0 && *incy<0) *res = (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); + if(*incx==1 && *incy==1) *res = (make_vector(x,*n).cwiseProduct(make_vector(y,*n))).sum(); + else if(*incx>0 && *incy>0) *res = (make_vector(x,*n,*incx).cwiseProduct(make_vector(y,*n,*incy))).sum(); + else if(*incx<0 && *incy>0) *res = (make_vector(x,*n,-*incx).reverse().cwiseProduct(make_vector(y,*n,*incy))).sum(); + else if(*incx>0 && *incy<0) *res = (make_vector(x,*n,*incx).cwiseProduct(make_vector(y,*n,-*incy).reverse())).sum(); + else if(*incx<0 && *incy<0) *res = (make_vector(x,*n,-*incx).reverse().cwiseProduct(make_vector(y,*n,-*incy).reverse())).sum(); return 0; } @@ -82,9 +89,9 @@ RealScalar EIGEN_CAT(EIGEN_CAT(REAL_SCALAR_SUFFIX,SCALAR_SUFFIX),nrm2_)(int *n, Scalar* x = reinterpret_cast(px); if(*incx==1) - return vector(x,*n).stableNorm(); + return make_vector(x,*n).stableNorm(); - return vector(x,*n,*incx).stableNorm(); + return make_vector(x,*n,*incx).stableNorm(); } int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),rot_)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) @@ -96,8 +103,8 @@ int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),rot_)(int *n, RealScal RealScalar c = *pc; RealScalar s = *ps; - StridedVectorType vx(vector(x,*n,std::abs(*incx))); - StridedVectorType vy(vector(y,*n,std::abs(*incy))); + StridedVectorType vx(make_vector(x,*n,std::abs(*incx))); + StridedVectorType vy(make_vector(y,*n,std::abs(*incy))); Reverse rvx(vx); Reverse rvy(vy); @@ -119,9 +126,8 @@ int EIGEN_CAT(EIGEN_CAT(SCALAR_SUFFIX,REAL_SCALAR_SUFFIX),scal_)(int *n, RealSca // std::cerr << "__scal " << *n << " " << alpha << " " << *incx << "\n"; - if(*incx==1) vector(x,*n) *= alpha; - else vector(x,*n,std::abs(*incx)) *= alpha; + if(*incx==1) make_vector(x,*n) *= alpha; + else make_vector(x,*n,std::abs(*incx)) *= alpha; return 0; } - diff --git a/external/eigen3/blas/level1_impl.h b/external/eigen3/blas/level1_impl.h index b08c2f6..f857bfa 100644 --- a/external/eigen3/blas/level1_impl.h +++ b/external/eigen3/blas/level1_impl.h @@ -9,19 +9,19 @@ #include "common.h" -int EIGEN_BLAS_FUNC(axpy)(int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy) +int EIGEN_BLAS_FUNC(axpy)(const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *py, const int *incy) { - Scalar* x = reinterpret_cast(px); + const Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); if(*n<=0) return 0; - if(*incx==1 && *incy==1) vector(y,*n) += alpha * vector(x,*n); - else if(*incx>0 && *incy>0) vector(y,*n,*incy) += alpha * vector(x,*n,*incx); - else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse() += alpha * vector(x,*n,*incx); - else if(*incx<0 && *incy>0) vector(y,*n,*incy) += alpha * vector(x,*n,-*incx).reverse(); - else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse() += alpha * vector(x,*n,-*incx).reverse(); + if(*incx==1 && *incy==1) make_vector(y,*n) += alpha * make_vector(x,*n); + else if(*incx>0 && *incy>0) make_vector(y,*n,*incy) += alpha * make_vector(x,*n,*incx); + else if(*incx>0 && *incy<0) make_vector(y,*n,-*incy).reverse() += alpha * make_vector(x,*n,*incx); + else if(*incx<0 && *incy>0) make_vector(y,*n,*incy) += alpha * make_vector(x,*n,-*incx).reverse(); + else if(*incx<0 && *incy<0) make_vector(y,*n,-*incy).reverse() += alpha * make_vector(x,*n,-*incx).reverse(); return 0; } @@ -35,7 +35,7 @@ int EIGEN_BLAS_FUNC(copy)(int *n, RealScalar *px, int *incx, RealScalar *py, int // be carefull, *incx==0 is allowed !! if(*incx==1 && *incy==1) - vector(y,*n) = vector(x,*n); + make_vector(y,*n) = make_vector(x,*n); else { if(*incx<0) x = x - (*n-1)*(*incx); @@ -57,27 +57,27 @@ int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amax_)(int *n, RealScalar *px, int *inc Scalar* x = reinterpret_cast(px); DenseIndex ret; - if(*incx==1) vector(x,*n).cwiseAbs().maxCoeff(&ret); - else vector(x,*n,std::abs(*incx)).cwiseAbs().maxCoeff(&ret); - return ret+1; + if(*incx==1) make_vector(x,*n).cwiseAbs().maxCoeff(&ret); + else make_vector(x,*n,std::abs(*incx)).cwiseAbs().maxCoeff(&ret); + return int(ret)+1; } int EIGEN_CAT(EIGEN_CAT(i,SCALAR_SUFFIX),amin_)(int *n, RealScalar *px, int *incx) { if(*n<=0) return 0; Scalar* x = reinterpret_cast(px); - + DenseIndex ret; - if(*incx==1) vector(x,*n).cwiseAbs().minCoeff(&ret); - else vector(x,*n,std::abs(*incx)).cwiseAbs().minCoeff(&ret); - return ret+1; + if(*incx==1) make_vector(x,*n).cwiseAbs().minCoeff(&ret); + else make_vector(x,*n,std::abs(*incx)).cwiseAbs().minCoeff(&ret); + return int(ret)+1; } int EIGEN_BLAS_FUNC(rotg)(RealScalar *pa, RealScalar *pb, RealScalar *pc, RealScalar *ps) { using std::sqrt; using std::abs; - + Scalar& a = *reinterpret_cast(pa); Scalar& b = *reinterpret_cast(pb); RealScalar* c = pc; @@ -143,8 +143,8 @@ int EIGEN_BLAS_FUNC(scal)(int *n, RealScalar *palpha, RealScalar *px, int *incx) Scalar* x = reinterpret_cast(px); Scalar alpha = *reinterpret_cast(palpha); - if(*incx==1) vector(x,*n) *= alpha; - else vector(x,*n,std::abs(*incx)) *= alpha; + if(*incx==1) make_vector(x,*n) *= alpha; + else make_vector(x,*n,std::abs(*incx)) *= alpha; return 0; } @@ -156,12 +156,11 @@ int EIGEN_BLAS_FUNC(swap)(int *n, RealScalar *px, int *incx, RealScalar *py, int Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - if(*incx==1 && *incy==1) vector(y,*n).swap(vector(x,*n)); - else if(*incx>0 && *incy>0) vector(y,*n,*incy).swap(vector(x,*n,*incx)); - else if(*incx>0 && *incy<0) vector(y,*n,-*incy).reverse().swap(vector(x,*n,*incx)); - else if(*incx<0 && *incy>0) vector(y,*n,*incy).swap(vector(x,*n,-*incx).reverse()); - else if(*incx<0 && *incy<0) vector(y,*n,-*incy).reverse().swap(vector(x,*n,-*incx).reverse()); + if(*incx==1 && *incy==1) make_vector(y,*n).swap(make_vector(x,*n)); + else if(*incx>0 && *incy>0) make_vector(y,*n,*incy).swap(make_vector(x,*n,*incx)); + else if(*incx>0 && *incy<0) make_vector(y,*n,-*incy).reverse().swap(make_vector(x,*n,*incx)); + else if(*incx<0 && *incy>0) make_vector(y,*n,*incy).swap(make_vector(x,*n,-*incx).reverse()); + else if(*incx<0 && *incy<0) make_vector(y,*n,-*incy).reverse().swap(make_vector(x,*n,-*incx).reverse()); return 1; } - diff --git a/external/eigen3/blas/level1_real_impl.h b/external/eigen3/blas/level1_real_impl.h index 8acecdf..02586d5 100644 --- a/external/eigen3/blas/level1_real_impl.h +++ b/external/eigen3/blas/level1_real_impl.h @@ -19,8 +19,8 @@ RealScalar EIGEN_BLAS_FUNC(asum)(int *n, RealScalar *px, int *incx) if(*n<=0) return 0; - if(*incx==1) return vector(x,*n).cwiseAbs().sum(); - else return vector(x,*n,std::abs(*incx)).cwiseAbs().sum(); + if(*incx==1) return make_vector(x,*n).cwiseAbs().sum(); + else return make_vector(x,*n,std::abs(*incx)).cwiseAbs().sum(); } // computes a vector-vector dot product. @@ -33,11 +33,11 @@ Scalar EIGEN_BLAS_FUNC(dot)(int *n, RealScalar *px, int *incx, RealScalar *py, i Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - if(*incx==1 && *incy==1) return (vector(x,*n).cwiseProduct(vector(y,*n))).sum(); - else if(*incx>0 && *incy>0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,*incy))).sum(); - else if(*incx<0 && *incy>0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,*incy))).sum(); - else if(*incx>0 && *incy<0) return (vector(x,*n,*incx).cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); - else if(*incx<0 && *incy<0) return (vector(x,*n,-*incx).reverse().cwiseProduct(vector(y,*n,-*incy).reverse())).sum(); + if(*incx==1 && *incy==1) return (make_vector(x,*n).cwiseProduct(make_vector(y,*n))).sum(); + else if(*incx>0 && *incy>0) return (make_vector(x,*n,*incx).cwiseProduct(make_vector(y,*n,*incy))).sum(); + else if(*incx<0 && *incy>0) return (make_vector(x,*n,-*incx).reverse().cwiseProduct(make_vector(y,*n,*incy))).sum(); + else if(*incx>0 && *incy<0) return (make_vector(x,*n,*incx).cwiseProduct(make_vector(y,*n,-*incy).reverse())).sum(); + else if(*incx<0 && *incy<0) return (make_vector(x,*n,-*incx).reverse().cwiseProduct(make_vector(y,*n,-*incy).reverse())).sum(); else return 0; } @@ -50,8 +50,8 @@ Scalar EIGEN_BLAS_FUNC(nrm2)(int *n, RealScalar *px, int *incx) Scalar* x = reinterpret_cast(px); - if(*incx==1) return vector(x,*n).stableNorm(); - else return vector(x,*n,std::abs(*incx)).stableNorm(); + if(*incx==1) return make_vector(x,*n).stableNorm(); + else return make_vector(x,*n,std::abs(*incx)).stableNorm(); } int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, RealScalar *ps) @@ -64,8 +64,8 @@ int EIGEN_BLAS_FUNC(rot)(int *n, RealScalar *px, int *incx, RealScalar *py, int Scalar c = *reinterpret_cast(pc); Scalar s = *reinterpret_cast(ps); - StridedVectorType vx(vector(x,*n,std::abs(*incx))); - StridedVectorType vy(vector(y,*n,std::abs(*incy))); + StridedVectorType vx(make_vector(x,*n,std::abs(*incx))); + StridedVectorType vy(make_vector(y,*n,std::abs(*incy))); Reverse rvx(vx); Reverse rvy(vy); diff --git a/external/eigen3/blas/level2_cplx_impl.h b/external/eigen3/blas/level2_cplx_impl.h index b850b6c..e3ce614 100644 --- a/external/eigen3/blas/level2_cplx_impl.h +++ b/external/eigen3/blas/level2_cplx_impl.h @@ -16,28 +16,22 @@ * where alpha and beta are scalars, x and y are n element vectors and * A is an n by n hermitian matrix. */ -int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) +int EIGEN_BLAS_FUNC(hemv)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, + const RealScalar *px, const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) { - typedef void (*functype)(int, const Scalar*, int, const Scalar*, int, Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_matrix_vector_product::run); - func[LO] = (internal::selfadjoint_matrix_vector_product::run); - - init = true; - } - - Scalar* a = reinterpret_cast(pa); - Scalar* x = reinterpret_cast(px); + typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_matrix_vector_product::run), + // array index: LO + (internal::selfadjoint_matrix_vector_product::run), + }; + + const Scalar* a = reinterpret_cast(pa); + const Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // check arguments int info = 0; @@ -52,13 +46,13 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa if(*n==0) return 1; - Scalar* actual_x = get_compact_vector(x,*n,*incx); + const Scalar* actual_x = get_compact_vector(x,*n,*incx); Scalar* actual_y = get_compact_vector(y,*n,*incy); if(beta!=Scalar(1)) { - if(beta==Scalar(0)) vector(actual_y, *n).setZero(); - else vector(actual_y, *n) *= beta; + if(beta==Scalar(0)) make_vector(actual_y, *n).setZero(); + else make_vector(actual_y, *n) *= beta; } if(alpha!=Scalar(0)) @@ -67,7 +61,7 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa if(code>=2 || func[code]==0) return 0; - func[code](*n, a, *lda, actual_x, 1, actual_y, alpha); + func[code](*n, a, *lda, actual_x, actual_y, alpha); } if(actual_x!=x) delete[] actual_x; @@ -111,19 +105,12 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, RealScalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_packed_rank1_update::run); - func[LO] = (internal::selfadjoint_packed_rank1_update::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_packed_rank1_update::run), + // array index: LO + (internal::selfadjoint_packed_rank1_update::run), + }; Scalar* x = reinterpret_cast(px); Scalar* ap = reinterpret_cast(pap); @@ -162,19 +149,12 @@ int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::packed_rank2_update_selector::run); - func[LO] = (internal::packed_rank2_update_selector::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::packed_rank2_update_selector::run), + // array index: LO + (internal::packed_rank2_update_selector::run), + }; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); @@ -217,19 +197,12 @@ int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) { typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (selfadjoint_rank1_update::run); - func[LO] = (selfadjoint_rank1_update::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (selfadjoint_rank1_update::run), + // array index: LO + (selfadjoint_rank1_update::run), + }; Scalar* x = reinterpret_cast(px); Scalar* a = reinterpret_cast(pa); @@ -271,19 +244,12 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) { typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::rank2_update_selector::run); - func[LO] = (internal::rank2_update_selector::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::rank2_update_selector::run), + // array index: LO + (internal::rank2_update_selector::run), + }; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); diff --git a/external/eigen3/blas/level2_impl.h b/external/eigen3/blas/level2_impl.h index 5f39419..173f40b 100644 --- a/external/eigen3/blas/level2_impl.h +++ b/external/eigen3/blas/level2_impl.h @@ -9,29 +9,39 @@ #include "common.h" -int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc) +template +struct general_matrix_vector_product_wrapper { - typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int , Scalar *, int, Scalar); - static functype func[4]; - - static bool init = false; - if(!init) + static void run(Index rows, Index cols,const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsIncr, Scalar* res, Index resIncr, Scalar alpha) { - for(int k=0; k<4; ++k) - func[k] = 0; - - func[NOTR] = (internal::general_matrix_vector_product::run); - func[TR ] = (internal::general_matrix_vector_product::run); - func[ADJ ] = (internal::general_matrix_vector_product::run); - - init = true; + typedef internal::const_blas_data_mapper LhsMapper; + typedef internal::const_blas_data_mapper RhsMapper; + + internal::general_matrix_vector_product + ::run( + rows, cols, LhsMapper(lhs, lhsStride), RhsMapper(rhs, rhsIncr), res, resIncr, alpha); } +}; - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); +int EIGEN_BLAS_FUNC(gemv)(const char *opa, const int *m, const int *n, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *incb, const RealScalar *pbeta, RealScalar *pc, const int *incc) +{ + typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int , Scalar *, int, Scalar); + static const functype func[4] = { + // array index: NOTR + (general_matrix_vector_product_wrapper::run), + // array index: TR + (general_matrix_vector_product_wrapper::run), + // array index: ADJ + (general_matrix_vector_product_wrapper::run), + 0 + }; + + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // check arguments int info = 0; @@ -53,13 +63,13 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca if(code!=NOTR) std::swap(actual_m,actual_n); - Scalar* actual_b = get_compact_vector(b,actual_n,*incb); + const Scalar* actual_b = get_compact_vector(b,actual_n,*incb); Scalar* actual_c = get_compact_vector(c,actual_m,*incc); if(beta!=Scalar(1)) { - if(beta==Scalar(0)) vector(actual_c, actual_m).setZero(); - else vector(actual_c, actual_m) *= beta; + if(beta==Scalar(0)) make_vector(actual_c, actual_m).setZero(); + else make_vector(actual_c, actual_m) *= beta; } if(code>=4 || func[code]==0) @@ -73,37 +83,41 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca return 1; } -int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) +int EIGEN_BLAS_FUNC(trsv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda, RealScalar *pb, const int *incb) { typedef void (*functype)(int, const Scalar *, int, Scalar *); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector::run); - - init = true; - } - - Scalar* a = reinterpret_cast(pa); + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::triangular_solve_vector::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::triangular_solve_vector::run), + 0 + }; + + const Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); int info = 0; @@ -128,37 +142,41 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar -int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) +int EIGEN_BLAS_FUNC(trmv)(const char *uplo, const char *opa, const char *diag, const int *n, const RealScalar *pa, const int *lda, RealScalar *pb, const int *incb) { typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar&); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product::run); - - init = true; - } - - Scalar* a = reinterpret_cast(pa); + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::triangular_matrix_vector_product::run), + 0 + }; + + const Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); int info = 0; @@ -200,13 +218,13 @@ int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) { - Scalar* a = reinterpret_cast(pa); - Scalar* x = reinterpret_cast(px); + const Scalar* a = reinterpret_cast(pa); + const Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); int coeff_rows = *kl+*ku+1; - + int info = 0; if(OP(*trans)==INVALID) info = 1; else if(*m<0) info = 2; @@ -218,26 +236,26 @@ int EIGEN_BLAS_FUNC(gbmv)(char *trans, int *m, int *n, int *kl, int *ku, RealSca else if(*incy==0) info = 13; if(info) return xerbla_(SCALAR_SUFFIX_UP"GBMV ",&info,6); - + if(*m==0 || *n==0 || (alpha==Scalar(0) && beta==Scalar(1))) return 0; - + int actual_m = *m; int actual_n = *n; if(OP(*trans)!=NOTR) std::swap(actual_m,actual_n); - - Scalar* actual_x = get_compact_vector(x,actual_n,*incx); + + const Scalar* actual_x = get_compact_vector(x,actual_n,*incx); Scalar* actual_y = get_compact_vector(y,actual_m,*incy); - + if(beta!=Scalar(1)) { - if(beta==Scalar(0)) vector(actual_y, actual_m).setZero(); - else vector(actual_y, actual_m) *= beta; + if(beta==Scalar(0)) make_vector(actual_y, actual_m).setZero(); + else make_vector(actual_y, actual_m) *= beta; } - - MatrixType mat_coeffs(a,coeff_rows,*n,*lda); - + + ConstMatrixType mat_coeffs(a,coeff_rows,*n,*lda); + int nb = std::min(*n,(*m)+(*ku)); for(int j=0; j(pa); Scalar* x = reinterpret_cast(px); int coeff_rows = *k + 1; - + int info = 0; if(UPLO(*uplo)==INVALID) info = 1; else if(OP(*opa)==INVALID) info = 2; @@ -283,37 +301,37 @@ int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, Rea else if(*incx==0) info = 9; if(info) return xerbla_(SCALAR_SUFFIX_UP"TBMV ",&info,6); - + if(*n==0) return 0; - + int actual_n = *n; - + Scalar* actual_x = get_compact_vector(x,actual_n,*incx); - + MatrixType mat_coeffs(a,coeff_rows,*n,*lda); - + int ku = UPLO(*uplo)==UPPER ? *k : 0; int kl = UPLO(*uplo)==LOWER ? *k : 0; - + for(int j=0; j<*n; ++j) { int start = std::max(0,j - ku); int end = std::min((*m)-1,j + kl); int len = end - start + 1; int offset = (ku) - j + start; - + if(OP(*trans)==NOTR) - vector(actual_y+start,len) += (alpha*actual_x[j]) * mat_coeffs.col(j).segment(offset,len); + make_vector(actual_y+start,len) += (alpha*actual_x[j]) * mat_coeffs.col(j).segment(offset,len); else if(OP(*trans)==TR) - actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).transpose() * vector(actual_x+start,len) ).value(); + actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).transpose() * make_vector(actual_x+start,len) ).value(); else - actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).adjoint() * vector(actual_x+start,len) ).value(); - } - + actual_y[j] += alpha * ( mat_coeffs.col(j).segment(offset,len).adjoint() * make_vector(actual_x+start,len) ).value(); + } + if(actual_x!=x) delete[] actual_x; if(actual_y!=y) delete[] copy_back(actual_y,y,actual_m,*incy); - + return 0; } #endif @@ -332,37 +350,41 @@ int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, Rea int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) { typedef void (*functype)(int, int, const Scalar *, int, Scalar *); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::band_solve_triangular_selector::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::band_solve_triangular_selector::run), + 0, + }; Scalar* a = reinterpret_cast(pa); Scalar* x = reinterpret_cast(px); int coeff_rows = *k+1; - + int info = 0; if(UPLO(*uplo)==INVALID) info = 1; else if(OP(*op)==INVALID) info = 2; @@ -373,22 +395,22 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real else if(*incx==0) info = 9; if(info) return xerbla_(SCALAR_SUFFIX_UP"TBSV ",&info,6); - + if(*n==0 || (*k==0 && DIAG(*diag)==UNIT)) return 0; - + int actual_n = *n; - + Scalar* actual_x = get_compact_vector(x,actual_n,*incx); - + int code = OP(*op) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3); if(code>=16 || func[code]==0) return 0; func[code](*n, *k, a, *lda, actual_x); - + if(actual_x!=x) delete[] copy_back(actual_x,x,actual_n,*incx); - + return 0; } @@ -402,32 +424,36 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { typedef void (*functype)(int, const Scalar*, const Scalar*, Scalar*, Scalar); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_matrix_vector_product::run), + 0 + }; Scalar* ap = reinterpret_cast(pap); Scalar* x = reinterpret_cast(px); @@ -473,32 +499,36 @@ int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) { typedef void (*functype)(int, const Scalar*, Scalar*); - static functype func[16]; - - static bool init = false; - if(!init) - { - for(int k=0; k<16; ++k) - func[k] = 0; - - func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - - func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector::run); - - func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - - func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector::run); - - init = true; - } + static const functype func[16] = { + // array index: NOTR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: TR | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: ADJ | (UP << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + 0, + // array index: NOTR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: TR | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: ADJ | (LO << 2) | (NUNIT << 3) + (internal::packed_triangular_solve_vector::run), + 0, + // array index: NOTR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: TR | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: ADJ | (UP << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + 0, + // array index: NOTR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: TR | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + // array index: ADJ | (LO << 2) | (UNIT << 3) + (internal::packed_triangular_solve_vector::run), + 0 + }; Scalar* ap = reinterpret_cast(pap); Scalar* x = reinterpret_cast(px); @@ -521,4 +551,3 @@ int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar return 1; } - diff --git a/external/eigen3/blas/level2_real_impl.h b/external/eigen3/blas/level2_real_impl.h index 8d56eaa..7620f0a 100644 --- a/external/eigen3/blas/level2_real_impl.h +++ b/external/eigen3/blas/level2_real_impl.h @@ -10,28 +10,22 @@ #include "common.h" // y = alpha*A*x + beta*y -int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) +int EIGEN_BLAS_FUNC(symv) (const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *pa, const int *lda, + const RealScalar *px, const int *incx, const RealScalar *pbeta, RealScalar *py, const int *incy) { - typedef void (*functype)(int, const Scalar*, int, const Scalar*, int, Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_matrix_vector_product::run); - func[LO] = (internal::selfadjoint_matrix_vector_product::run); - - init = true; - } - - Scalar* a = reinterpret_cast(pa); - Scalar* x = reinterpret_cast(px); + typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_matrix_vector_product::run), + // array index: LO + (internal::selfadjoint_matrix_vector_product::run), + }; + + const Scalar* a = reinterpret_cast(pa); + const Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // check arguments int info = 0; @@ -46,20 +40,20 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p if(*n==0) return 0; - Scalar* actual_x = get_compact_vector(x,*n,*incx); + const Scalar* actual_x = get_compact_vector(x,*n,*incx); Scalar* actual_y = get_compact_vector(y,*n,*incy); if(beta!=Scalar(1)) { - if(beta==Scalar(0)) vector(actual_y, *n).setZero(); - else vector(actual_y, *n) *= beta; + if(beta==Scalar(0)) make_vector(actual_y, *n).setZero(); + else make_vector(actual_y, *n) *= beta; } int code = UPLO(*uplo); if(code>=2 || func[code]==0) return 0; - func[code](*n, a, *lda, actual_x, 1, actual_y, alpha); + func[code](*n, a, *lda, actual_x, actual_y, alpha); if(actual_x!=x) delete[] actual_x; if(actual_y!=y) delete[] copy_back(actual_y,y,*n,*incy); @@ -68,41 +62,20 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p } // C := alpha*x*x' + C -int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, RealScalar *pc, const int *ldc) { -// typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar); -// static functype func[2]; - -// static bool init = false; -// if(!init) -// { -// for(int k=0; k<2; ++k) -// func[k] = 0; -// -// func[UP] = (internal::selfadjoint_product::run); -// func[LO] = (internal::selfadjoint_product::run); - -// init = true; -// } typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (selfadjoint_rank1_update::run); - func[LO] = (selfadjoint_rank1_update::run); - - init = true; - } - - Scalar* x = reinterpret_cast(px); + static const functype func[2] = { + // array index: UP + (selfadjoint_rank1_update::run), + // array index: LO + (selfadjoint_rank1_update::run), + }; + + const Scalar* x = reinterpret_cast(px); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; @@ -115,7 +88,7 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, if(*n==0 || alpha==Scalar(0)) return 1; // if the increment is not 1, let's copy it to a temporary vector to enable vectorization - Scalar* x_cpy = get_compact_vector(x,*n,*incx); + const Scalar* x_cpy = get_compact_vector(x,*n,*incx); int code = UPLO(*uplo); if(code>=2 || func[code]==0) @@ -129,41 +102,20 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, } // C := alpha*x*y' + alpha*y*x' + C -int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr2)(const char *uplo, const int *n, const RealScalar *palpha, const RealScalar *px, const int *incx, const RealScalar *py, const int *incy, RealScalar *pc, const int *ldc) { -// typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar); -// static functype func[2]; -// -// static bool init = false; -// if(!init) -// { -// for(int k=0; k<2; ++k) -// func[k] = 0; -// -// func[UP] = (internal::selfadjoint_product::run); -// func[LO] = (internal::selfadjoint_product::run); -// -// init = true; -// } typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::rank2_update_selector::run); - func[LO] = (internal::rank2_update_selector::run); - - init = true; - } - - Scalar* x = reinterpret_cast(px); - Scalar* y = reinterpret_cast(py); + static const functype func[2] = { + // array index: UP + (internal::rank2_update_selector::run), + // array index: LO + (internal::rank2_update_selector::run), + }; + + const Scalar* x = reinterpret_cast(px); + const Scalar* y = reinterpret_cast(py); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; @@ -177,9 +129,9 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px if(alpha==Scalar(0)) return 1; - Scalar* x_cpy = get_compact_vector(x,*n,*incx); - Scalar* y_cpy = get_compact_vector(y,*n,*incy); - + const Scalar* x_cpy = get_compact_vector(x,*n,*incx); + const Scalar* y_cpy = get_compact_vector(y,*n,*incy); + int code = UPLO(*uplo); if(code>=2 || func[code]==0) return 0; @@ -234,19 +186,12 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::selfadjoint_packed_rank1_update::run); - func[LO] = (internal::selfadjoint_packed_rank1_update::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::selfadjoint_packed_rank1_update::run), + // array index: LO + (internal::selfadjoint_packed_rank1_update::run), + }; Scalar* x = reinterpret_cast(px); Scalar* ap = reinterpret_cast(pap); @@ -285,19 +230,12 @@ int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *in int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) { typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); - static functype func[2]; - - static bool init = false; - if(!init) - { - for(int k=0; k<2; ++k) - func[k] = 0; - - func[UP] = (internal::packed_rank2_update_selector::run); - func[LO] = (internal::packed_rank2_update_selector::run); - - init = true; - } + static const functype func[2] = { + // array index: UP + (internal::packed_rank2_update_selector::run), + // array index: LO + (internal::packed_rank2_update_selector::run), + }; Scalar* x = reinterpret_cast(px); Scalar* y = reinterpret_cast(py); @@ -366,5 +304,3 @@ int EIGEN_BLAS_FUNC(ger)(int *m, int *n, Scalar *palpha, Scalar *px, int *incx, return 1; } - - diff --git a/external/eigen3/blas/level3_impl.h b/external/eigen3/blas/level3_impl.h index 07dbc22..6c802cd 100644 --- a/external/eigen3/blas/level3_impl.h +++ b/external/eigen3/blas/level3_impl.h @@ -6,37 +6,43 @@ // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - +#include #include "common.h" -int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(gemm)(const char *opa, const char *opb, const int *m, const int *n, const int *k, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking&, Eigen::internal::GemmParallelInfo*); - static functype func[12]; - - static bool init = false; - if(!init) - { - for(int k=0; k<12; ++k) - func[k] = 0; - func[NOTR | (NOTR << 2)] = (internal::general_matrix_matrix_product::run); - func[TR | (NOTR << 2)] = (internal::general_matrix_matrix_product::run); - func[ADJ | (NOTR << 2)] = (internal::general_matrix_matrix_product::run); - func[NOTR | (TR << 2)] = (internal::general_matrix_matrix_product::run); - func[TR | (TR << 2)] = (internal::general_matrix_matrix_product::run); - func[ADJ | (TR << 2)] = (internal::general_matrix_matrix_product::run); - func[NOTR | (ADJ << 2)] = (internal::general_matrix_matrix_product::run); - func[TR | (ADJ << 2)] = (internal::general_matrix_matrix_product::run); - func[ADJ | (ADJ << 2)] = (internal::general_matrix_matrix_product::run); - init = true; - } - - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + static const functype func[12] = { + // array index: NOTR | (NOTR << 2) + (internal::general_matrix_matrix_product::run), + // array index: TR | (NOTR << 2) + (internal::general_matrix_matrix_product::run), + // array index: ADJ | (NOTR << 2) + (internal::general_matrix_matrix_product::run), + 0, + // array index: NOTR | (TR << 2) + (internal::general_matrix_matrix_product::run), + // array index: TR | (TR << 2) + (internal::general_matrix_matrix_product::run), + // array index: ADJ | (TR << 2) + (internal::general_matrix_matrix_product::run), + 0, + // array index: NOTR | (ADJ << 2) + (internal::general_matrix_matrix_product::run), + // array index: TR | (ADJ << 2) + (internal::general_matrix_matrix_product::run), + // array index: ADJ | (ADJ << 2) + (internal::general_matrix_matrix_product::run), + 0 + }; + + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); int info = 0; if(OP(*opa)==INVALID) info = 1; @@ -50,70 +56,92 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal if(info) return xerbla_(SCALAR_SUFFIX_UP"GEMM ",&info,6); + if (*m == 0 || *n == 0) + return 0; + if(beta!=Scalar(1)) { if(beta==Scalar(0)) matrix(c, *m, *n, *ldc).setZero(); else matrix(c, *m, *n, *ldc) *= beta; } - internal::gemm_blocking_space blocking(*m,*n,*k); + if(*k == 0) + return 0; + + internal::gemm_blocking_space blocking(*m,*n,*k,1,true); int code = OP(*opa) | (OP(*opb) << 2); func[code](*m, *n, *k, a, *lda, b, *ldb, c, *ldc, alpha, blocking, 0); return 0; } -int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) +int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n, + const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n"; typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking&); - static functype func[32]; - - static bool init = false; - if(!init) - { - for(int k=0; k<32; ++k) - func[k] = 0; - - func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix::run); - - - func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix::run); - - init = true; - } - - Scalar* a = reinterpret_cast(pa); + static const functype func[32] = { + // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run),\ + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::triangular_solve_matrix::run), + 0 + }; + + const Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -127,16 +155,19 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, if(info) return xerbla_(SCALAR_SUFFIX_UP"TRSM ",&info,6); + if(*m==0 || *n==0) + return 0; + int code = OP(*opa) | (SIDE(*side) << 2) | (UPLO(*uplo) << 3) | (DIAG(*diag) << 4); - + if(SIDE(*side)==LEFT) { - internal::gemm_blocking_space blocking(*m,*n,*m); + internal::gemm_blocking_space blocking(*m,*n,*m,1,false); func[code](*m, *n, a, *lda, b, *ldb, blocking); } else { - internal::gemm_blocking_space blocking(*m,*n,*n); + internal::gemm_blocking_space blocking(*m,*n,*n,1,false); func[code](*n, *m, a, *lda, b, *ldb, blocking); } @@ -149,55 +180,73 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m, // b = alpha*op(a)*b for side = 'L'or'l' // b = alpha*b*op(a) for side = 'R'or'r' -int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb) +int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, const char *diag, const int *m, const int *n, + const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb) { // std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n"; typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); - static functype func[32]; - static bool init = false; - if(!init) - { - for(int k=0; k<32; ++k) - func[k] = 0; - - func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix::run); - - init = true; - } - - Scalar* a = reinterpret_cast(pa); + static const functype func[32] = { + // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0, + // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4) + (internal::product_triangular_matrix_matrix::run), + 0 + }; + + const Scalar* a = reinterpret_cast(pa); Scalar* b = reinterpret_cast(pb); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -222,12 +271,12 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, if(SIDE(*side)==LEFT) { - internal::gemm_blocking_space blocking(*m,*n,*m); + internal::gemm_blocking_space blocking(*m,*n,*m,1,false); func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking); } else { - internal::gemm_blocking_space blocking(*m,*n,*n); + internal::gemm_blocking_space blocking(*m,*n,*n,1,false); func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha, blocking); } return 1; @@ -235,14 +284,15 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m, // c = alpha*a*b + beta*c for side = 'L'or'l' // c = alpha*b*a + beta*c for side = 'R'or'r -int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(symm)(const char *side, const char *uplo, const int *m, const int *n, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in symm " << *side << " " << *uplo << " " << *m << "x" << *n << " lda:" << *lda << " ldb:" << *ldb << " ldc:" << *ldc << " alpha:" << *palpha << " beta:" << *pbeta << "\n"; - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); int info = 0; if(SIDE(*side)==INVALID) info = 1; @@ -266,9 +316,9 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa return 1; } + int size = (SIDE(*side)==LEFT) ? (*m) : (*n); #if ISCOMPLEX // FIXME add support for symmetric complex matrix - int size = (SIDE(*side)==LEFT) ? (*m) : (*n); Matrix matA(size,size); if(UPLO(*uplo)==UP) { @@ -285,13 +335,15 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa else if(SIDE(*side)==RIGHT) matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA; #else + internal::gemm_blocking_space blocking(*m,*n,size,1,false); + if(SIDE(*side)==LEFT) - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else return 0; else if(SIDE(*side)==RIGHT) - if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); - else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); + else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); else return 0; else return 0; @@ -302,39 +354,38 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa // c = alpha*a*a' + beta*c for op = 'N'or'n' // c = alpha*a'*a + beta*c for op = 'T'or't','C'or'c' -int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syrk)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { // std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; #if !ISCOMPLEX - typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); - static functype func[8]; - - static bool init = false; - if(!init) - { - for(int k=0; k<8; ++k) - func[k] = 0; - - func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[TR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - - func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[TR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - - init = true; - } + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); + static const functype func[8] = { + // array index: NOTR | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + // array index: TR | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + // array index: ADJ | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0, + // array index: NOTR | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + // array index: TR | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + // array index: ADJ | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0 + }; #endif - Scalar* a = reinterpret_cast(pa); + const Scalar* a = reinterpret_cast(pa); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); int info = 0; if(UPLO(*uplo)==INVALID) info = 1; - else if(OP(*op)==INVALID) info = 2; + else if(OP(*op)==INVALID || (ISCOMPLEX && OP(*op)==ADJ) ) info = 2; else if(*n<0) info = 3; else if(*k<0) info = 4; else if(*lda() *= beta; } + if(*n==0 || *k==0) + return 0; + #if ISCOMPLEX // FIXME add support for symmetric complex matrix if(UPLO(*uplo)==UP) @@ -369,8 +423,10 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp matrix(c, *n, *n, *ldc).triangularView() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda); } #else + internal::gemm_blocking_space blocking(*n,*n,*k,1,false); + int code = OP(*op) | (UPLO(*uplo) << 2); - func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha); + func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking); #endif return 0; @@ -378,17 +434,20 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp // c = alpha*a*b' + alpha*b*a' + beta*c for op = 'N'or'n' // c = alpha*a'*b + alpha*b'*a + beta*c for op = 'T'or't' -int EIGEN_BLAS_FUNC(syr2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(syr2k)(const char *uplo, const char *op, const int *n, const int *k, const RealScalar *palpha, + const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); + +// std::cerr << "in syr2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; int info = 0; if(UPLO(*uplo)==INVALID) info = 1; - else if(OP(*op)==INVALID) info = 2; + else if(OP(*op)==INVALID || (ISCOMPLEX && OP(*op)==ADJ) ) info = 2; else if(*n<0) info = 3; else if(*k<0) info = 4; else if(*lda(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); - Scalar beta = *reinterpret_cast(pbeta); + Scalar alpha = *reinterpret_cast(palpha); + Scalar beta = *reinterpret_cast(pbeta); // std::cerr << "in hemm " << *side << " " << *uplo << " " << *m << " " << *n << " " << alpha << " " << *lda << " " << beta << " " << *ldc << "\n"; @@ -472,20 +532,23 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa return 1; } + int size = (SIDE(*side)==LEFT) ? (*m) : (*n); + internal::gemm_blocking_space blocking(*m,*n,size,1,false); + if(SIDE(*side)==LEFT) { if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix - ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix - ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); + ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking); else return 0; } else if(SIDE(*side)==RIGHT) { if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView();/*internal::product_selfadjoint_matrix - ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);*/ + ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/ else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix - ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); + ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking); else return 0; } else @@ -498,27 +561,28 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa // c = alpha*a*conj(a') + beta*c for op = 'N'or'n' // c = alpha*conj(a')*a + beta*c for op = 'C'or'c' -int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(herk)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); - static functype func[8]; - - static bool init = false; - if(!init) - { - for(int k=0; k<8; ++k) - func[k] = 0; - - func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product::run); - - func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product::run); - - init = true; - } - - Scalar* a = reinterpret_cast(pa); +// std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; + + typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking&); + static const functype func[8] = { + // array index: NOTR | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0, + // array index: ADJ | (UP << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0, + // array index: NOTR | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0, + // array index: ADJ | (LO << 2) + (internal::general_matrix_matrix_triangular_product::run), + 0 + }; + + const Scalar* a = reinterpret_cast(pa); Scalar* c = reinterpret_cast(pc); RealScalar alpha = *palpha; RealScalar beta = *pbeta; @@ -545,7 +609,7 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp else if(beta==Scalar(0)) matrix(c, *n, *n, *ldc).triangularView().setZero(); else matrix(c, *n, *n, *ldc).triangularView() *= beta; - + if(beta!=Scalar(0)) { matrix(c, *n, *n, *ldc).diagonal().real() *= beta; @@ -555,7 +619,8 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp if(*k>0 && alpha!=RealScalar(0)) { - func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha); + internal::gemm_blocking_space blocking(*n,*n,*k,1,false); + func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking); matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); } return 0; @@ -563,21 +628,24 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp // c = alpha*a*conj(b') + conj(alpha)*b*conj(a') + beta*c, for op = 'N'or'n' // c = alpha*conj(a')*b + conj(alpha)*conj(b')*a + beta*c, for op = 'C'or'c' -int EIGEN_BLAS_FUNC(her2k)(char *uplo, char *op, int *n, int *k, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *ldb, RealScalar *pbeta, RealScalar *pc, int *ldc) +int EIGEN_BLAS_FUNC(her2k)(const char *uplo, const char *op, const int *n, const int *k, + const RealScalar *palpha, const RealScalar *pa, const int *lda, const RealScalar *pb, const int *ldb, const RealScalar *pbeta, RealScalar *pc, const int *ldc) { - Scalar* a = reinterpret_cast(pa); - Scalar* b = reinterpret_cast(pb); + const Scalar* a = reinterpret_cast(pa); + const Scalar* b = reinterpret_cast(pb); Scalar* c = reinterpret_cast(pc); - Scalar alpha = *reinterpret_cast(palpha); + Scalar alpha = *reinterpret_cast(palpha); RealScalar beta = *pbeta; +// std::cerr << "in her2k " << *uplo << " " << *op << " " << *n << " " << *k << " " << alpha << " " << *lda << " " << *ldb << " " << beta << " " << *ldc << "\n"; + int info = 0; if(UPLO(*uplo)==INVALID) info = 1; else if((OP(*op)==INVALID) || (OP(*op)==TR)) info = 2; else if(*n<0) info = 3; else if(*k<0) info = 4; else if(*lda \brief \b CBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 1 BLAS. +*> Based upon the original BLAS test routine together with: +*> +*> F06GAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT1 -* Test program for the COMPLEX Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06GAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) @@ -114,8 +156,8 @@ + (5.0E0,6.0E0), (5.0E0,6.0E0), (0.1E0,0.1E0), + (-0.6E0,0.1E0), (0.1E0,-0.3E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.1E0,0.4E0), - + (0.4E0,0.1E0), (0.1E0,0.2E0), (2.0E0,3.0E0), + + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.5E0,0.0E0), + + (0.0E0,0.5E0), (0.0E0,0.2E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), @@ -129,10 +171,10 @@ + (3.0E0,6.0E0), (-0.6E0,0.1E0), (4.0E0,7.0E0), + (0.1E0,-0.3E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.3E0,0.1E0), (5.0E0,8.0E0), - + (0.1E0,0.4E0), (6.0E0,9.0E0), (0.4E0,0.1E0), - + (8.0E0,3.0E0), (0.1E0,0.2E0), (9.0E0,4.0E0)/ - DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.7E0/ - DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.7E0/ + + (0.5E0,0.0E0), (6.0E0,9.0E0), (0.0E0,0.5E0), + + (8.0E0,3.0E0), (0.0E0,0.2E0), (9.0E0,4.0E0)/ + DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.8E0/ + DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.6E0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), @@ -145,8 +187,8 @@ + (0.11E0,-0.03E0), (-0.17E0,0.46E0), + (-0.17E0,-0.19E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (0.19E0,-0.17E0), (0.32E0,0.09E0), - + (0.23E0,-0.24E0), (0.18E0,0.01E0), + + (0.19E0,-0.17E0), (0.20E0,-0.35E0), + + (0.35E0,0.20E0), (0.14E0,0.08E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0), + (2.0E0,3.0E0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), @@ -162,9 +204,9 @@ + (-0.17E0,0.46E0), (4.0E0,7.0E0), + (-0.17E0,-0.19E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.19E0,-0.17E0), (5.0E0,8.0E0), - + (0.32E0,0.09E0), (6.0E0,9.0E0), - + (0.23E0,-0.24E0), (8.0E0,3.0E0), - + (0.18E0,0.01E0), (9.0E0,4.0E0)/ + + (0.20E0,-0.35E0), (6.0E0,9.0E0), + + (0.35E0,0.20E0), (8.0E0,3.0E0), + + (0.14E0,0.08E0), (9.0E0,4.0E0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), @@ -177,8 +219,8 @@ + (0.03E0,0.03E0), (-0.18E0,0.03E0), + (0.03E0,-0.09E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), - + (0.09E0,0.03E0), (0.03E0,0.12E0), - + (0.12E0,0.03E0), (0.03E0,0.06E0), (2.0E0,3.0E0), + + (0.09E0,0.03E0), (0.15E0,0.00E0), + + (0.00E0,0.15E0), (0.00E0,0.06E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), @@ -193,8 +235,8 @@ + (-0.18E0,0.03E0), (4.0E0,7.0E0), + (0.03E0,-0.09E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.09E0,0.03E0), (5.0E0,8.0E0), - + (0.03E0,0.12E0), (6.0E0,9.0E0), (0.12E0,0.03E0), - + (8.0E0,3.0E0), (0.03E0,0.06E0), (9.0E0,4.0E0)/ + + (0.15E0,0.00E0), (6.0E0,9.0E0), (0.00E0,0.15E0), + + (8.0E0,3.0E0), (0.00E0,0.06E0), (9.0E0,4.0E0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 @@ -529,7 +571,8 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + REAL ZERO + PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC INTEGER LEN @@ -552,7 +595,7 @@ * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0E0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). diff --git a/external/eigen3/blas/testing/cblat2.f b/external/eigen3/blas/testing/cblat2.f index 20f1881..5833ea8 100644 --- a/external/eigen3/blas/testing/cblat2.f +++ b/external/eigen3/blas/testing/cblat2.f @@ -1,68 +1,114 @@ +*> \brief \b CBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 17 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 35 lines: +*> 'cblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> CGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> CGERC T PUT F FOR NO TEST. SAME COLUMNS. +*> CGERU T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPR T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER2 T PUT F FOR NO TEST. SAME COLUMNS. +*> CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT2 * -* Test program for the COMPLEX Level 2 Blas. -* -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 17 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 35 lines: -* 'CBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* CGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* CGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHEMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CHPMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTRMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* CTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* CTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* CTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* CGERC T PUT F FOR NO TEST. SAME COLUMNS. -* CGERU T PUT F FOR NO TEST. SAME COLUMNS. -* CHER T PUT F FOR NO TEST. SAME COLUMNS. -* CHPR T PUT F FOR NO TEST. SAME COLUMNS. -* CHER2 T PUT F FOR NO TEST. SAME COLUMNS. -* CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN @@ -71,8 +117,8 @@ PARAMETER ( NSUBS = 17 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) - REAL RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0, RHALF = 0.5, RONE = 1.0 ) + REAL RZERO + PARAMETER ( RZERO = 0.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -126,7 +172,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -135,7 +181,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -240,14 +286,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 90 CONTINUE - IF( SDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 100 - EPS = RHALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMVCH using exact data. @@ -3079,7 +3118,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LCERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/external/eigen3/blas/testing/cblat3.f b/external/eigen3/blas/testing/cblat3.f index b26be91..09f2cb9 100644 --- a/external/eigen3/blas/testing/cblat3.f +++ b/external/eigen3/blas/testing/cblat3.f @@ -1,50 +1,96 @@ +*> \brief \b CBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM CBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 9 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 23 lines: +*> 'cblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> CGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CHEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> CTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> CHERK T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> CHER2K T PUT F FOR NO TEST. SAME COLUMNS. +*> CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex_blas_testing +* +* ===================================================================== PROGRAM CBLAT3 * -* Test program for the COMPLEX Level 3 Blas. -* -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 9 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 23 lines: -* 'CBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* CGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* CHEMM T PUT F FOR NO TEST. SAME COLUMNS. -* CSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* CTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* CTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* CHERK T PUT F FOR NO TEST. SAME COLUMNS. -* CSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* CHER2K T PUT F FOR NO TEST. SAME COLUMNS. -* CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. INTEGER NIN @@ -53,8 +99,8 @@ PARAMETER ( NSUBS = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) - REAL RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0, RHALF = 0.5, RONE = 1.0 ) + REAL RZERO + PARAMETER ( RZERO = 0.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -103,7 +149,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -112,7 +158,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -189,14 +235,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 70 CONTINUE - IF( SDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 80 - EPS = RHALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMMCH using exact data. @@ -1946,7 +1985,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, RALPHA, BETA, RBETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1956,12 +1995,19 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) +* 3-19-92: Fix argument 12 in calls to CSYMM and CHEMM +* with INFOT = 9 (eca) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. COMPLEX ALPHA, BETA REAL RALPHA, RBETA @@ -1979,6 +2025,14 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA, BETA, RALPHA, and RBETA. +* + ALPHA = CMPLX( ONE, -ONE ) + BETA = CMPLX( TWO, -TWO ) + RALPHA = ONE + RBETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 @@ -2205,16 +2259,16 @@ CALL CHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2272,16 +2326,16 @@ CALL CSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -3268,7 +3322,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LCERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/external/eigen3/blas/testing/dblat2.f b/external/eigen3/blas/testing/dblat2.f index 4002d43..0fa80af 100644 --- a/external/eigen3/blas/testing/dblat2.f +++ b/external/eigen3/blas/testing/dblat2.f @@ -1,75 +1,121 @@ +*> \brief \b DBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the DOUBLE PRECISION Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 16 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 34 lines: +*> 'dblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 0.9 VALUES OF BETAC +*> DGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> DGER T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPR T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup double_blas_testing +* +* ===================================================================== PROGRAM DBLAT2 * -* Test program for the DOUBLE PRECISION Level 2 Blas. -* -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 16 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 34 lines: -* 'DBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 0.9 VALUES OF BETA -* DGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* DGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSYMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DSPMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTRMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* DTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* DTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* DTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* DGER T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR T PUT F FOR NO TEST. SAME COLUMNS. -* DSPR T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. -* DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) - DOUBLE PRECISION ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -121,7 +167,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -130,7 +176,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -235,14 +281,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 90 CONTINUE - IF( DDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 100 - EPS = HALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMVCH using exact data. @@ -2982,7 +3021,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LDERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/external/eigen3/blas/testing/dblat3.f b/external/eigen3/blas/testing/dblat3.f index 082e03e..8d37c74 100644 --- a/external/eigen3/blas/testing/dblat3.f +++ b/external/eigen3/blas/testing/dblat3.f @@ -1,55 +1,101 @@ +*> \brief \b DBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the DOUBLE PRECISION Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 6 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 20 lines: +*> 'dblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 1.3 VALUES OF BETA +*> DGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> DTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup double_blas_testing +* +* ===================================================================== PROGRAM DBLAT3 * -* Test program for the DOUBLE PRECISION Level 3 Blas. -* -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 6 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 20 lines: -* 'DBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 1.3 VALUES OF BETA -* DGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* DSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* DTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* DTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* DSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) - DOUBLE PRECISION ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -96,7 +142,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -105,7 +151,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -182,14 +228,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 70 CONTINUE - IF( DDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 80 - EPS = HALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMMCH using exact data. @@ -1802,7 +1841,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, BETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1812,12 +1851,18 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA and BETA (eca) +* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, BETA * .. Local Arrays .. @@ -1834,6 +1879,12 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA and BETA. +* + ALPHA = ONE + BETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL DGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) @@ -1963,16 +2014,16 @@ CALL DSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2660,7 +2711,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LDERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/external/eigen3/blas/testing/sblat2.f b/external/eigen3/blas/testing/sblat2.f index 057a854..71605ed 100644 --- a/external/eigen3/blas/testing/sblat2.f +++ b/external/eigen3/blas/testing/sblat2.f @@ -1,75 +1,121 @@ +*> \brief \b SBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM SBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the REAL Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 16 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 34 lines: +*> 'sblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 0.9 VALUES OF BETA +*> SGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> STRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> STBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> STPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> SGER T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYR T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPR T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup single_blas_testing +* +* ===================================================================== PROGRAM SBLAT2 * -* Test program for the REAL Level 2 Blas. -* -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 16 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 34 lines: -* 'SBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 0.9 VALUES OF BETA -* SGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* SGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSYMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSBMV T PUT F FOR NO TEST. SAME COLUMNS. -* SSPMV T PUT F FOR NO TEST. SAME COLUMNS. -* STRMV T PUT F FOR NO TEST. SAME COLUMNS. -* STBMV T PUT F FOR NO TEST. SAME COLUMNS. -* STPMV T PUT F FOR NO TEST. SAME COLUMNS. -* STRSV T PUT F FOR NO TEST. SAME COLUMNS. -* STBSV T PUT F FOR NO TEST. SAME COLUMNS. -* STPSV T PUT F FOR NO TEST. SAME COLUMNS. -* SGER T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR T PUT F FOR NO TEST. SAME COLUMNS. -* SSPR T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. -* SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) - REAL ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -121,7 +167,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -130,7 +176,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -235,14 +281,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 90 CONTINUE - IF( SDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 100 - EPS = HALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMVCH using exact data. @@ -2982,7 +3021,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LSERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/external/eigen3/blas/testing/sblat3.f b/external/eigen3/blas/testing/sblat3.f index 325a9eb..8792696 100644 --- a/external/eigen3/blas/testing/sblat3.f +++ b/external/eigen3/blas/testing/sblat3.f @@ -1,55 +1,101 @@ +*> \brief \b SBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM SBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the REAL Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 6 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 20 lines: +*> 'sblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> 0.0 1.0 0.7 VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> 0.0 1.0 1.3 VALUES OF BETA +*> SGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> STRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> STRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup single_blas_testing +* +* ===================================================================== PROGRAM SBLAT3 * -* Test program for the REAL Level 3 Blas. -* -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 6 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 20 lines: -* 'SBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* 0.0 1.0 0.7 VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* 0.0 1.0 1.3 VALUES OF BETA -* SGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* SSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* STRMM T PUT F FOR NO TEST. SAME COLUMNS. -* STRSM T PUT F FOR NO TEST. SAME COLUMNS. -* SSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) - REAL ZERO, HALF, ONE - PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -96,7 +142,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -105,7 +151,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -182,14 +228,7 @@ * * Compute EPS (the machine precision). * - EPS = ONE - 70 CONTINUE - IF( SDIFF( ONE + EPS, ONE ).EQ.ZERO ) - $ GO TO 80 - EPS = HALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMMCH using exact data. @@ -1802,7 +1841,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, BETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1812,12 +1851,18 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA and BETA (eca) +* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. REAL ALPHA, BETA * .. Local Arrays .. @@ -1834,6 +1879,12 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA and BETA. +* + ALPHA = ONE + BETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL SGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) @@ -1963,16 +2014,16 @@ CALL SSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2660,7 +2711,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LSERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/external/eigen3/blas/testing/zblat1.f b/external/eigen3/blas/testing/zblat1.f index e2415e1..d30112c 100644 --- a/external/eigen3/blas/testing/zblat1.f +++ b/external/eigen3/blas/testing/zblat1.f @@ -1,7 +1,49 @@ +*> \brief \b ZBLAT1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT1 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 1 BLAS. +*> +*> Based upon the original BLAS test routine together with: +*> F06GAF Example Program Text +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT1 -* Test program for the COMPLEX*16 Level 1 BLAS. -* Based upon the original BLAS test routine together with: -* F06GAF Example Program Text +* +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* ===================================================================== +* * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) @@ -114,8 +156,8 @@ + (5.0D0,6.0D0), (5.0D0,6.0D0), (0.1D0,0.1D0), + (-0.6D0,0.1D0), (0.1D0,-0.3D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.1D0,0.4D0), - + (0.4D0,0.1D0), (0.1D0,0.2D0), (2.0D0,3.0D0), + + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.5D0,0.0D0), + + (0.0D0,0.5D0), (0.0D0,0.2D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), @@ -129,10 +171,10 @@ + (3.0D0,6.0D0), (-0.6D0,0.1D0), (4.0D0,7.0D0), + (0.1D0,-0.3D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.3D0,0.1D0), (5.0D0,8.0D0), - + (0.1D0,0.4D0), (6.0D0,9.0D0), (0.4D0,0.1D0), - + (8.0D0,3.0D0), (0.1D0,0.2D0), (9.0D0,4.0D0)/ - DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.7D0/ - DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.7D0/ + + (0.5D0,0.0D0), (6.0D0,9.0D0), (0.0D0,0.5D0), + + (8.0D0,3.0D0), (0.0D0,0.2D0), (9.0D0,4.0D0)/ + DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.8D0/ + DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.6D0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), @@ -145,8 +187,8 @@ + (0.11D0,-0.03D0), (-0.17D0,0.46D0), + (-0.17D0,-0.19D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (0.19D0,-0.17D0), (0.32D0,0.09D0), - + (0.23D0,-0.24D0), (0.18D0,0.01D0), + + (0.19D0,-0.17D0), (0.20D0,-0.35D0), + + (0.35D0,0.20D0), (0.14D0,0.08D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0), + (2.0D0,3.0D0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), @@ -162,9 +204,9 @@ + (-0.17D0,0.46D0), (4.0D0,7.0D0), + (-0.17D0,-0.19D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.19D0,-0.17D0), (5.0D0,8.0D0), - + (0.32D0,0.09D0), (6.0D0,9.0D0), - + (0.23D0,-0.24D0), (8.0D0,3.0D0), - + (0.18D0,0.01D0), (9.0D0,4.0D0)/ + + (0.20D0,-0.35D0), (6.0D0,9.0D0), + + (0.35D0,0.20D0), (8.0D0,3.0D0), + + (0.14D0,0.08D0), (9.0D0,4.0D0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), @@ -177,8 +219,8 @@ + (0.03D0,0.03D0), (-0.18D0,0.03D0), + (0.03D0,-0.09D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), - + (0.09D0,0.03D0), (0.03D0,0.12D0), - + (0.12D0,0.03D0), (0.03D0,0.06D0), (2.0D0,3.0D0), + + (0.09D0,0.03D0), (0.15D0,0.00D0), + + (0.00D0,0.15D0), (0.00D0,0.06D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), @@ -193,8 +235,8 @@ + (-0.18D0,0.03D0), (4.0D0,7.0D0), + (0.03D0,-0.09D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.09D0,0.03D0), (5.0D0,8.0D0), - + (0.03D0,0.12D0), (6.0D0,9.0D0), (0.12D0,0.03D0), - + (8.0D0,3.0D0), (0.03D0,0.06D0), (9.0D0,4.0D0)/ + + (0.15D0,0.00D0), (6.0D0,9.0D0), (0.00D0,0.15D0), + + (8.0D0,3.0D0), (0.00D0,0.06D0), (9.0D0,4.0D0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 @@ -529,7 +571,8 @@ * * .. Parameters .. INTEGER NOUT - PARAMETER (NOUT=6) + DOUBLE PRECISION ZERO + PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN @@ -552,7 +595,7 @@ * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) - IF (SDIFF(ABS(SSIZE(I))+ABS(SFAC*SD),ABS(SSIZE(I))).EQ.0.0D0) + IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). diff --git a/external/eigen3/blas/testing/zblat2.f b/external/eigen3/blas/testing/zblat2.f index e65cdcc..53129a1 100644 --- a/external/eigen3/blas/testing/zblat2.f +++ b/external/eigen3/blas/testing/zblat2.f @@ -1,68 +1,114 @@ +*> \brief \b ZBLAT2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT2 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 2 Blas. +*> +*> The program must be driven by a short data file. The first 18 records +*> of the file are read using list-directed input, the last 17 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 35 lines: +*> 'zblat2.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 4 NUMBER OF VALUES OF K +*> 0 1 2 4 VALUES OF K +*> 4 NUMBER OF VALUES OF INCX AND INCY +*> 1 2 -1 -2 VALUES OF INCX AND INCY +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGERC T PUT F FOR NO TEST. SAME COLUMNS. +*> ZGERU T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHPR T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. +*> An extended set of Fortran Basic Linear Algebra Subprograms. +*> +*> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics +*> and Computer Science Division, Argonne National Laboratory, +*> 9700 South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> Or +*> +*> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms +*> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford +*> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st +*> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +*> +*> +*> -- Written on 10-August-1987. +*> Richard Hanson, Sandia National Labs. +*> Jeremy Du Croz, NAG Central Office. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT2 * -* Test program for the COMPLEX*16 Level 2 Blas. -* -* The program must be driven by a short data file. The first 18 records -* of the file are read using list-directed input, the last 17 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 35 lines: -* 'ZBLAT2.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 4 NUMBER OF VALUES OF K -* 0 1 2 4 VALUES OF K -* 4 NUMBER OF VALUES OF INCX AND INCY -* 1 2 -1 -2 VALUES OF INCX AND INCY -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. -* ZGERC T PUT F FOR NO TEST. SAME COLUMNS. -* ZGERU T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPR T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. -* ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. -* An extended set of Fortran Basic Linear Algebra Subprograms. -* -* Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics -* and Computer Science Division, Argonne National Laboratory, -* 9700 South Cass Avenue, Argonne, Illinois 60439, US. -* -* Or -* -* NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms -* Group Ltd., NAG Central Office, 256 Banbury Road, Oxford -* OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st -* Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* -* -- Written on 10-August-1987. -* Richard Hanson, Sandia National Labs. -* Jeremy Du Croz, NAG Central Office. +* ===================================================================== * * .. Parameters .. INTEGER NIN @@ -72,8 +118,8 @@ COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) - DOUBLE PRECISION RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0D0, RHALF = 0.5D0, RONE = 1.0D0 ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX @@ -127,7 +173,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -136,7 +182,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -241,14 +287,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 90 CONTINUE - IF( DDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 100 - EPS = RHALF*EPS - GO TO 90 - 100 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMVCH using exact data. @@ -3087,7 +3126,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LZERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/external/eigen3/blas/testing/zblat3.f b/external/eigen3/blas/testing/zblat3.f index d6a522f..59ca241 100644 --- a/external/eigen3/blas/testing/zblat3.f +++ b/external/eigen3/blas/testing/zblat3.f @@ -1,50 +1,97 @@ +*> \brief \b ZBLAT3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM ZBLAT3 +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Test program for the COMPLEX*16 Level 3 Blas. +*> +*> The program must be driven by a short data file. The first 14 records +*> of the file are read using list-directed input, the last 9 records +*> are read using the format ( A6, L2 ). An annotated example of a data +*> file can be obtained by deleting the first 3 characters from the +*> following 23 lines: +*> 'zblat3.out' NAME OF SUMMARY OUTPUT FILE +*> 6 UNIT NUMBER OF SUMMARY FILE +*> 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE +*> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) +*> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. +*> F LOGICAL FLAG, T TO STOP ON FAILURES. +*> T LOGICAL FLAG, T TO TEST ERROR EXITS. +*> 16.0 THRESHOLD VALUE OF TEST RATIO +*> 6 NUMBER OF VALUES OF N +*> 0 1 2 3 5 9 VALUES OF N +*> 3 NUMBER OF VALUES OF ALPHA +*> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA +*> 3 NUMBER OF VALUES OF BETA +*> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA +*> ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHERK T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. +*> ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. +*> ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. +*> +*> +*> Further Details +*> =============== +*> +*> See: +*> +*> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. +*> A Set of Level 3 Basic Linear Algebra Subprograms. +*> +*> Technical Memorandum No.88 (Revision 1), Mathematics and +*> Computer Science Division, Argonne National Laboratory, 9700 +*> South Cass Avenue, Argonne, Illinois 60439, US. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers +*> can be run multiple times without deleting generated +*> output files (susan) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16_blas_testing +* +* ===================================================================== PROGRAM ZBLAT3 * -* Test program for the COMPLEX*16 Level 3 Blas. -* -* The program must be driven by a short data file. The first 14 records -* of the file are read using list-directed input, the last 9 records -* are read using the format ( A6, L2 ). An annotated example of a data -* file can be obtained by deleting the first 3 characters from the -* following 23 lines: -* 'ZBLAT3.SUMM' NAME OF SUMMARY OUTPUT FILE -* 6 UNIT NUMBER OF SUMMARY FILE -* 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -* -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) -* F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. -* F LOGICAL FLAG, T TO STOP ON FAILURES. -* T LOGICAL FLAG, T TO TEST ERROR EXITS. -* 16.0 THRESHOLD VALUE OF TEST RATIO -* 6 NUMBER OF VALUES OF N -* 0 1 2 3 5 9 VALUES OF N -* 3 NUMBER OF VALUES OF ALPHA -* (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA -* 3 NUMBER OF VALUES OF BETA -* (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA -* ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. -* ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. -* ZHERK T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. -* ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. -* ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. -* -* See: -* -* Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. -* A Set of Level 3 Basic Linear Algebra Subprograms. -* -* Technical Memorandum No.88 (Revision 1), Mathematics and -* Computer Science Division, Argonne National Laboratory, 9700 -* South Cass Avenue, Argonne, Illinois 60439, US. +* -- Reference BLAS test routine (version 3.4.1) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 * -* -- Written on 8-February-1989. -* Jack Dongarra, Argonne National Laboratory. -* Iain Duff, AERE Harwell. -* Jeremy Du Croz, Numerical Algorithms Group Ltd. -* Sven Hammarling, Numerical Algorithms Group Ltd. +* ===================================================================== * * .. Parameters .. INTEGER NIN @@ -54,8 +101,8 @@ COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) - DOUBLE PRECISION RZERO, RHALF, RONE - PARAMETER ( RZERO = 0.0D0, RHALF = 0.5D0, RONE = 1.0D0 ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX @@ -104,7 +151,7 @@ * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT - OPEN( NOUT, FILE = SUMMRY, STATUS = 'NEW' ) + OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. @@ -113,7 +160,7 @@ READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN - OPEN( NTRA, FILE = SNAPS, STATUS = 'NEW' ) + OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI @@ -190,14 +237,7 @@ * * Compute EPS (the machine precision). * - EPS = RONE - 70 CONTINUE - IF( DDIFF( RONE + EPS, RONE ).EQ.RZERO ) - $ GO TO 80 - EPS = RHALF*EPS - GO TO 70 - 80 CONTINUE - EPS = EPS + EPS + EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMMCH using exact data. @@ -1949,7 +1989,7 @@ * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. -* ALPHA, RALPHA, BETA, RBETA, A, B and C should not need to be defined. +* A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * @@ -1959,12 +1999,20 @@ * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * +* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) +* 3-19-92: Fix argument 12 in calls to ZSYMM and ZHEMM +* with INFOT = 9 (eca) +* 10-9-00: Declared INTRINSIC DCMPLX (susan) +* * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION RALPHA, RBETA @@ -1973,6 +2021,8 @@ * .. External Subroutines .. EXTERNAL ZGEMM, ZHEMM, ZHER2K, ZHERK, CHKXER, ZSYMM, $ ZSYR2K, ZSYRK, ZTRMM, ZTRSM +* .. Intrinsic Functions .. + INTRINSIC DCMPLX * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. @@ -1982,6 +2032,14 @@ * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. +* +* Initialize ALPHA, BETA, RALPHA, and RBETA. +* + ALPHA = DCMPLX( ONE, -ONE ) + BETA = DCMPLX( TWO, -TWO ) + RALPHA = ONE + RBETA = TWO +* GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 @@ -2208,16 +2266,16 @@ CALL ZHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -2275,16 +2333,16 @@ CALL ZSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 - CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) + CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) @@ -3274,7 +3332,6 @@ 50 CONTINUE END IF * - 60 CONTINUE LZERES = .TRUE. GO TO 80 70 CONTINUE diff --git a/external/eigen3/blas/xerbla.cpp b/external/eigen3/blas/xerbla.cpp index dd39a52..c373e86 100644 --- a/external/eigen3/blas/xerbla.cpp +++ b/external/eigen3/blas/xerbla.cpp @@ -1,5 +1,5 @@ -#include +#include #if (defined __GNUC__) && (!defined __MINGW32__) && (!defined __CYGWIN__) #define EIGEN_WEAK_LINKING __attribute__ ((weak)) @@ -14,7 +14,7 @@ extern "C" EIGEN_WEAK_LINKING int xerbla_(const char * msg, int *info, int) { - std::cerr << "Eigen BLAS ERROR #" << *info << ": " << msg << "\n"; + printf("Eigen BLAS ERROR #%i: %s\n", *info, msg ); return 0; } diff --git a/external/eigen3/blas/zhbmv.f b/external/eigen3/blas/zhbmv.f deleted file mode 100644 index bca0da5..0000000 --- a/external/eigen3/blas/zhbmv.f +++ /dev/null @@ -1,310 +0,0 @@ - SUBROUTINE ZHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) -* .. Scalar Arguments .. - DOUBLE COMPLEX ALPHA,BETA - INTEGER INCX,INCY,K,LDA,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - DOUBLE COMPLEX A(LDA,*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* ZHBMV performs the matrix-vector operation -* -* y := alpha*A*x + beta*y, -* -* where alpha and beta are scalars, x and y are n element vectors and -* A is an n by n hermitian band matrix, with k super-diagonals. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the band matrix A is being supplied as -* follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* being supplied. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* being supplied. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* K - INTEGER. -* On entry, K specifies the number of super-diagonals of the -* matrix A. K must satisfy 0 .le. K. -* Unchanged on exit. -* -* ALPHA - COMPLEX*16 . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* A - COMPLEX*16 array of DIMENSION ( LDA, n ). -* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) -* by n part of the array A must contain the upper triangular -* band part of the hermitian matrix, supplied column by -* column, with the leading diagonal of the matrix in row -* ( k + 1 ) of the array, the first super-diagonal starting at -* position 2 in row k, and so on. The top left k by k triangle -* of the array A is not referenced. -* The following program segment will transfer the upper -* triangular part of a hermitian band matrix from conventional -* full matrix storage to band storage: -* -* DO 20, J = 1, N -* M = K + 1 - J -* DO 10, I = MAX( 1, J - K ), J -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) -* by n part of the array A must contain the lower triangular -* band part of the hermitian matrix, supplied column by -* column, with the leading diagonal of the matrix in row 1 of -* the array, the first sub-diagonal starting at position 1 in -* row 2, and so on. The bottom right k by k triangle of the -* array A is not referenced. -* The following program segment will transfer the lower -* triangular part of a hermitian band matrix from conventional -* full matrix storage to band storage: -* -* DO 20, J = 1, N -* M = 1 - J -* DO 10, I = J, MIN( N, J + K ) -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Note that the imaginary parts of the diagonal elements need -* not be set and are assumed to be zero. -* Unchanged on exit. -* -* LDA - INTEGER. -* On entry, LDA specifies the first dimension of A as declared -* in the calling (sub) program. LDA must be at least -* ( k + 1 ). -* Unchanged on exit. -* -* X - COMPLEX*16 array of DIMENSION at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the -* vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* BETA - COMPLEX*16 . -* On entry, BETA specifies the scalar beta. -* Unchanged on exit. -* -* Y - COMPLEX*16 array of DIMENSION at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the -* vector y. On exit, Y is overwritten by the updated vector y. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE COMPLEX ONE - PARAMETER (ONE= (1.0D+0,0.0D+0)) - DOUBLE COMPLEX ZERO - PARAMETER (ZERO= (0.0D+0,0.0D+0)) -* .. -* .. Local Scalars .. - DOUBLE COMPLEX TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC DBLE,DCONJG,MAX,MIN -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (K.LT.0) THEN - INFO = 3 - ELSE IF (LDA.LT. (K+1)) THEN - INFO = 6 - ELSE IF (INCX.EQ.0) THEN - INFO = 8 - ELSE IF (INCY.EQ.0) THEN - INFO = 11 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('ZHBMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN -* -* Set up the start points in X and Y. -* - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF -* -* Start the operations. In this version the elements of the array A -* are accessed sequentially with one pass through A. -* -* First form y := beta*y. -* - IF (BETA.NE.ONE) THEN - IF (INCY.EQ.1) THEN - IF (BETA.EQ.ZERO) THEN - DO 10 I = 1,N - Y(I) = ZERO - 10 CONTINUE - ELSE - DO 20 I = 1,N - Y(I) = BETA*Y(I) - 20 CONTINUE - END IF - ELSE - IY = KY - IF (BETA.EQ.ZERO) THEN - DO 30 I = 1,N - Y(IY) = ZERO - IY = IY + INCY - 30 CONTINUE - ELSE - DO 40 I = 1,N - Y(IY) = BETA*Y(IY) - IY = IY + INCY - 40 CONTINUE - END IF - END IF - END IF - IF (ALPHA.EQ.ZERO) RETURN - IF (LSAME(UPLO,'U')) THEN -* -* Form y when upper triangle of A is stored. -* - KPLUS1 = K + 1 - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - L = KPLUS1 - J - DO 50 I = MAX(1,J-K),J - 1 - Y(I) = Y(I) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(I) - 50 CONTINUE - Y(J) = Y(J) + TEMP1*DBLE(A(KPLUS1,J)) + ALPHA*TEMP2 - 60 CONTINUE - ELSE - JX = KX - JY = KY - DO 80 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - IX = KX - IY = KY - L = KPLUS1 - J - DO 70 I = MAX(1,J-K),J - 1 - Y(IY) = Y(IY) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(IX) - IX = IX + INCX - IY = IY + INCY - 70 CONTINUE - Y(JY) = Y(JY) + TEMP1*DBLE(A(KPLUS1,J)) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - IF (J.GT.K) THEN - KX = KX + INCX - KY = KY + INCY - END IF - 80 CONTINUE - END IF - ELSE -* -* Form y when lower triangle of A is stored. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 100 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - Y(J) = Y(J) + TEMP1*DBLE(A(1,J)) - L = 1 - J - DO 90 I = J + 1,MIN(N,J+K) - Y(I) = Y(I) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(I) - 90 CONTINUE - Y(J) = Y(J) + ALPHA*TEMP2 - 100 CONTINUE - ELSE - JX = KX - JY = KY - DO 120 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - Y(JY) = Y(JY) + TEMP1*DBLE(A(1,J)) - L = 1 - J - IX = JX - IY = JY - DO 110 I = J + 1,MIN(N,J+K) - IX = IX + INCX - IY = IY + INCY - Y(IY) = Y(IY) + TEMP1*A(L+I,J) - TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(IX) - 110 CONTINUE - Y(JY) = Y(JY) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - 120 CONTINUE - END IF - END IF -* - RETURN -* -* End of ZHBMV . -* - END diff --git a/external/eigen3/blas/zhpmv.f b/external/eigen3/blas/zhpmv.f deleted file mode 100644 index b686108..0000000 --- a/external/eigen3/blas/zhpmv.f +++ /dev/null @@ -1,272 +0,0 @@ - SUBROUTINE ZHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY) -* .. Scalar Arguments .. - DOUBLE COMPLEX ALPHA,BETA - INTEGER INCX,INCY,N - CHARACTER UPLO -* .. -* .. Array Arguments .. - DOUBLE COMPLEX AP(*),X(*),Y(*) -* .. -* -* Purpose -* ======= -* -* ZHPMV performs the matrix-vector operation -* -* y := alpha*A*x + beta*y, -* -* where alpha and beta are scalars, x and y are n element vectors and -* A is an n by n hermitian matrix, supplied in packed form. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the upper or lower -* triangular part of the matrix A is supplied in the packed -* array AP as follows: -* -* UPLO = 'U' or 'u' The upper triangular part of A is -* supplied in AP. -* -* UPLO = 'L' or 'l' The lower triangular part of A is -* supplied in AP. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* ALPHA - COMPLEX*16 . -* On entry, ALPHA specifies the scalar alpha. -* Unchanged on exit. -* -* AP - COMPLEX*16 array of DIMENSION at least -* ( ( n*( n + 1 ) )/2 ). -* Before entry with UPLO = 'U' or 'u', the array AP must -* contain the upper triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) -* and a( 2, 2 ) respectively, and so on. -* Before entry with UPLO = 'L' or 'l', the array AP must -* contain the lower triangular part of the hermitian matrix -* packed sequentially, column by column, so that AP( 1 ) -* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) -* and a( 3, 1 ) respectively, and so on. -* Note that the imaginary parts of the diagonal elements need -* not be set and are assumed to be zero. -* Unchanged on exit. -* -* X - COMPLEX*16 array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. -* Unchanged on exit. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* BETA - COMPLEX*16 . -* On entry, BETA specifies the scalar beta. When BETA is -* supplied as zero then Y need not be set on input. -* Unchanged on exit. -* -* Y - COMPLEX*16 array of dimension at least -* ( 1 + ( n - 1 )*abs( INCY ) ). -* Before entry, the incremented array Y must contain the n -* element vector y. On exit, Y is overwritten by the updated -* vector y. -* -* INCY - INTEGER. -* On entry, INCY specifies the increment for the elements of -* Y. INCY must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE COMPLEX ONE - PARAMETER (ONE= (1.0D+0,0.0D+0)) - DOUBLE COMPLEX ZERO - PARAMETER (ZERO= (0.0D+0,0.0D+0)) -* .. -* .. Local Scalars .. - DOUBLE COMPLEX TEMP1,TEMP2 - INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC DBLE,DCONJG -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (N.LT.0) THEN - INFO = 2 - ELSE IF (INCX.EQ.0) THEN - INFO = 6 - ELSE IF (INCY.EQ.0) THEN - INFO = 9 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('ZHPMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN -* -* Set up the start points in X and Y. -* - IF (INCX.GT.0) THEN - KX = 1 - ELSE - KX = 1 - (N-1)*INCX - END IF - IF (INCY.GT.0) THEN - KY = 1 - ELSE - KY = 1 - (N-1)*INCY - END IF -* -* Start the operations. In this version the elements of the array AP -* are accessed sequentially with one pass through AP. -* -* First form y := beta*y. -* - IF (BETA.NE.ONE) THEN - IF (INCY.EQ.1) THEN - IF (BETA.EQ.ZERO) THEN - DO 10 I = 1,N - Y(I) = ZERO - 10 CONTINUE - ELSE - DO 20 I = 1,N - Y(I) = BETA*Y(I) - 20 CONTINUE - END IF - ELSE - IY = KY - IF (BETA.EQ.ZERO) THEN - DO 30 I = 1,N - Y(IY) = ZERO - IY = IY + INCY - 30 CONTINUE - ELSE - DO 40 I = 1,N - Y(IY) = BETA*Y(IY) - IY = IY + INCY - 40 CONTINUE - END IF - END IF - END IF - IF (ALPHA.EQ.ZERO) RETURN - KK = 1 - IF (LSAME(UPLO,'U')) THEN -* -* Form y when AP contains the upper triangle. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 60 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - K = KK - DO 50 I = 1,J - 1 - Y(I) = Y(I) + TEMP1*AP(K) - TEMP2 = TEMP2 + DCONJG(AP(K))*X(I) - K = K + 1 - 50 CONTINUE - Y(J) = Y(J) + TEMP1*DBLE(AP(KK+J-1)) + ALPHA*TEMP2 - KK = KK + J - 60 CONTINUE - ELSE - JX = KX - JY = KY - DO 80 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - IX = KX - IY = KY - DO 70 K = KK,KK + J - 2 - Y(IY) = Y(IY) + TEMP1*AP(K) - TEMP2 = TEMP2 + DCONJG(AP(K))*X(IX) - IX = IX + INCX - IY = IY + INCY - 70 CONTINUE - Y(JY) = Y(JY) + TEMP1*DBLE(AP(KK+J-1)) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - KK = KK + J - 80 CONTINUE - END IF - ELSE -* -* Form y when AP contains the lower triangle. -* - IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN - DO 100 J = 1,N - TEMP1 = ALPHA*X(J) - TEMP2 = ZERO - Y(J) = Y(J) + TEMP1*DBLE(AP(KK)) - K = KK + 1 - DO 90 I = J + 1,N - Y(I) = Y(I) + TEMP1*AP(K) - TEMP2 = TEMP2 + DCONJG(AP(K))*X(I) - K = K + 1 - 90 CONTINUE - Y(J) = Y(J) + ALPHA*TEMP2 - KK = KK + (N-J+1) - 100 CONTINUE - ELSE - JX = KX - JY = KY - DO 120 J = 1,N - TEMP1 = ALPHA*X(JX) - TEMP2 = ZERO - Y(JY) = Y(JY) + TEMP1*DBLE(AP(KK)) - IX = JX - IY = JY - DO 110 K = KK + 1,KK + N - J - IX = IX + INCX - IY = IY + INCY - Y(IY) = Y(IY) + TEMP1*AP(K) - TEMP2 = TEMP2 + DCONJG(AP(K))*X(IX) - 110 CONTINUE - Y(JY) = Y(JY) + ALPHA*TEMP2 - JX = JX + INCX - JY = JY + INCY - KK = KK + (N-J+1) - 120 CONTINUE - END IF - END IF -* - RETURN -* -* End of ZHPMV . -* - END diff --git a/external/eigen3/blas/ztbmv.f b/external/eigen3/blas/ztbmv.f deleted file mode 100644 index 7c85c1b..0000000 --- a/external/eigen3/blas/ztbmv.f +++ /dev/null @@ -1,366 +0,0 @@ - SUBROUTINE ZTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX) -* .. Scalar Arguments .. - INTEGER INCX,K,LDA,N - CHARACTER DIAG,TRANS,UPLO -* .. -* .. Array Arguments .. - DOUBLE COMPLEX A(LDA,*),X(*) -* .. -* -* Purpose -* ======= -* -* ZTBMV performs one of the matrix-vector operations -* -* x := A*x, or x := A'*x, or x := conjg( A' )*x, -* -* where x is an n element vector and A is an n by n unit, or non-unit, -* upper or lower triangular band matrix, with ( k + 1 ) diagonals. -* -* Arguments -* ========== -* -* UPLO - CHARACTER*1. -* On entry, UPLO specifies whether the matrix is an upper or -* lower triangular matrix as follows: -* -* UPLO = 'U' or 'u' A is an upper triangular matrix. -* -* UPLO = 'L' or 'l' A is a lower triangular matrix. -* -* Unchanged on exit. -* -* TRANS - CHARACTER*1. -* On entry, TRANS specifies the operation to be performed as -* follows: -* -* TRANS = 'N' or 'n' x := A*x. -* -* TRANS = 'T' or 't' x := A'*x. -* -* TRANS = 'C' or 'c' x := conjg( A' )*x. -* -* Unchanged on exit. -* -* DIAG - CHARACTER*1. -* On entry, DIAG specifies whether or not A is unit -* triangular as follows: -* -* DIAG = 'U' or 'u' A is assumed to be unit triangular. -* -* DIAG = 'N' or 'n' A is not assumed to be unit -* triangular. -* -* Unchanged on exit. -* -* N - INTEGER. -* On entry, N specifies the order of the matrix A. -* N must be at least zero. -* Unchanged on exit. -* -* K - INTEGER. -* On entry with UPLO = 'U' or 'u', K specifies the number of -* super-diagonals of the matrix A. -* On entry with UPLO = 'L' or 'l', K specifies the number of -* sub-diagonals of the matrix A. -* K must satisfy 0 .le. K. -* Unchanged on exit. -* -* A - COMPLEX*16 array of DIMENSION ( LDA, n ). -* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) -* by n part of the array A must contain the upper triangular -* band part of the matrix of coefficients, supplied column by -* column, with the leading diagonal of the matrix in row -* ( k + 1 ) of the array, the first super-diagonal starting at -* position 2 in row k, and so on. The top left k by k triangle -* of the array A is not referenced. -* The following program segment will transfer an upper -* triangular band matrix from conventional full matrix storage -* to band storage: -* -* DO 20, J = 1, N -* M = K + 1 - J -* DO 10, I = MAX( 1, J - K ), J -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) -* by n part of the array A must contain the lower triangular -* band part of the matrix of coefficients, supplied column by -* column, with the leading diagonal of the matrix in row 1 of -* the array, the first sub-diagonal starting at position 1 in -* row 2, and so on. The bottom right k by k triangle of the -* array A is not referenced. -* The following program segment will transfer a lower -* triangular band matrix from conventional full matrix storage -* to band storage: -* -* DO 20, J = 1, N -* M = 1 - J -* DO 10, I = J, MIN( N, J + K ) -* A( M + I, J ) = matrix( I, J ) -* 10 CONTINUE -* 20 CONTINUE -* -* Note that when DIAG = 'U' or 'u' the elements of the array A -* corresponding to the diagonal elements of the matrix are not -* referenced, but are assumed to be unity. -* Unchanged on exit. -* -* LDA - INTEGER. -* On entry, LDA specifies the first dimension of A as declared -* in the calling (sub) program. LDA must be at least -* ( k + 1 ). -* Unchanged on exit. -* -* X - COMPLEX*16 array of dimension at least -* ( 1 + ( n - 1 )*abs( INCX ) ). -* Before entry, the incremented array X must contain the n -* element vector x. On exit, X is overwritten with the -* tranformed vector x. -* -* INCX - INTEGER. -* On entry, INCX specifies the increment for the elements of -* X. INCX must not be zero. -* Unchanged on exit. -* -* Further Details -* =============== -* -* Level 2 Blas routine. -* -* -- Written on 22-October-1986. -* Jack Dongarra, Argonne National Lab. -* Jeremy Du Croz, Nag Central Office. -* Sven Hammarling, Nag Central Office. -* Richard Hanson, Sandia National Labs. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE COMPLEX ZERO - PARAMETER (ZERO= (0.0D+0,0.0D+0)) -* .. -* .. Local Scalars .. - DOUBLE COMPLEX TEMP - INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L - LOGICAL NOCONJ,NOUNIT -* .. -* .. External Functions .. - LOGICAL LSAME - EXTERNAL LSAME -* .. -* .. External Subroutines .. - EXTERNAL XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC DCONJG,MAX,MIN -* .. -* -* Test the input parameters. -* - INFO = 0 - IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN - INFO = 1 - ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. - + .NOT.LSAME(TRANS,'C')) THEN - INFO = 2 - ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN - INFO = 3 - ELSE IF (N.LT.0) THEN - INFO = 4 - ELSE IF (K.LT.0) THEN - INFO = 5 - ELSE IF (LDA.LT. (K+1)) THEN - INFO = 7 - ELSE IF (INCX.EQ.0) THEN - INFO = 9 - END IF - IF (INFO.NE.0) THEN - CALL XERBLA('ZTBMV ',INFO) - RETURN - END IF -* -* Quick return if possible. -* - IF (N.EQ.0) RETURN -* - NOCONJ = LSAME(TRANS,'T') - NOUNIT = LSAME(DIAG,'N') -* -* Set up the start point in X if the increment is not unity. This -* will be ( N - 1 )*INCX too small for descending loops. -* - IF (INCX.LE.0) THEN - KX = 1 - (N-1)*INCX - ELSE IF (INCX.NE.1) THEN - KX = 1 - END IF -* -* Start the operations. In this version the elements of A are -* accessed sequentially with one pass through A. -* - IF (LSAME(TRANS,'N')) THEN -* -* Form x := A*x. -* - IF (LSAME(UPLO,'U')) THEN - KPLUS1 = K + 1 - IF (INCX.EQ.1) THEN - DO 20 J = 1,N - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - L = KPLUS1 - J - DO 10 I = MAX(1,J-K),J - 1 - X(I) = X(I) + TEMP*A(L+I,J) - 10 CONTINUE - IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J) - END IF - 20 CONTINUE - ELSE - JX = KX - DO 40 J = 1,N - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - L = KPLUS1 - J - DO 30 I = MAX(1,J-K),J - 1 - X(IX) = X(IX) + TEMP*A(L+I,J) - IX = IX + INCX - 30 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J) - END IF - JX = JX + INCX - IF (J.GT.K) KX = KX + INCX - 40 CONTINUE - END IF - ELSE - IF (INCX.EQ.1) THEN - DO 60 J = N,1,-1 - IF (X(J).NE.ZERO) THEN - TEMP = X(J) - L = 1 - J - DO 50 I = MIN(N,J+K),J + 1,-1 - X(I) = X(I) + TEMP*A(L+I,J) - 50 CONTINUE - IF (NOUNIT) X(J) = X(J)*A(1,J) - END IF - 60 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 80 J = N,1,-1 - IF (X(JX).NE.ZERO) THEN - TEMP = X(JX) - IX = KX - L = 1 - J - DO 70 I = MIN(N,J+K),J + 1,-1 - X(IX) = X(IX) + TEMP*A(L+I,J) - IX = IX - INCX - 70 CONTINUE - IF (NOUNIT) X(JX) = X(JX)*A(1,J) - END IF - JX = JX - INCX - IF ((N-J).GE.K) KX = KX - INCX - 80 CONTINUE - END IF - END IF - ELSE -* -* Form x := A'*x or x := conjg( A' )*x. -* - IF (LSAME(UPLO,'U')) THEN - KPLUS1 = K + 1 - IF (INCX.EQ.1) THEN - DO 110 J = N,1,-1 - TEMP = X(J) - L = KPLUS1 - J - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J) - DO 90 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + A(L+I,J)*X(I) - 90 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*DCONJG(A(KPLUS1,J)) - DO 100 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + DCONJG(A(L+I,J))*X(I) - 100 CONTINUE - END IF - X(J) = TEMP - 110 CONTINUE - ELSE - KX = KX + (N-1)*INCX - JX = KX - DO 140 J = N,1,-1 - TEMP = X(JX) - KX = KX - INCX - IX = KX - L = KPLUS1 - J - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J) - DO 120 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + A(L+I,J)*X(IX) - IX = IX - INCX - 120 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*DCONJG(A(KPLUS1,J)) - DO 130 I = J - 1,MAX(1,J-K),-1 - TEMP = TEMP + DCONJG(A(L+I,J))*X(IX) - IX = IX - INCX - 130 CONTINUE - END IF - X(JX) = TEMP - JX = JX - INCX - 140 CONTINUE - END IF - ELSE - IF (INCX.EQ.1) THEN - DO 170 J = 1,N - TEMP = X(J) - L = 1 - J - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*A(1,J) - DO 150 I = J + 1,MIN(N,J+K) - TEMP = TEMP + A(L+I,J)*X(I) - 150 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*DCONJG(A(1,J)) - DO 160 I = J + 1,MIN(N,J+K) - TEMP = TEMP + DCONJG(A(L+I,J))*X(I) - 160 CONTINUE - END IF - X(J) = TEMP - 170 CONTINUE - ELSE - JX = KX - DO 200 J = 1,N - TEMP = X(JX) - KX = KX + INCX - IX = KX - L = 1 - J - IF (NOCONJ) THEN - IF (NOUNIT) TEMP = TEMP*A(1,J) - DO 180 I = J + 1,MIN(N,J+K) - TEMP = TEMP + A(L+I,J)*X(IX) - IX = IX + INCX - 180 CONTINUE - ELSE - IF (NOUNIT) TEMP = TEMP*DCONJG(A(1,J)) - DO 190 I = J + 1,MIN(N,J+K) - TEMP = TEMP + DCONJG(A(L+I,J))*X(IX) - IX = IX + INCX - 190 CONTINUE - END IF - X(JX) = TEMP - JX = JX + INCX - 200 CONTINUE - END IF - END IF - END IF -* - RETURN -* -* End of ZTBMV . -* - END diff --git a/external/eigen3/cmake/Eigen3Config.cmake.in b/external/eigen3/cmake/Eigen3Config.cmake.in new file mode 100644 index 0000000..c5c5468 --- /dev/null +++ b/external/eigen3/cmake/Eigen3Config.cmake.in @@ -0,0 +1,21 @@ +# This file exports the Eigen3::Eigen CMake target which should be passed to the +# target_link_libraries command. + +@PACKAGE_INIT@ + +include ("${CMAKE_CURRENT_LIST_DIR}/Eigen3Targets.cmake") + +# Legacy variables, do *not* use. May be removed in the future. + +set (EIGEN3_FOUND 1) +set (EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake") + +set (EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@") +set (EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@") +set (EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@") +set (EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@") + +set (EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@") +set (EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@") +set (EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@") +set (EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@") diff --git a/external/eigen3/cmake/Eigen3ConfigLegacy.cmake.in b/external/eigen3/cmake/Eigen3ConfigLegacy.cmake.in new file mode 100644 index 0000000..62d7224 --- /dev/null +++ b/external/eigen3/cmake/Eigen3ConfigLegacy.cmake.in @@ -0,0 +1,30 @@ +# -*- cmake -*- +# +# Eigen3Config.cmake(.in) + +# Use the following variables to compile and link against Eigen: +# EIGEN3_FOUND - True if Eigen was found on your system +# EIGEN3_USE_FILE - The file making Eigen usable +# EIGEN3_DEFINITIONS - Definitions needed to build with Eigen +# EIGEN3_INCLUDE_DIR - Directory where signature_of_eigen3_matrix_library can be found +# EIGEN3_INCLUDE_DIRS - List of directories of Eigen and it's dependencies +# EIGEN3_ROOT_DIR - The base directory of Eigen +# EIGEN3_VERSION_STRING - A human-readable string containing the version +# EIGEN3_VERSION_MAJOR - The major version of Eigen +# EIGEN3_VERSION_MINOR - The minor version of Eigen +# EIGEN3_VERSION_PATCH - The patch version of Eigen + +@PACKAGE_INIT@ + +set ( EIGEN3_FOUND 1 ) +set ( EIGEN3_USE_FILE "${CMAKE_CURRENT_LIST_DIR}/UseEigen3.cmake" ) + +set ( EIGEN3_DEFINITIONS "@EIGEN_DEFINITIONS@" ) +set ( EIGEN3_INCLUDE_DIR "@PACKAGE_EIGEN_INCLUDE_DIR@" ) +set ( EIGEN3_INCLUDE_DIRS "@PACKAGE_EIGEN_INCLUDE_DIR@" ) +set ( EIGEN3_ROOT_DIR "@PACKAGE_EIGEN_ROOT_DIR@" ) + +set ( EIGEN3_VERSION_STRING "@EIGEN_VERSION_STRING@" ) +set ( EIGEN3_VERSION_MAJOR "@EIGEN_VERSION_MAJOR@" ) +set ( EIGEN3_VERSION_MINOR "@EIGEN_VERSION_MINOR@" ) +set ( EIGEN3_VERSION_PATCH "@EIGEN_VERSION_PATCH@" ) diff --git a/external/eigen3/cmake/EigenConfigureTesting.cmake b/external/eigen3/cmake/EigenConfigureTesting.cmake index b5b62ae..afc24b5 100644 --- a/external/eigen3/cmake/EigenConfigureTesting.cmake +++ b/external/eigen3/cmake/EigenConfigureTesting.cmake @@ -1,7 +1,7 @@ include(EigenTesting) include(CheckCXXSourceCompiles) -# configure the "site" and "buildname" +# configure the "site" and "buildname" ei_set_sitename() # retrieve and store the build string @@ -46,18 +46,16 @@ if(CMAKE_COMPILER_IS_GNUCXX) if(EIGEN_COVERAGE_TESTING) set(COVERAGE_FLAGS "-fprofile-arcs -ftest-coverage") set(CTEST_CUSTOM_COVERAGE_EXCLUDE "/test/") - else(EIGEN_COVERAGE_TESTING) - set(COVERAGE_FLAGS "") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS}") endif(EIGEN_COVERAGE_TESTING) - if(EIGEN_TEST_C++0x) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++0x") - endif(EIGEN_TEST_C++0x) - if(CMAKE_SYSTEM_NAME MATCHES Linux) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS} -g2") - set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COVERAGE_FLAGS} -O2 -g2") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${COVERAGE_FLAGS} -fno-inline-functions") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${COVERAGE_FLAGS} -O0 -g3") - endif(CMAKE_SYSTEM_NAME MATCHES Linux) + elseif(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS") endif(CMAKE_COMPILER_IS_GNUCXX) + + +check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CXX11) + +if(EIGEN_TEST_CXX11 AND EIGEN_COMPILER_SUPPORT_CXX11) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +endif() diff --git a/external/eigen3/cmake/EigenDetermineVSServicePack.cmake b/external/eigen3/cmake/EigenDetermineVSServicePack.cmake index 8e5546a..fed7819 100644 --- a/external/eigen3/cmake/EigenDetermineVSServicePack.cmake +++ b/external/eigen3/cmake/EigenDetermineVSServicePack.cmake @@ -4,7 +4,6 @@ include(CMakeDetermineVSServicePack) # _DetermineVSServicePack_FastCheckVersionWithCompiler which lead to errors on some systems. function(EigenDetermineVSServicePack _pack) if(NOT DETERMINED_VS_SERVICE_PACK OR NOT ${_pack}) - if(NOT DETERMINED_VS_SERVICE_PACK) _DetermineVSServicePack_CheckVersionWithTryCompile(DETERMINED_VS_SERVICE_PACK _cl_version) if(NOT DETERMINED_VS_SERVICE_PACK) @@ -13,10 +12,25 @@ function(EigenDetermineVSServicePack _pack) endif() if(DETERMINED_VS_SERVICE_PACK) - if(_cl_version) # Call helper function to determine VS version _DetermineVSServicePackFromCompiler(_sp "${_cl_version}") + + # temporary fix, until CMake catches up + if (NOT _sp) + if(${_cl_version} VERSION_EQUAL "17.00.50727.1") + set(_sp "vc110") + elseif(${_cl_version} VERSION_EQUAL "17.00.51106.1") + set(_sp "vc110sp1") + elseif(${_cl_version} VERSION_EQUAL "17.00.60315.1") + set(_sp "vc110sp2") + elseif(${_cl_version} VERSION_EQUAL "17.00.60610.1") + set(_sp "vc110sp3") + else() + set(_sp ${CMAKE_CXX_COMPILER_VERSION}) + endif() + endif() + if(_sp) set(${_pack} ${_sp} CACHE INTERNAL "The Visual Studio Release with Service Pack") diff --git a/external/eigen3/cmake/EigenTesting.cmake b/external/eigen3/cmake/EigenTesting.cmake index cbe12d5..a92a297 100644 --- a/external/eigen3/cmake/EigenTesting.cmake +++ b/external/eigen3/cmake/EigenTesting.cmake @@ -1,19 +1,48 @@ macro(ei_add_property prop value) - get_property(previous GLOBAL PROPERTY ${prop}) + get_property(previous GLOBAL PROPERTY ${prop}) if ((NOT previous) OR (previous STREQUAL "")) set_property(GLOBAL PROPERTY ${prop} "${value}") else() set_property(GLOBAL PROPERTY ${prop} "${previous} ${value}") - endif() + endif() endmacro(ei_add_property) #internal. See documentation of ei_add_test for details. macro(ei_add_test_internal testname testname_with_suffix) set(targetname ${testname_with_suffix}) - set(filename ${testname}.cpp) - add_executable(${targetname} ${filename}) + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION STREQUAL cu) + if(EIGEN_TEST_CUDA_CLANG) + set_source_files_properties(${filename} PROPERTIES LANGUAGE CXX) + if(CUDA_64_BIT_DEVICE_CODE) + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib64") + else() + link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib") + endif() + if (${ARGC} GREATER 2) + add_executable(${targetname} ${filename}) + else() + add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + endif() + target_link_libraries(${targetname} "cudart_static" "cuda" "dl" "rt" "pthread") + else() + if (${ARGC} GREATER 2) + cuda_add_executable(${targetname} ${filename} OPTIONS ${ARGV2}) + else() + cuda_add_executable(${targetname} ${filename}) + endif() + endif() + else() + add_executable(${targetname} ${filename}) + endif() + if (targetname MATCHES "^eigen2_") add_dependencies(eigen2_buildtests ${targetname}) else() @@ -27,20 +56,20 @@ macro(ei_add_test_internal testname testname_with_suffix) ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1") endif(EIGEN_DEBUG_ASSERTS) endif(EIGEN_NO_ASSERTION_CHECKING) - + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}") ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}") - - if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS) + + if(MSVC) ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj") - endif() + endif() # let the user pass flags. if(${ARGC} GREATER 2) ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}") endif(${ARGC} GREATER 2) - + if(EIGEN_TEST_CUSTOM_CXX_FLAGS) ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}") endif() @@ -66,16 +95,12 @@ macro(ei_add_test_internal testname testname_with_suffix) # notice: no double quotes around ${libs_to_link} here. It may be a list. target_link_libraries(${targetname} ${libs_to_link}) endif() - endif() - - if(EIGEN_BIN_BASH_EXISTS) - add_test(${testname_with_suffix} "${Eigen_SOURCE_DIR}/test/runtest.sh" "${testname_with_suffix}") - else() - add_test(${testname_with_suffix} "${targetname}") endif() - + + add_test(${testname_with_suffix} "${targetname}") + # Specify target and test labels accoirding to EIGEN_CURRENT_SUBPROJECT - get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT) + get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT) if ((current_subproject) AND (NOT (current_subproject STREQUAL ""))) set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}") add_dependencies("Build${current_subproject}" ${targetname}) @@ -84,6 +109,103 @@ macro(ei_add_test_internal testname testname_with_suffix) endmacro(ei_add_test_internal) +# SYCL +macro(ei_add_test_internal_sycl testname testname_with_suffix) + include_directories( SYSTEM ${COMPUTECPP_PACKAGE_ROOT_DIR}/include) + set(targetname ${testname_with_suffix}) + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + set( include_file ${CMAKE_CURRENT_BINARY_DIR}/inc_${filename}) + set( bc_file ${CMAKE_CURRENT_BINARY_DIR}/${filename}) + set( host_file ${CMAKE_CURRENT_SOURCE_DIR}/${filename}) + + ADD_CUSTOM_COMMAND( + OUTPUT ${include_file} + COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${host_file}\\\"" > ${include_file} + COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${bc_file}.sycl\\\"" >> ${include_file} + DEPENDS ${filename} ${bc_file}.sycl + COMMENT "Building ComputeCpp integration header file ${include_file}" + ) + # Add a custom target for the generated integration header + add_custom_target(${testname}_integration_header_sycl DEPENDS ${include_file}) + + add_executable(${targetname} ${include_file}) + add_dependencies(${targetname} ${testname}_integration_header_sycl) + add_sycl_to_target(${targetname} ${filename} ${CMAKE_CURRENT_BINARY_DIR}) + + if (targetname MATCHES "^eigen2_") + add_dependencies(eigen2_buildtests ${targetname}) + else() + add_dependencies(buildtests ${targetname}) + endif() + + if(EIGEN_NO_ASSERTION_CHECKING) + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1") + else(EIGEN_NO_ASSERTION_CHECKING) + if(EIGEN_DEBUG_ASSERTS) + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1") + endif(EIGEN_DEBUG_ASSERTS) + endif(EIGEN_NO_ASSERTION_CHECKING) + + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}") + + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}") + + if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS) + ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj") + endif() + + # let the user pass flags. + if(${ARGC} GREATER 2) + ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}") + endif(${ARGC} GREATER 2) + + if(EIGEN_TEST_CUSTOM_CXX_FLAGS) + ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}") + endif() + + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(${targetname} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + endif() + if(EXTERNAL_LIBS) + target_link_libraries(${targetname} ${EXTERNAL_LIBS}) + endif() + if(EIGEN_TEST_CUSTOM_LINKER_FLAGS) + target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS}) + endif() + + if(${ARGC} GREATER 3) + set(libs_to_link ${ARGV3}) + # it could be that some cmake module provides a bad library string " " (just spaces), + # and that severely breaks target_link_libraries ("can't link to -l-lstdc++" errors). + # so we check for strings containing only spaces. + string(STRIP "${libs_to_link}" libs_to_link_stripped) + string(LENGTH "${libs_to_link_stripped}" libs_to_link_stripped_length) + if(${libs_to_link_stripped_length} GREATER 0) + # notice: no double quotes around ${libs_to_link} here. It may be a list. + target_link_libraries(${targetname} ${libs_to_link}) + endif() + endif() + + add_test(${testname_with_suffix} "${targetname}") + + # Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT + get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT) + if ((current_subproject) AND (NOT (current_subproject STREQUAL ""))) + set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}") + add_dependencies("Build${current_subproject}" ${targetname}) + set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}") + endif() + + +endmacro(ei_add_test_internal_sycl) + + # Macro to add a test # # the unique mandatory parameter testname must correspond to a file @@ -131,7 +253,13 @@ macro(ei_add_test testname) set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n") set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") - file(READ "${testname}.cpp" test_source) + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + file(READ "${filename}" test_source) set(parts 0) string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+" occurences "${test_source}") @@ -154,6 +282,39 @@ macro(ei_add_test testname) endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes) endmacro(ei_add_test) +macro(ei_add_test_sycl testname) + get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) + set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n") + set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + file(READ "${filename}" test_source) + set(parts 0) + string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+" + occurences "${test_source}") + string(REGEX REPLACE "CALL_SUBTEST_|EIGEN_TEST_PART_|EIGEN_SUFFIXES" "" suffixes "${occurences}") + list(REMOVE_DUPLICATES suffixes) + if(EIGEN_SPLIT_LARGE_TESTS AND suffixes) + add_custom_target(${testname}) + foreach(suffix ${suffixes}) + ei_add_test_internal_sycl(${testname} ${testname}_${suffix} + "${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}") + add_dependencies(${testname} ${testname}_${suffix}) + endforeach(suffix) + else(EIGEN_SPLIT_LARGE_TESTS AND suffixes) + set(symbols_to_enable_all_parts "") + foreach(suffix ${suffixes}) + set(symbols_to_enable_all_parts + "${symbols_to_enable_all_parts} -DEIGEN_TEST_PART_${suffix}=1") + endforeach(suffix) + ei_add_test_internal_sycl(${testname} ${testname} "${ARGV1} ${symbols_to_enable_all_parts}" "${ARGV2}") + endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes) +endmacro(ei_add_test_sycl) # adds a failtest, i.e. a test that succeed if the program fails to compile # note that the test runner for these is CMake itself, when passed -DEIGEN_FAILTEST=ON @@ -218,7 +379,7 @@ macro(ei_testing_print_summary) elseif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) message(STATUS "Explicit vectorization disabled (alignment kept enabled)") else() - + message(STATUS "Maximal matrix/vector size: ${EIGEN_TEST_MAX_SIZE}") if(EIGEN_TEST_SSE2) @@ -251,18 +412,75 @@ macro(ei_testing_print_summary) message(STATUS "SSE4.2: Using architecture defaults") endif() + if(EIGEN_TEST_AVX) + message(STATUS "AVX: ON") + else() + message(STATUS "AVX: Using architecture defaults") + endif() + + if(EIGEN_TEST_FMA) + message(STATUS "FMA: ON") + else() + message(STATUS "FMA: Using architecture defaults") + endif() + + if(EIGEN_TEST_AVX512) + message(STATUS "AVX512: ON") + else() + message(STATUS "AVX512: Using architecture defaults") + endif() + if(EIGEN_TEST_ALTIVEC) message(STATUS "Altivec: ON") else() message(STATUS "Altivec: Using architecture defaults") endif() + if(EIGEN_TEST_VSX) + message(STATUS "VSX: ON") + else() + message(STATUS "VSX: Using architecture defaults") + endif() + if(EIGEN_TEST_NEON) message(STATUS "ARM NEON: ON") else() message(STATUS "ARM NEON: Using architecture defaults") endif() + if(EIGEN_TEST_NEON64) + message(STATUS "ARMv8 NEON: ON") + else() + message(STATUS "ARMv8 NEON: Using architecture defaults") + endif() + + if(EIGEN_TEST_ZVECTOR) + message(STATUS "S390X ZVECTOR: ON") + else() + message(STATUS "S390X ZVECTOR: Using architecture defaults") + endif() + + if(EIGEN_TEST_CXX11) + message(STATUS "C++11: ON") + else() + message(STATUS "C++11: OFF") + endif() + + if(EIGEN_TEST_SYCL) + message(STATUS "SYCL: ON") + else() + message(STATUS "SYCL: OFF") + endif() + if(EIGEN_TEST_CUDA) + if(EIGEN_TEST_CUDA_CLANG) + message(STATUS "CUDA: ON (using clang)") + else() + message(STATUS "CUDA: ON (using nvcc)") + endif() + else() + message(STATUS "CUDA: OFF") + endif() + endif() # vectorization / alignment options message(STATUS "\n${EIGEN_TESTING_SUMMARY}") @@ -287,7 +505,7 @@ macro(ei_init_testing) set_property(GLOBAL PROPERTY EIGEN_FAILTEST_FAILURE_COUNT "0") set_property(GLOBAL PROPERTY EIGEN_FAILTEST_COUNT "0") - + # uncomment anytime you change the ei_get_compilerver_from_cxx_version_string macro # ei_test_get_compilerver_from_cxx_version_string() endmacro(ei_init_testing) @@ -296,47 +514,47 @@ macro(ei_set_sitename) # if the sitename is not yet set, try to set it if(NOT ${SITE} OR ${SITE} STREQUAL "") set(eigen_computername $ENV{COMPUTERNAME}) - set(eigen_hostname $ENV{HOSTNAME}) + set(eigen_hostname $ENV{HOSTNAME}) if(eigen_hostname) set(SITE ${eigen_hostname}) - elseif(eigen_computername) - set(SITE ${eigen_computername}) + elseif(eigen_computername) + set(SITE ${eigen_computername}) endif() endif() # in case it is already set, enforce lower case if(SITE) string(TOLOWER ${SITE} SITE) - endif() + endif() endmacro(ei_set_sitename) macro(ei_get_compilerver VAR) - if(MSVC) - # on windows system, we use a modified CMake script - include(EigenDetermineVSServicePack) - EigenDetermineVSServicePack( my_service_pack ) + if(MSVC) + # on windows system, we use a modified CMake script + include(EigenDetermineVSServicePack) + EigenDetermineVSServicePack( my_service_pack ) - if( my_service_pack ) - set(${VAR} ${my_service_pack}) + if( my_service_pack ) + set(${VAR} ${my_service_pack}) + else() + set(${VAR} "na") + endif() else() - set(${VAR} "na") - endif() - else() # on all other system we rely on ${CMAKE_CXX_COMPILER} # supporting a "--version" or "/version" flag - + if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} EQUAL "Intel") set(EIGEN_CXX_FLAG_VERSION "/version") else() set(EIGEN_CXX_FLAG_VERSION "--version") endif() - - execute_process(COMMAND ${CMAKE_CXX_COMPILER} ${EIGEN_CXX_FLAG_VERSION} + + execute_process(COMMAND ${CMAKE_CXX_COMPILER} ${EIGEN_CXX_FLAG_VERSION} OUTPUT_VARIABLE eigen_cxx_compiler_version_string OUTPUT_STRIP_TRAILING_WHITESPACE) string(REGEX REPLACE "[\n\r].*" "" eigen_cxx_compiler_version_string ${eigen_cxx_compiler_version_string}) - + ei_get_compilerver_from_cxx_version_string("${eigen_cxx_compiler_version_string}" CNAME CVER) set(${VAR} "${CNAME}-${CVER}") - + endif() endmacro(ei_get_compilerver) @@ -345,18 +563,20 @@ endmacro(ei_get_compilerver) # the testing macro call in ei_init_testing() of the EigenTesting.cmake file. # See also the ei_test_get_compilerver_from_cxx_version_string macro at the end of the file macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER) - # extract possible compiler names + # extract possible compiler names string(REGEX MATCH "g\\+\\+" ei_has_gpp ${VERSTRING}) string(REGEX MATCH "llvm|LLVM" ei_has_llvm ${VERSTRING}) string(REGEX MATCH "gcc|GCC" ei_has_gcc ${VERSTRING}) string(REGEX MATCH "icpc|ICC" ei_has_icpc ${VERSTRING}) string(REGEX MATCH "clang|CLANG" ei_has_clang ${VERSTRING}) - + # combine them if((ei_has_llvm) AND (ei_has_gpp OR ei_has_gcc)) set(${CNAME} "llvm-g++") elseif((ei_has_llvm) AND (ei_has_clang)) set(${CNAME} "llvm-clang++") + elseif(ei_has_clang) + set(${CNAME} "clang++") elseif(ei_has_icpc) set(${CNAME} "icpc") elseif(ei_has_gpp OR ei_has_gcc) @@ -364,7 +584,7 @@ macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER) else() set(${CNAME} "_") endif() - + # extract possible version numbers # first try to extract 3 isolated numbers: string(REGEX MATCH " [0-9]+\\.[0-9]+\\.[0-9]+" eicver ${VERSTRING}) @@ -382,9 +602,9 @@ macro(ei_get_compilerver_from_cxx_version_string VERSTRING CNAME CVER) endif() endif() endif() - + string(REGEX REPLACE ".(.*)" "\\1" ${CVER} ${eicver}) - + endmacro(ei_get_compilerver_from_cxx_version_string) macro(ei_get_cxxflags VAR) @@ -392,8 +612,18 @@ macro(ei_get_cxxflags VAR) ei_is_64bit_env(IS_64BIT_ENV) if(EIGEN_TEST_NEON) set(${VAR} NEON) + elseif(EIGEN_TEST_NEON64) + set(${VAR} NEON) + elseif(EIGEN_TEST_ZVECTOR) + set(${VAR} ZVECTOR) + elseif(EIGEN_TEST_VSX) + set(${VAR} VSX) elseif(EIGEN_TEST_ALTIVEC) set(${VAR} ALVEC) + elseif(EIGEN_TEST_FMA) + set(${VAR} FMA) + elseif(EIGEN_TEST_AVX) + set(${VAR} AVX) elseif(EIGEN_TEST_SSE4_2) set(${VAR} SSE42) elseif(EIGEN_TEST_SSE4_1) @@ -403,30 +633,30 @@ macro(ei_get_cxxflags VAR) elseif(EIGEN_TEST_SSE3) set(${VAR} SSE3) elseif(EIGEN_TEST_SSE2 OR IS_64BIT_ENV) - set(${VAR} SSE2) + set(${VAR} SSE2) endif() if(EIGEN_TEST_OPENMP) if (${VAR} STREQUAL "") - set(${VAR} OMP) - else() - set(${VAR} ${${VAR}}-OMP) - endif() + set(${VAR} OMP) + else() + set(${VAR} ${${VAR}}-OMP) + endif() endif() - + if(EIGEN_DEFAULT_TO_ROW_MAJOR) if (${VAR} STREQUAL "") - set(${VAR} ROW) - else() - set(${VAR} ${${VAR}}-ROWMAJ) - endif() + set(${VAR} ROW) + else() + set(${VAR} ${${VAR}}-ROWMAJ) + endif() endif() endmacro(ei_get_cxxflags) macro(ei_set_build_string) ei_get_compilerver(LOCAL_COMPILER_VERSION) ei_get_cxxflags(LOCAL_COMPILER_FLAGS) - + include(EigenDetermineOSVersion) DetermineOSVersion(OS_VERSION) @@ -442,7 +672,11 @@ macro(ei_set_build_string) else() set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-64bit) endif() - + + if(EIGEN_TEST_CXX11) + set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-cxx11) + endif() + if(EIGEN_BUILD_STRING_SUFFIX) set(TMP_BUILD_STRING ${TMP_BUILD_STRING}-${EIGEN_BUILD_STRING_SUFFIX}) endif() diff --git a/external/eigen3/cmake/EigenUninstall.cmake b/external/eigen3/cmake/EigenUninstall.cmake new file mode 100644 index 0000000..4dae8c8 --- /dev/null +++ b/external/eigen3/cmake/EigenUninstall.cmake @@ -0,0 +1,40 @@ +################ CMake Uninstall Template ####################### +# CMake Template file for uninstallation of files +# mentioned in 'install_manifest.txt' +# +# Used by uinstall target +################################################################# + +set(MANIFEST "${CMAKE_CURRENT_BINARY_DIR}/install_manifest.txt") + +if(EXISTS ${MANIFEST}) + message(STATUS "============== Uninstalling Eigen ===================") + + file(STRINGS ${MANIFEST} files) + foreach(file ${files}) + if(EXISTS ${file}) + message(STATUS "Removing file: '${file}'") + + execute_process( + COMMAND ${CMAKE_COMMAND} -E remove ${file} + OUTPUT_VARIABLE rm_out + RESULT_VARIABLE rm_retval + ) + + if(NOT "${rm_retval}" STREQUAL 0) + message(FATAL_ERROR "Failed to remove file: '${file}'.") + endif() + else() + message(STATUS "File '${file}' does not exist.") + endif() + endforeach(file) + + message(STATUS "========== Finished Uninstalling Eigen ==============") +else() + message(STATUS "Cannot find install manifest: '${MANIFEST}'") + message(STATUS "Probably make install has not been performed") + message(STATUS " or install_manifest.txt has been deleted.") +endif() + + + diff --git a/external/eigen3/cmake/FindAdolc.cmake b/external/eigen3/cmake/FindAdolc.cmake index 1a7ff36..937e549 100644 --- a/external/eigen3/cmake/FindAdolc.cmake +++ b/external/eigen3/cmake/FindAdolc.cmake @@ -5,7 +5,7 @@ endif (ADOLC_INCLUDES AND ADOLC_LIBRARIES) find_path(ADOLC_INCLUDES NAMES - adolc/adouble.h + adolc/adtl.h PATHS $ENV{ADOLCDIR} ${INCLUDE_INSTALL_DIR} diff --git a/external/eigen3/cmake/FindBLAS.cmake b/external/eigen3/cmake/FindBLAS.cmake index 68c4e07..9f74b07 100644 --- a/external/eigen3/cmake/FindBLAS.cmake +++ b/external/eigen3/cmake/FindBLAS.cmake @@ -1,385 +1,1363 @@ -# Find BLAS library +### # -# This module finds an installed library that implements the BLAS +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find BLAS library +# This module finds an installed fortran library that implements the BLAS # linear-algebra interface (see http://www.netlib.org/blas/). -# The list of libraries searched for is mainly taken +# The list of libraries searched for is taken # from the autoconf macro file, acx_blas.m4 (distributed at # http://ac-archive.sourceforge.net/ac-archive/acx_blas.html). # # This module sets the following variables: # BLAS_FOUND - set to true if a library implementing the BLAS interface # is found -# BLAS_INCLUDE_DIR - Directories containing the BLAS header files -# BLAS_DEFINITIONS - Compilation options to use BLAS -# BLAS_LINKER_FLAGS - Linker flags to use BLAS (excluding -l +# BLAS_LINKER_FLAGS - uncached list of required linker flags (excluding -l # and -L). -# BLAS_LIBRARIES_DIR - Directories containing the BLAS libraries. -# May be null if BLAS_LIBRARIES contains libraries name using full path. -# BLAS_LIBRARIES - List of libraries to link against BLAS interface. -# May be null if the compiler supports auto-link (e.g. VC++). -# BLAS_USE_FILE - The name of the cmake module to include to compile -# applications or libraries using BLAS. +# BLAS_COMPILER_FLAGS - uncached list of required compiler flags (including -I for mkl headers). +# BLAS_LIBRARIES - uncached list of libraries (using full path name) to +# link against to use BLAS +# BLAS95_LIBRARIES - uncached list of libraries (using full path name) +# to link against to use BLAS95 interface +# BLAS95_FOUND - set to true if a library implementing the BLAS f95 interface +# is found +# BLA_STATIC if set on this determines what kind of linkage we do (static) +# BLA_VENDOR if set checks only the specified vendor, if not set checks +# all the possibilities +# BLAS_VENDOR_FOUND stores the BLAS vendor found +# BLA_F95 if set on tries to find the f95 interfaces for BLAS/LAPACK +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DBLAS_DIR=path/to/blas): +# BLAS_DIR - Where to find the base directory of blas +# BLAS_INCDIR - Where to find the header files +# BLAS_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: BLAS_DIR, BLAS_INCDIR, BLAS_LIBDIR +# For MKL case and if no paths are given as hints, we will try to use the MKLROOT +# environment variable +# BLAS_VERBOSE Print some additional information during BLAS libraries detection +########## +### List of vendors (BLA_VENDOR) valid in this module +########## List of vendors (BLA_VENDOR) valid in this module +## Open (for OpenBlas), Eigen (for EigenBlas), Goto, ATLAS PhiPACK, +##  CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT +## Intel10_32 (intel mkl v10 32 bit), Intel10_64lp (intel mkl v10 64 bit,lp thread model, lp64 model), +## Intel10_64lp_seq (intel mkl v10 64 bit,sequential code, lp64 model), +## Intel( older versions of mkl 32 and 64 bit), +##  ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic +# C/CXX should be enabled to use Intel mkl +### +# We handle different modes to find the dependency +# +# - Detection if already installed on the system +# - BLAS libraries can be detected from different ways +# Here is the order of precedence: +# 1) we look in cmake variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined +# 2) we look in environment variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined +# 3) we look in common environnment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH) +# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables: +# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES +# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES +# + +#============================================================================= +# Copyright 2007-2009 Kitware, Inc. # -# This module was modified by CGAL team: -# - find libraries for a C++ compiler, instead of Fortran -# - added BLAS_INCLUDE_DIR, BLAS_DEFINITIONS and BLAS_LIBRARIES_DIR -# - removed BLAS95_LIBRARIES +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + +## Some macros to print status when search for headers and libs +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Blas_Status _libname _lib_to_find) + + # save _libname upper/lower case + string(TOUPPER ${_libname} LIBNAME) + string(TOLOWER ${_libname} libname) + + # print status + #message(" ") + if(${LIBNAME}_LIBDIR) + message("${Yellow}${LIBNAME}_LIBDIR is defined but ${_lib_to_find}" + "has not been found in ${ARGN}${ColourReset}") + else() + if(${LIBNAME}_DIR) + message("${Yellow}${LIBNAME}_DIR is defined but ${_lib_to_find}" + "has not been found in ${ARGN}${ColourReset}") + else() + message("${Yellow}${_lib_to_find} not found." + "Nor ${LIBNAME}_DIR neither ${LIBNAME}_LIBDIR" + "are defined so that we look for ${_lib_to_find} in" + "system paths (Linux: LD_LIBRARY_PATH, Windows: LIB," + "Mac: DYLD_LIBRARY_PATH," + "CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES," + "CMAKE_C_IMPLICIT_LINK_DIRECTORIES)${ColourReset}") + if(_lib_env) + message("${Yellow}${_lib_to_find} has not been found in" + "${_lib_env}${ColourReset}") + endif() + endif() + endif() + message("${BoldYellow}Please indicate where to find ${_lib_to_find}. You have three options:\n" + "- Option 1: Provide the Installation directory of BLAS library with cmake option: -D${LIBNAME}_DIR=your/path/to/${libname}/\n" + "- Option 2: Provide the directory where to find the library with cmake option: -D${LIBNAME}_LIBDIR=your/path/to/${libname}/lib/\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "- Option 4: If your library provides a PkgConfig file, make sure pkg-config finds your library${ColourReset}") + +endmacro() + +# This macro informs why the _lib_to_find file has not been found +macro(Print_Find_Library_Blas_CheckFunc_Status _name) + + # save _libname upper/lower case + string(TOUPPER ${_name} FUNCNAME) + string(TOLOWER ${_name} funcname) + + # print status + #message(" ") + message("${Red}Libs have been found but check of symbol ${_name} failed " + "with following libraries ${ARGN}${ColourReset}") + message("${BoldRed}Please open your error file CMakeFiles/CMakeError.log" + "to figure out why it fails${ColourReset}") + #message(" ") + +endmacro() + +if (NOT BLAS_FOUND) + set(BLAS_DIR "" CACHE PATH "Installation directory of BLAS library") + if (NOT BLAS_FIND_QUIETLY) + message(STATUS "A cache variable, namely BLAS_DIR, has been set to specify the install directory of BLAS") + endif() +endif() + +option(BLAS_VERBOSE "Print some additional information during BLAS libraries detection" OFF) +mark_as_advanced(BLAS_VERBOSE) include(CheckFunctionExists) +include(CheckFortranFunctionExists) +set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) -# This macro checks for the existence of the combination of fortran libraries -# given by _list. If the combination is found, this macro checks (using the -# check_function_exists macro) whether can link against that library -# combination using the name of a routine given by _name using the linker -# flags given by _flags. If the combination of libraries is found and passes -# the link test, LIBRARIES is set to the list of complete library paths that -# have been found and DEFINITIONS to the required definitions. -# Otherwise, LIBRARIES is set to FALSE. -# N.B. _prefix is the prefix applied to the names of all cached variables that -# are generated internally and marked advanced by this macro. -macro(check_fortran_libraries DEFINITIONS LIBRARIES _prefix _name _flags _list _path) - #message("DEBUG: check_fortran_libraries(${_list} in ${_path})") - - # Check for the existence of the libraries given by _list - set(_libraries_found TRUE) - set(_libraries_work FALSE) - set(${DEFINITIONS} "") - set(${LIBRARIES} "") +# Check the language being used +get_property( _LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES ) +if( _LANGUAGES_ MATCHES Fortran ) + set( _CHECK_FORTRAN TRUE ) +elseif( (_LANGUAGES_ MATCHES C) OR (_LANGUAGES_ MATCHES CXX) ) + set( _CHECK_FORTRAN FALSE ) +else() + if(BLAS_FIND_REQUIRED) + message(FATAL_ERROR "FindBLAS requires Fortran, C, or C++ to be enabled.") + else() + message(STATUS "Looking for BLAS... - NOT found (Unsupported languages)") + return() + endif() +endif() + +macro(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list _thread) + # This macro checks for the existence of the combination of fortran libraries + # given by _list. If the combination is found, this macro checks (using the + # Check_Fortran_Function_Exists macro) whether can link against that library + # combination using the name of a routine given by _name using the linker + # flags given by _flags. If the combination of libraries is found and passes + # the link test, LIBRARIES is set to the list of complete library paths that + # have been found. Otherwise, LIBRARIES is set to FALSE. + + # N.B. _prefix is the prefix applied to the names of all cached variables that + # are generated internally and marked advanced by this macro. + + set(_libdir ${ARGN}) + + set(_libraries_work TRUE) + set(${LIBRARIES}) set(_combined_name) + set(ENV_MKLROOT "$ENV{MKLROOT}") + set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") + set(ENV_BLAS_LIBDIR "$ENV{BLAS_LIBDIR}") + if (NOT _libdir) + if (BLAS_LIBDIR) + list(APPEND _libdir "${BLAS_LIBDIR}") + elseif (BLAS_DIR) + list(APPEND _libdir "${BLAS_DIR}") + list(APPEND _libdir "${BLAS_DIR}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${BLAS_DIR}/lib64") + list(APPEND _libdir "${BLAS_DIR}/lib/intel64") + else() + list(APPEND _libdir "${BLAS_DIR}/lib32") + list(APPEND _libdir "${BLAS_DIR}/lib/ia32") + endif() + elseif(ENV_BLAS_LIBDIR) + list(APPEND _libdir "${ENV_BLAS_LIBDIR}") + elseif(ENV_BLAS_DIR) + list(APPEND _libdir "${ENV_BLAS_DIR}") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib64") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib/intel64") + else() + list(APPEND _libdir "${ENV_BLAS_DIR}/lib32") + list(APPEND _libdir "${ENV_BLAS_DIR}/lib/ia32") + endif() + else() + if (ENV_MKLROOT) + list(APPEND _libdir "${ENV_MKLROOT}/lib") + if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + list(APPEND _libdir "${ENV_MKLROOT}/lib64") + list(APPEND _libdir "${ENV_MKLROOT}/lib/intel64") + else() + list(APPEND _libdir "${ENV_MKLROOT}/lib32") + list(APPEND _libdir "${ENV_MKLROOT}/lib/ia32") + endif() + endif() + if (WIN32) + string(REPLACE ":" ";" _libdir2 "$ENV{LIB}") + elseif (APPLE) + string(REPLACE ":" ";" _libdir2 "$ENV{DYLD_LIBRARY_PATH}") + else () + string(REPLACE ":" ";" _libdir2 "$ENV{LD_LIBRARY_PATH}") + endif () + list(APPEND _libdir "${_libdir2}") + list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif () + + if (BLAS_VERBOSE) + message("${Cyan}Try to find BLAS libraries: ${_list}") + endif () + foreach(_library ${_list}) set(_combined_name ${_combined_name}_${_library}) - if(_libraries_found) - # search first in ${_path} - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ${_path} NO_DEFAULT_PATH - ) - # if not found, search in environment variables and system - if ( WIN32 ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS ENV LIB - ) - elseif ( APPLE ) - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV DYLD_LIBRARY_PATH - ) + if(_libraries_work) + if (BLA_STATIC) + if (WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif () + if (APPLE) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else () + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif () else () - find_library(${_prefix}_${_library}_LIBRARY - NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV LD_LIBRARY_PATH - ) - endif() + if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + # for ubuntu's libblas3gf and liblapack3gf packages + set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES} .so.3gf) + endif () + endif () + find_library(${_prefix}_${_library}_LIBRARY + NAMES ${_library} + HINTS ${_libdir} + NO_DEFAULT_PATH + ) mark_as_advanced(${_prefix}_${_library}_LIBRARY) + # Print status if not found + # ------------------------- + if (NOT ${_prefix}_${_library}_LIBRARY AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) + Print_Find_Library_Blas_Status(blas ${_library} ${_libdir}) + endif () set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY}) - set(_libraries_found ${${_prefix}_${_library}_LIBRARY}) - endif(_libraries_found) + set(_libraries_work ${${_prefix}_${_library}_LIBRARY}) + endif(_libraries_work) endforeach(_library ${_list}) - if(_libraries_found) - set(_libraries_found ${${LIBRARIES}}) - endif() - - # Test this combination of libraries with the Fortran/f2c interface. - # We test the Fortran interface first as it is well standardized. - if(_libraries_found AND NOT _libraries_work) - set(${DEFINITIONS} "-D${_prefix}_USE_F2C") - set(${LIBRARIES} ${_libraries_found}) - # Some C++ linkers require the f2c library to link with Fortran libraries. - # I do not know which ones, thus I just add the f2c library if it is available. - find_package( F2C QUIET ) - if ( F2C_FOUND ) - set(${DEFINITIONS} ${${DEFINITIONS}} ${F2C_DEFINITIONS}) - set(${LIBRARIES} ${${LIBRARIES}} ${F2C_LIBRARIES}) + + if(_libraries_work) + # Test this combination of libraries. + if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND BLA_STATIC) + list(INSERT ${LIBRARIES} 0 "-Wl,--start-group") + list(APPEND ${LIBRARIES} "-Wl,--end-group") + endif() + set(CMAKE_REQUIRED_LIBRARIES "${_flags};${${LIBRARIES}};${_thread}") + set(CMAKE_REQUIRED_FLAGS "${BLAS_COMPILER_FLAGS}") + if (BLAS_VERBOSE) + message("${Cyan}BLAS libs found for BLA_VENDOR ${BLA_VENDOR}." + "Try to compile symbol ${_name} with following libraries:" + "${CMAKE_REQUIRED_LIBRARIES}") + endif () + if(NOT BLAS_FOUND) + unset(${_prefix}${_combined_name}_WORKS CACHE) + endif() + if (_CHECK_FORTRAN) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + string(REPLACE "mkl_intel_lp64" "mkl_gf_lp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + string(REPLACE "mkl_intel_ilp64" "mkl_gf_ilp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + endif() + check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) + else() + check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) endif() - set(CMAKE_REQUIRED_DEFINITIONS ${${DEFINITIONS}}) - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) - #message("DEBUG: CMAKE_REQUIRED_DEFINITIONS = ${CMAKE_REQUIRED_DEFINITIONS}") - #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") - # Check if function exists with f2c calling convention (ie a trailing underscore) - check_function_exists(${_name}_ ${_prefix}_${_name}_${_combined_name}_f2c_WORKS) - set(CMAKE_REQUIRED_DEFINITIONS} "") - set(CMAKE_REQUIRED_LIBRARIES "") - mark_as_advanced(${_prefix}_${_name}_${_combined_name}_f2c_WORKS) - set(_libraries_work ${${_prefix}_${_name}_${_combined_name}_f2c_WORKS}) - endif(_libraries_found AND NOT _libraries_work) - - # If not found, test this combination of libraries with a C interface. - # A few implementations (ie ACML) provide a C interface. Unfortunately, there is no standard. - if(_libraries_found AND NOT _libraries_work) - set(${DEFINITIONS} "") - set(${LIBRARIES} ${_libraries_found}) - set(CMAKE_REQUIRED_DEFINITIONS "") - set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}}) - #message("DEBUG: CMAKE_REQUIRED_LIBRARIES = ${CMAKE_REQUIRED_LIBRARIES}") - check_function_exists(${_name} ${_prefix}_${_name}${_combined_name}_WORKS) - set(CMAKE_REQUIRED_LIBRARIES "") - mark_as_advanced(${_prefix}_${_name}${_combined_name}_WORKS) - set(_libraries_work ${${_prefix}_${_name}${_combined_name}_WORKS}) - endif(_libraries_found AND NOT _libraries_work) - - # on failure - if(NOT _libraries_work) - set(${DEFINITIONS} "") - set(${LIBRARIES} FALSE) - endif() - #message("DEBUG: ${DEFINITIONS} = ${${DEFINITIONS}}") - #message("DEBUG: ${LIBRARIES} = ${${LIBRARIES}}") -endmacro(check_fortran_libraries) + mark_as_advanced(${_prefix}${_combined_name}_WORKS) + set(_libraries_work ${${_prefix}${_combined_name}_WORKS}) + # Print status if not found + # ------------------------- + if (NOT _libraries_work AND NOT BLAS_FIND_QUIETLY AND BLAS_VERBOSE) + Print_Find_Library_Blas_CheckFunc_Status(${_name} ${CMAKE_REQUIRED_LIBRARIES}) + endif () + set(CMAKE_REQUIRED_LIBRARIES) + endif() + if(_libraries_work) + set(${LIBRARIES} ${${LIBRARIES}} ${_thread}) + else(_libraries_work) + set(${LIBRARIES} FALSE) + endif(_libraries_work) -# -# main -# +endmacro(Check_Fortran_Libraries) -# Is it already configured? -if (BLAS_LIBRARIES_DIR OR BLAS_LIBRARIES) - set(BLAS_FOUND TRUE) +set(BLAS_LINKER_FLAGS) +set(BLAS_LIBRARIES) +set(BLAS95_LIBRARIES) +if ($ENV{BLA_VENDOR} MATCHES ".+") + set(BLA_VENDOR $ENV{BLA_VENDOR}) +else () + if(NOT BLA_VENDOR) + set(BLA_VENDOR "All") + endif() +endif () -else() +#BLAS in intel mkl 10 library? (em64t 64bit) +if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES OR BLA_VENDOR MATCHES "Intel*") + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_MKLROOT "$ENV{MKLROOT}") + set(ENV_BLAS_DIR "$ENV{BLAS_DIR}") + set(ENV_BLAS_INCDIR "$ENV{BLAS_INCDIR}") + if(ENV_BLAS_INCDIR) + list(APPEND _inc_env "${ENV_BLAS_INCDIR}") + elseif(ENV_BLAS_DIR) + list(APPEND _inc_env "${ENV_BLAS_DIR}") + list(APPEND _inc_env "${ENV_BLAS_DIR}/include") + else() + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + # system variables + if(WIN32) + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) - # reset variables - set( BLAS_INCLUDE_DIR "" ) - set( BLAS_DEFINITIONS "" ) - set( BLAS_LINKER_FLAGS "" ) - set( BLAS_LIBRARIES "" ) - set( BLAS_LIBRARIES_DIR "" ) + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") - # - # If Unix, search for BLAS function in possible libraries - # + # Try to find the fftw header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(BLAS_INCDIR) + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${BLAS_INCDIR}) + else() + if(BLAS_DIR) + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${BLAS_DIR} + PATH_SUFFIXES "include") + else() + set(BLAS_mkl.h_DIRS "BLAS_mkl.h_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_DIRS + NAMES mkl.h + HINTS ${PATH_TO_LOOK_FOR}) + endif() + endif() + mark_as_advanced(BLAS_mkl.h_DIRS) - # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + # If found, add path to cmake variable + # ------------------------------------ + if (BLAS_mkl.h_DIRS) + set(BLAS_INCLUDE_DIRS "${BLAS_mkl.h_DIRS}") + else () + set(BLAS_INCLUDE_DIRS "BLAS_INCLUDE_DIRS-NOTFOUND") + if(NOT BLAS_FIND_QUIETLY) + message(STATUS "Looking for BLAS -- mkl.h not found") + endif() + endif() + + if (WIN32) + string(REPLACE ":" ";" _libdir "$ENV{LIB}") + elseif (APPLE) + string(REPLACE ":" ";" _libdir "$ENV{DYLD_LIBRARY_PATH}") + else () + string(REPLACE ":" ";" _libdir "$ENV{LD_LIBRARY_PATH}") + endif () + list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + # libiomp5 + # -------- + set(OMP_iomp5_LIBRARY "OMP_iomp5_LIBRARY-NOTFOUND") + find_library(OMP_iomp5_LIBRARY + NAMES iomp5 + HINTS ${_libdir} + ) + mark_as_advanced(OMP_iomp5_LIBRARY) + set(OMP_LIB "") + # libgomp + # ------- + set(OMP_gomp_LIBRARY "OMP_gomp_LIBRARY-NOTFOUND") + find_library(OMP_gomp_LIBRARY + NAMES gomp + HINTS ${_libdir} + ) + mark_as_advanced(OMP_gomp_LIBRARY) + # choose one or another depending on the compilo + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + if (OMP_gomp_LIBRARY) + set(OMP_LIB "${OMP_gomp_LIBRARY}") + endif() + else(CMAKE_C_COMPILER_ID STREQUAL "Intel") + if (OMP_iomp5_LIBRARY) + set(OMP_LIB "${OMP_iomp5_LIBRARY}") + endif() + endif() + + if (UNIX AND NOT WIN32) + # m + find_library(M_LIBRARY + NAMES m + HINTS ${_libdir}) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + set(LM "-lm") + else() + set(LM "") + endif() + # Fortran + set(LGFORTRAN "") + if (CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_libdir} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY) + set(LGFORTRAN "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_libdir} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + set(LGFORTRAN "{FORTRAN_ifcore_LIBRARY}") + endif() + endif() + set(BLAS_COMPILER_FLAGS "") + if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_COMPILER_FLAGS "-openmp") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_COMPILER_FLAGS "-fopenmp") + endif() + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + if (BLA_VENDOR STREQUAL "Intel10_32") + list(APPEND BLAS_COMPILER_FLAGS "-m32") + else() + list(APPEND BLAS_COMPILER_FLAGS "-m64") + endif() + if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") + list(APPEND OMP_LIB "-ldl") + endif() + if (ENV_MKLROOT) + list(APPEND BLAS_COMPILER_FLAGS "-I${ENV_MKLROOT}/include") + endif() + endif() + + set(additional_flags "") + if (CMAKE_C_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(additional_flags "-Wl,--no-as-needed") + endif() + endif () + + if (_LANGUAGES_ MATCHES C OR _LANGUAGES_ MATCHES CXX) + if(BLAS_FIND_QUIETLY OR NOT BLAS_FIND_REQUIRED) + find_package(Threads) + else() + find_package(Threads REQUIRED) + endif() + + set(BLAS_SEARCH_LIBS "") + + if(BLA_F95) + + set(BLAS_mkl_SEARCH_SYMBOL SGEMM) + set(_LIBRARIES BLAS95_LIBRARIES) + if (WIN32) + if (BLA_STATIC) + set(BLAS_mkl_DLL_SUFFIX "") + else() + set(BLAS_mkl_DLL_SUFFIX "_dll") + endif() + + # Find the main file (32-bit or 64-bit) + set(BLAS_SEARCH_LIBS_WIN_MAIN "") + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_blas95${BLAS_mkl_DLL_SUFFIX} mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_blas95_lp64${BLAS_mkl_DLL_SUFFIX} mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") + endif () + + # Add threading/sequential libs + set(BLAS_SEARCH_LIBS_WIN_THREAD "") + if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") + endif() + if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Cartesian product of the above + foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) + foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) + list(APPEND BLAS_SEARCH_LIBS + "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") + endforeach() + endforeach() + else (WIN32) + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95 mkl_intel mkl_intel_thread mkl_core guide") + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95 mkl_intel_lp64 mkl_intel_thread mkl_core guide") + # mkl >= 10.3 + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95_lp64 mkl_intel_lp64 mkl_intel_thread mkl_core") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95_lp64 mkl_intel_lp64 mkl_gnu_thread mkl_core") + endif() + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_sequential mkl_core") + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") + set(OMP_LIB "") + endif() + endif () + endif (WIN32) + + else (BLA_F95) + + set(BLAS_mkl_SEARCH_SYMBOL sgemm) + set(_LIBRARIES BLAS_LIBRARIES) + if (WIN32) + if (BLA_STATIC) + set(BLAS_mkl_DLL_SUFFIX "") + else() + set(BLAS_mkl_DLL_SUFFIX "_dll") + endif() + + # Find the main file (32-bit or 64-bit) + set(BLAS_SEARCH_LIBS_WIN_MAIN "") + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_intel_c${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR STREQUAL "Intel10_64lp*" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_MAIN + "mkl_intel_lp64${BLAS_mkl_DLL_SUFFIX}") + endif () + + # Add threading/sequential libs + set(BLAS_SEARCH_LIBS_WIN_THREAD "") + if (NOT BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libguide40 mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + # mkl >= 10.3 + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "libiomp5md mkl_intel_thread${BLAS_mkl_DLL_SUFFIX}") + endif() + if (BLA_VENDOR STREQUAL "*_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS_WIN_THREAD + "mkl_sequential${BLAS_mkl_DLL_SUFFIX}") + endif() + + # Cartesian product of the above + foreach (MAIN ${BLAS_SEARCH_LIBS_WIN_MAIN}) + foreach (THREAD ${BLAS_SEARCH_LIBS_WIN_THREAD}) + list(APPEND BLAS_SEARCH_LIBS + "${MAIN} ${THREAD} mkl_core${BLAS_mkl_DLL_SUFFIX}") + endforeach() + endforeach() + else (WIN32) + if (BLA_VENDOR STREQUAL "Intel10_32" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel mkl_intel_thread mkl_core guide") + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp" OR BLA_VENDOR STREQUAL "All") + # old version + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_intel_thread mkl_core guide") + # mkl >= 10.3 + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_intel_thread mkl_core") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_gnu_thread mkl_core") + endif() + endif () + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_sequential mkl_core") + if (BLA_VENDOR STREQUAL "Intel10_64lp_seq") + set(OMP_LIB "") + endif() + endif () + #older vesions of intel mkl libs + if (BLA_VENDOR STREQUAL "Intel" OR BLA_VENDOR STREQUAL "All") + list(APPEND BLAS_SEARCH_LIBS + "mkl") + list(APPEND BLAS_SEARCH_LIBS + "mkl_ia32") + list(APPEND BLAS_SEARCH_LIBS + "mkl_em64t") + endif () + endif (WIN32) + + endif (BLA_F95) + + foreach (IT ${BLAS_SEARCH_LIBS}) + string(REPLACE " " ";" SEARCH_LIBS ${IT}) + if (${_LIBRARIES}) + else () + check_fortran_libraries( + ${_LIBRARIES} + BLAS + ${BLAS_mkl_SEARCH_SYMBOL} + "${additional_flags}" + "${SEARCH_LIBS}" + "${OMP_LIB};${CMAKE_THREAD_LIBS_INIT};${LM}" + ) + if(_LIBRARIES) + set(BLAS_LINKER_FLAGS "${additional_flags}") + endif() + endif() + endforeach () + if(NOT BLAS_FIND_QUIETLY) + if(${_LIBRARIES}) + message(STATUS "Looking for MKL BLAS: found") + else() + message(STATUS "Looking for MKL BLAS: not found") + endif() + endif() + if (${_LIBRARIES} AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Intel MKL") + endif() + endif (_LANGUAGES_ MATCHES C OR _LANGUAGES_ MATCHES CXX) + endif(NOT BLAS_LIBRARIES OR BLA_VENDOR MATCHES "Intel*") +endif (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") + + +if (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # gotoblas (http://www.tacc.utexas.edu/tacc-projects/gotoblas2) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "cblas;f77blas;atlas" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "goto2" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Goto BLAS: found") + else() + message(STATUS "Looking for Goto BLAS: not found") + endif() endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Goto") + endif() - # BLAS in PhiPACK libraries? (requires generic BLAS lib, too) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS +endif (BLA_VENDOR STREQUAL "Goto" OR BLA_VENDOR STREQUAL "All") + + +# OpenBlas +if (BLA_VENDOR STREQUAL "Open" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # openblas (http://www.openblas.net/) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "sgemm;dgemm;blas" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "openblas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Open BLAS: found") + else() + message(STATUS "Looking for Open BLAS: not found") + endif() endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Openblas") + endif() - # BLAS in Alpha CXML library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS +endif (BLA_VENDOR STREQUAL "Open" OR BLA_VENDOR STREQUAL "All") + + +# EigenBlas +if (BLA_VENDOR STREQUAL "Eigen" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "cxml" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "eigen_blas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + message(STATUS "Looking for Eigen BLAS: found") + else() + message(STATUS "Looking for Eigen BLAS: not found") + endif() endif() + endif() - # BLAS in Alpha DXML library? (now called CXML, see above) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if(NOT BLAS_LIBRARIES) + # eigenblas (http://eigen.tuxfamily.org/index.php?title=Main_Page) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "dxml" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "eigen_blas_static" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Eigen BLAS: found") + else() + message(STATUS "Looking for Eigen BLAS: not found") + endif() endif() + endif() + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Eigen") + endif() - # BLAS in Sun Performance library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS +endif (BLA_VENDOR STREQUAL "Eigen" OR BLA_VENDOR STREQUAL "All") + + +if (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + # BLAS in ATLAS library? (http://math-atlas.sourceforge.net/) + check_fortran_libraries( BLAS_LIBRARIES BLAS - sgemm - "-xlic_lib=sunperf" - "sunperf;sunmath" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + dgemm + "" + "f77blas;atlas" + "" ) + if(NOT BLAS_FIND_QUIETLY) if(BLAS_LIBRARIES) - # Extra linker flag - set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf") + message(STATUS "Looking for Atlas BLAS: found") + else() + message(STATUS "Looking for Atlas BLAS: not found") endif() endif() + endif() - # BLAS in SCSL library? (SGI/Cray Scientific Library) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Atlas") + endif() + +endif (BLA_VENDOR STREQUAL "ATLAS" OR BLA_VENDOR STREQUAL "All") + + +# BLAS in PhiPACK libraries? (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "scsl" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "sgemm;dgemm;blas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for PhiPACK BLAS: found") + else() + message(STATUS "Looking for PhiPACK BLAS: not found") + endif() endif() + endif() - # BLAS in SGIMATH library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "PhiPACK") + endif() + +endif (BLA_VENDOR STREQUAL "PhiPACK" OR BLA_VENDOR STREQUAL "All") + + +# BLAS in Alpha CXML library? +if (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "complib.sgimath" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "cxml" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for CXML BLAS: found") + else() + message(STATUS "Looking for CXML BLAS: not found") + endif() endif() + endif() - # BLAS in IBM ESSL library? (requires generic BLAS lib, too) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "CXML") + endif() + +endif (BLA_VENDOR STREQUAL "CXML" OR BLA_VENDOR STREQUAL "All") + + +# BLAS in Alpha DXML library? (now called CXML, see above) +if (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "essl;blas" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "dxml" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for DXML BLAS: found") + else() + message(STATUS "Looking for DXML BLAS: not found") + endif() endif() + endif() - #BLAS in intel mkl 10 library? (em64t 64bit) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "DXML") + endif() + +endif (BLA_VENDOR STREQUAL "DXML" OR BLA_VENDOR STREQUAL "All") + + +# BLAS in Sun Performance library? +if (BLA_VENDOR STREQUAL "SunPerf" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm + "-xlic_lib=sunperf" + "sunperf;sunmath" "" - "mkl_intel_lp64;mkl_intel_thread;mkl_core;guide;pthread" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" ) + if(BLAS_LIBRARIES) + set(BLAS_LINKER_FLAGS "-xlic_lib=sunperf") endif() + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SunPerf BLAS: found") + else() + message(STATUS "Looking for SunPerf BLAS: not found") + endif() + endif() + endif() - ### windows version of intel mkl 10? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SunPerf") + endif() + +endif () + + +# BLAS in SCSL library? (SGI/Cray Scientific Library) +if (BLA_VENDOR STREQUAL "SCSL" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS - SGEMM + sgemm + "" + "scsl" "" - "mkl_c_dll;mkl_intel_thread_dll;mkl_core_dll;libguide40" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SCSL BLAS: found") + else() + message(STATUS "Looking for SCSL BLAS: not found") + endif() endif() + endif() - #older versions of intel mkl libs + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SunPerf") + endif() - # BLAS in intel mkl library? (shared) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS +endif () + + +# BLAS in SGIMATH library? +if (BLA_VENDOR STREQUAL "SGIMATH" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "mkl;guide;pthread" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "complib.sgimath" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for SGIMATH BLAS: found") + else() + message(STATUS "Looking for SGIMATH BLAS: not found") + endif() endif() + endif() - #BLAS in intel mkl library? (static, 32bit) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "SGIMATH") + endif() + +endif () + + +# BLAS in IBM ESSL library (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "IBMESSL" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "mkl_ia32;guide;pthread" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "essl;xlfmath;xlf90_r;blas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for IBM ESSL BLAS: found") + else() + message(STATUS "Looking for IBM ESSL BLAS: not found") + endif() endif() + endif() - #BLAS in intel mkl library? (static, em64t 64bit) - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "IBM ESSL") + endif() + +endif () + +# BLAS in IBM ESSL_MT library (requires generic BLAS lib, too) +if (BLA_VENDOR STREQUAL "IBMESSLMT" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "mkl_em64t;guide;pthread" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "esslsmp;xlsmp;xlfmath;xlf90_r;blas" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for IBM ESSL MT BLAS: found") + else() + message(STATUS "Looking for IBM ESSL MT BLAS: not found") + endif() endif() + endif() - #BLAS in acml library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "IBM ESSL MT") + endif() + +endif () + + +#BLAS in acml library? +if (BLA_VENDOR MATCHES "ACML.*" OR BLA_VENDOR STREQUAL "All") + + if( ((BLA_VENDOR STREQUAL "ACML") AND (NOT BLAS_ACML_LIB_DIRS)) OR + ((BLA_VENDOR STREQUAL "ACML_MP") AND (NOT BLAS_ACML_MP_LIB_DIRS)) OR + ((BLA_VENDOR STREQUAL "ACML_GPU") AND (NOT BLAS_ACML_GPU_LIB_DIRS))) + + # try to find acml in "standard" paths + if( WIN32 ) + file( GLOB _ACML_ROOT "C:/AMD/acml*/ACML-EULA.txt" ) + else() + file( GLOB _ACML_ROOT "/opt/acml*/ACML-EULA.txt" ) + endif() + if( WIN32 ) + file( GLOB _ACML_GPU_ROOT "C:/AMD/acml*/GPGPUexamples" ) + else() + file( GLOB _ACML_GPU_ROOT "/opt/acml*/GPGPUexamples" ) + endif() + list(GET _ACML_ROOT 0 _ACML_ROOT) + list(GET _ACML_GPU_ROOT 0 _ACML_GPU_ROOT) + + if( _ACML_ROOT ) + + get_filename_component( _ACML_ROOT ${_ACML_ROOT} PATH ) + if( SIZEOF_INTEGER EQUAL 8 ) + set( _ACML_PATH_SUFFIX "_int64" ) + else() + set( _ACML_PATH_SUFFIX "" ) + endif() + if( CMAKE_Fortran_COMPILER_ID STREQUAL "Intel" ) + set( _ACML_COMPILER32 "ifort32" ) + set( _ACML_COMPILER64 "ifort64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "SunPro" ) + set( _ACML_COMPILER32 "sun32" ) + set( _ACML_COMPILER64 "sun64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "PGI" ) + set( _ACML_COMPILER32 "pgi32" ) + if( WIN32 ) + set( _ACML_COMPILER64 "win64" ) + else() + set( _ACML_COMPILER64 "pgi64" ) + endif() + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "Open64" ) + # 32 bit builds not supported on Open64 but for code simplicity + # We'll just use the same directory twice + set( _ACML_COMPILER32 "open64_64" ) + set( _ACML_COMPILER64 "open64_64" ) + elseif( CMAKE_Fortran_COMPILER_ID STREQUAL "NAG" ) + set( _ACML_COMPILER32 "nag32" ) + set( _ACML_COMPILER64 "nag64" ) + else() + set( _ACML_COMPILER32 "gfortran32" ) + set( _ACML_COMPILER64 "gfortran64" ) + endif() + + if( BLA_VENDOR STREQUAL "ACML_MP" ) + set(_ACML_MP_LIB_DIRS + "${_ACML_ROOT}/${_ACML_COMPILER32}_mp${_ACML_PATH_SUFFIX}/lib" + "${_ACML_ROOT}/${_ACML_COMPILER64}_mp${_ACML_PATH_SUFFIX}/lib" ) + else() + set(_ACML_LIB_DIRS + "${_ACML_ROOT}/${_ACML_COMPILER32}${_ACML_PATH_SUFFIX}/lib" + "${_ACML_ROOT}/${_ACML_COMPILER64}${_ACML_PATH_SUFFIX}/lib" ) + endif() + + endif(_ACML_ROOT) + + elseif(BLAS_${BLA_VENDOR}_LIB_DIRS) + + set(_${BLA_VENDOR}_LIB_DIRS ${BLAS_${BLA_VENDOR}_LIB_DIRS}) + + endif() + + if( BLA_VENDOR STREQUAL "ACML_MP" ) + foreach( BLAS_ACML_MP_LIB_DIRS ${_ACML_MP_LIB_DIRS}) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml_mp;acml_mv" "" ${BLAS_ACML_MP_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + elseif( BLA_VENDOR STREQUAL "ACML_GPU" ) + foreach( BLAS_ACML_GPU_LIB_DIRS ${_ACML_GPU_LIB_DIRS}) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml;acml_mv;CALBLAS" "" ${BLAS_ACML_GPU_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + else() + foreach( BLAS_ACML_LIB_DIRS ${_ACML_LIB_DIRS} ) + check_fortran_libraries ( + BLAS_LIBRARIES + BLAS + sgemm + "" "acml;acml_mv" "" ${BLAS_ACML_LIB_DIRS} + ) + if( BLAS_LIBRARIES ) + break() + endif() + endforeach() + endif() + + # Either acml or acml_mp should be in LD_LIBRARY_PATH but not both + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "acml" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "acml;acml_mv" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() endif() + endif() - # Apple BLAS library? - if(NOT BLAS_LIBRARIES) - check_fortran_libraries( - BLAS_DEFINITIONS + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "Accelerate" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "acml_mp;acml_mv" + "" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() endif() + endif() - if ( NOT BLAS_LIBRARIES ) - check_fortran_libraries( - BLAS_DEFINITIONS + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS sgemm "" - "vecLib" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" + "acml;acml_mv;CALBLAS" + "" ) - endif ( NOT BLAS_LIBRARIES ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for ACML BLAS: found") + else() + message(STATUS "Looking for ACML BLAS: not found") + endif() + endif() + endif() - # Generic BLAS library? - # This configuration *must* be the last try as this library is notably slow. - if ( NOT BLAS_LIBRARIES ) - check_fortran_libraries( - BLAS_DEFINITIONS + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "ACML") + endif() + +endif (BLA_VENDOR MATCHES "ACML.*" OR BLA_VENDOR STREQUAL "All") # ACML + + +# Apple BLAS library? +if (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") + + if(NOT BLAS_LIBRARIES) + check_fortran_libraries( BLAS_LIBRARIES BLAS - sgemm + dgemm + "" + "Accelerate" "" - "blas" - "${CGAL_TAUCS_LIBRARIES_DIR} ENV BLAS_LIB_DIR" ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Apple BLAS: found") + else() + message(STATUS "Looking for Apple BLAS: not found") + endif() endif() + endif() + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Apple Accelerate") + endif() + +endif (BLA_VENDOR STREQUAL "Apple" OR BLA_VENDOR STREQUAL "All") - if(BLAS_LIBRARIES_DIR OR BLAS_LIBRARIES) + +if (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") + + if ( NOT BLAS_LIBRARIES ) + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + dgemm + "" + "vecLib" + "" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for NAS BLAS: found") + else() + message(STATUS "Looking for NAS BLAS: not found") + endif() + endif() + endif () + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "NAS") + endif() + +endif (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") + + +# Generic BLAS library? +if (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") + + set(BLAS_SEARCH_LIBS "blas;blas_LINUX;blas_MAC;blas_WINDOWS;refblas") + foreach (SEARCH_LIB ${BLAS_SEARCH_LIBS}) + if (BLAS_LIBRARIES) + else () + check_fortran_libraries( + BLAS_LIBRARIES + BLAS + sgemm + "" + "${SEARCH_LIB}" + "${LGFORTRAN}" + ) + if(NOT BLAS_FIND_QUIETLY) + if(BLAS_LIBRARIES) + message(STATUS "Looking for Generic BLAS: found") + else() + message(STATUS "Looking for Generic BLAS: not found") + endif() + endif() + endif() + endforeach () + + if (BLAS_LIBRARIES AND NOT BLAS_VENDOR_FOUND) + set (BLAS_VENDOR_FOUND "Netlib or other Generic libblas") + endif() + +endif (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") + + +if(BLA_F95) + + if(BLAS95_LIBRARIES) + set(BLAS95_FOUND TRUE) + else() + set(BLAS95_FOUND FALSE) + endif() + + if(NOT BLAS_FIND_QUIETLY) + if(BLAS95_FOUND) + message(STATUS "A library with BLAS95 API found.") + message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") + else(BLAS95_FOUND) + message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas 95 libraries could not be found or check of symbols failed." + "\nPlease indicate where to find blas libraries. You have three options:\n" + "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" + "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." + "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." + "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," + "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") + if(BLAS_FIND_REQUIRED) + message(FATAL_ERROR + "A required library with BLAS95 API not found. Please specify library location.") + else() + message(STATUS + "A library with BLAS95 API not found. Please specify library location.") + endif() + endif(BLAS95_FOUND) + endif(NOT BLAS_FIND_QUIETLY) + + set(BLAS_FOUND TRUE) + set(BLAS_LIBRARIES "${BLAS95_LIBRARIES}") + +else(BLA_F95) + + if(BLAS_LIBRARIES) set(BLAS_FOUND TRUE) else() set(BLAS_FOUND FALSE) @@ -388,32 +1366,41 @@ else() if(NOT BLAS_FIND_QUIETLY) if(BLAS_FOUND) message(STATUS "A library with BLAS API found.") + message(STATUS "BLAS_LIBRARIES ${BLAS_LIBRARIES}") else(BLAS_FOUND) + message(WARNING "BLA_VENDOR has been set to ${BLA_VENDOR} but blas libraries could not be found or check of symbols failed." + "\nPlease indicate where to find blas libraries. You have three options:\n" + "- Option 1: Provide the installation directory of BLAS library with cmake option: -DBLAS_DIR=your/path/to/blas\n" + "- Option 2: Provide the directory where to find BLAS libraries with cmake option: -DBLAS_LIBDIR=your/path/to/blas/libs\n" + "- Option 3: Update your environment variable (Linux: LD_LIBRARY_PATH, Windows: LIB, Mac: DYLD_LIBRARY_PATH)\n" + "\nTo follow libraries detection more precisely you can activate a verbose mode with -DBLAS_VERBOSE=ON at cmake configure." + "\nYou could also specify a BLAS vendor to look for by setting -DBLA_VENDOR=blas_vendor_name." + "\nList of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, Intel10_32 (intel mkl v10 32 bit)," + "Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "Intel( older versions of mkl 32 and 64 bit), ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") if(BLAS_FIND_REQUIRED) - message(FATAL_ERROR "A required library with BLAS API not found. Please specify library location.") + message(FATAL_ERROR + "A required library with BLAS API not found. Please specify library location.") else() - message(STATUS "A library with BLAS API not found. Please specify library location.") + message(STATUS + "A library with BLAS API not found. Please specify library location.") endif() endif(BLAS_FOUND) endif(NOT BLAS_FIND_QUIETLY) - # Add variables to cache - set( BLAS_INCLUDE_DIR "${BLAS_INCLUDE_DIR}" - CACHE PATH "Directories containing the BLAS header files" FORCE ) - set( BLAS_DEFINITIONS "${BLAS_DEFINITIONS}" - CACHE STRING "Compilation options to use BLAS" FORCE ) - set( BLAS_LINKER_FLAGS "${BLAS_LINKER_FLAGS}" - CACHE STRING "Linker flags to use BLAS" FORCE ) - set( BLAS_LIBRARIES "${BLAS_LIBRARIES}" - CACHE FILEPATH "BLAS libraries name" FORCE ) - set( BLAS_LIBRARIES_DIR "${BLAS_LIBRARIES_DIR}" - CACHE PATH "Directories containing the BLAS libraries" FORCE ) - - #message("DEBUG: BLAS_INCLUDE_DIR = ${BLAS_INCLUDE_DIR}") - #message("DEBUG: BLAS_DEFINITIONS = ${BLAS_DEFINITIONS}") - #message("DEBUG: BLAS_LINKER_FLAGS = ${BLAS_LINKER_FLAGS}") - #message("DEBUG: BLAS_LIBRARIES = ${BLAS_LIBRARIES}") - #message("DEBUG: BLAS_LIBRARIES_DIR = ${BLAS_LIBRARIES_DIR}") - #message("DEBUG: BLAS_FOUND = ${BLAS_FOUND}") - -endif(BLAS_LIBRARIES_DIR OR BLAS_LIBRARIES) +endif(BLA_F95) + +set(CMAKE_FIND_LIBRARY_SUFFIXES ${_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) + +if (BLAS_FOUND) + list(GET BLAS_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") + string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") + set(BLAS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of BLAS library" FORCE) + else() + set(BLAS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of BLAS library" FORCE) + endif() +endif() +mark_as_advanced(BLAS_DIR) +mark_as_advanced(BLAS_DIR_FOUND) diff --git a/external/eigen3/cmake/FindBLASEXT.cmake b/external/eigen3/cmake/FindBLASEXT.cmake new file mode 100644 index 0000000..0fe7fb8 --- /dev/null +++ b/external/eigen3/cmake/FindBLASEXT.cmake @@ -0,0 +1,380 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find BLAS EXTENDED for MORSE projects: find include dirs and libraries +# +# This module allows to find BLAS libraries by calling the official FindBLAS module +# and handles the creation of different library lists whether the user wishes to link +# with a sequential BLAS or a multihreaded (BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES). +# BLAS is detected with a FindBLAS call then if the BLAS vendor is Intel10_64lp, ACML +# or IBMESSLMT then the module attempts to find the corresponding multithreaded libraries. +# +# The following variables have been added to manage links with sequential or multithreaded +# versions: +# BLAS_INCLUDE_DIRS - BLAS include directories +# BLAS_LIBRARY_DIRS - Link directories for BLAS libraries +# BLAS_SEQ_LIBRARIES - BLAS component libraries to be linked (sequential) +# BLAS_PAR_LIBRARIES - BLAS component libraries to be linked (multithreaded) + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +# macro to factorize this call +macro(find_package_blas) + if(BLASEXT_FIND_REQUIRED) + if(BLASEXT_FIND_QUIETLY) + find_package(BLAS REQUIRED QUIET) + else() + find_package(BLAS REQUIRED) + endif() + else() + if(BLASEXT_FIND_QUIETLY) + find_package(BLAS QUIET) + else() + find_package(BLAS) + endif() + endif() +endmacro() + +# add a cache variable to let the user specify the BLAS vendor +set(BLA_VENDOR "" CACHE STRING "list of possible BLAS vendor: + Open, Eigen, Goto, ATLAS PhiPACK, CXML, DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, + Intel10_32 (intel mkl v10 32 bit), + Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model), + Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model), + Intel( older versions of mkl 32 and 64 bit), + ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") + +if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "In FindBLASEXT") + message(STATUS "If you want to force the use of one specific library, " + "\n please specify the BLAS vendor by setting -DBLA_VENDOR=blas_vendor_name" + "\n at cmake configure.") + message(STATUS "List of possible BLAS vendor: Goto, ATLAS PhiPACK, CXML, " + "\n DXML, SunPerf, SCSL, SGIMATH, IBMESSL, IBMESSLMT, Intel10_32 (intel mkl v10 32 bit)," + "\n Intel10_64lp (intel mkl v10 64 bit, lp thread model, lp64 model)," + "\n Intel10_64lp_seq (intel mkl v10 64 bit, sequential code, lp64 model)," + "\n Intel( older versions of mkl 32 and 64 bit)," + "\n ACML, ACML_MP, ACML_GPU, Apple, NAS, Generic") +endif() + +if (NOT BLAS_FOUND) + # First try to detect two cases: + # 1: only SEQ libs are handled + # 2: both SEQ and PAR libs are handled + find_package_blas() +endif () + +# detect the cases where SEQ and PAR libs are handled +if(BLA_VENDOR STREQUAL "All" AND + (BLAS_mkl_core_LIBRARY OR BLAS_mkl_core_dll_LIBRARY) + ) + set(BLA_VENDOR "Intel") + if(BLAS_mkl_intel_LIBRARY) + set(BLA_VENDOR "Intel10_32") + endif() + if(BLAS_mkl_intel_lp64_LIBRARY) + set(BLA_VENDOR "Intel10_64lp") + endif() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the MKL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_acml_LIBRARY) + set(BLA_VENDOR "ACML") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ACML." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +elseif(BLA_VENDOR STREQUAL "All" AND BLAS_essl_LIBRARY) + set(BLA_VENDOR "IBMESSL") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "A BLAS library has been found (${BLAS_LIBRARIES}) but we" + "\n have also potentially detected some multithreaded BLAS libraries from the ESSL." + "\n We try to find both libraries lists (Sequential/Multithreaded).") + endif() + set(BLAS_FOUND "") +endif() + +# Intel case +if(BLA_VENDOR MATCHES "Intel*") + + ### + # look for include path if the BLAS vendor is Intel + ### + + # gather system include paths + unset(_inc_env) + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + set(ENV_MKLROOT "$ENV{MKLROOT}") + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() + list(REMOVE_DUPLICATES _inc_env) + + # find mkl.h inside known include paths + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + if(BLAS_INCDIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_INCDIR}) + else() + if(BLAS_DIR) + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${BLAS_DIR} + PATH_SUFFIXES include) + else() + set(BLAS_mkl.h_INCLUDE_DIRS "BLAS_mkl.h_INCLUDE_DIRS-NOTFOUND") + find_path(BLAS_mkl.h_INCLUDE_DIRS + NAMES mkl.h + HINTS ${_inc_env}) + endif() + endif() + mark_as_advanced(BLAS_mkl.h_INCLUDE_DIRS) + ## Print status if not found + ## ------------------------- + #if (NOT BLAS_mkl.h_INCLUDE_DIRS AND MORSE_VERBOSE) + # Print_Find_Header_Status(blas mkl.h) + #endif () + set(BLAS_INCLUDE_DIRS "") + if(BLAS_mkl.h_INCLUDE_DIRS) + list(APPEND BLAS_INCLUDE_DIRS "${BLAS_mkl.h_INCLUDE_DIRS}" ) + endif() + + ### + # look for libs + ### + # if Intel 10 64 bit -> look for sequential and multithreaded versions + if(BLA_VENDOR MATCHES "Intel10_64lp*") + + ## look for the sequential version + set(BLA_VENDOR "Intel10_64lp_seq") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the sequential version Intel10_64lp_seq") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "Intel10_64lp") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "Look for the multithreaded version Intel10_64lp") + endif() + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + else() + + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + endif() + + # ACML case +elseif(BLA_VENDOR MATCHES "ACML*") + + ## look for the sequential version + set(BLA_VENDOR "ACML") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "ACML_MP") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + + # IBMESSL case +elseif(BLA_VENDOR MATCHES "IBMESSL*") + + ## look for the sequential version + set(BLA_VENDOR "IBMESSL") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + + ## look for the multithreaded version + set(BLA_VENDOR "IBMESSLMT") + find_package_blas() + if(BLAS_FOUND) + set(BLAS_PAR_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + endif() + +else() + + if(BLAS_FOUND) + # define the SEQ libs as the BLAS_LIBRARIES + set(BLAS_SEQ_LIBRARIES "${BLAS_LIBRARIES}") + else() + set(BLAS_SEQ_LIBRARIES "${BLAS_SEQ_LIBRARIES-NOTFOUND}") + endif() + set(BLAS_PAR_LIBRARIES "${BLAS_PAR_LIBRARIES-NOTFOUND}") + +endif() + + +if(BLAS_SEQ_LIBRARIES) + set(BLAS_LIBRARIES "${BLAS_SEQ_LIBRARIES}") +endif() + +# extract libs paths +# remark: because it is not given by find_package(BLAS) +set(BLAS_LIBRARY_DIRS "") +string(REPLACE " " ";" BLAS_LIBRARIES "${BLAS_LIBRARIES}") +foreach(blas_lib ${BLAS_LIBRARIES}) + if (EXISTS "${blas_lib}") + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + else() + string(REPLACE "-L" "" blas_lib "${blas_lib}") + if (EXISTS "${blas_lib}") + list(APPEND BLAS_LIBRARY_DIRS "${blas_lib}" ) + else() + get_filename_component(a_blas_lib_dir "${blas_lib}" PATH) + if (EXISTS "${a_blas_lib_dir}") + list(APPEND BLAS_LIBRARY_DIRS "${a_blas_lib_dir}" ) + endif() + endif() + endif() +endforeach() +if (BLAS_LIBRARY_DIRS) + list(REMOVE_DUPLICATES BLAS_LIBRARY_DIRS) +endif () + +# check that BLAS has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +if(BLA_VENDOR MATCHES "Intel*") + if(BLA_VENDOR MATCHES "Intel10_64lp*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is Intel MKL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() + else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS + BLAS_INCLUDE_DIRS) + endif() +elseif(BLA_VENDOR MATCHES "ACML*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ACML:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +elseif(BLA_VENDOR MATCHES "IBMESSL*") + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS found is ESSL:" + "\n we manage two lists of libs, one sequential and one parallel if found" + "\n (see BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES)") + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) + if(BLAS_PAR_LIBRARIES) + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS parallel libraries stored in BLAS_PAR_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_PAR_LIBRARIES) + endif() +else() + if(NOT BLASEXT_FIND_QUIETLY) + message(STATUS "BLAS sequential libraries stored in BLAS_SEQ_LIBRARIES") + endif() + find_package_handle_standard_args(BLAS DEFAULT_MSG + BLAS_SEQ_LIBRARIES + BLAS_LIBRARY_DIRS) +endif() diff --git a/external/eigen3/cmake/FindComputeCpp.cmake b/external/eigen3/cmake/FindComputeCpp.cmake new file mode 100644 index 0000000..07ebed6 --- /dev/null +++ b/external/eigen3/cmake/FindComputeCpp.cmake @@ -0,0 +1,245 @@ +#.rst: +# FindComputeCpp +#--------------- +# +# Copyright 2016 Codeplay Software Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use these files except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +######################### +# FindComputeCpp.cmake +######################### +# +# Tools for finding and building with ComputeCpp. +# +# User must define COMPUTECPP_PACKAGE_ROOT_DIR pointing to the ComputeCpp +# installation. +# +# Latest version of this file can be found at: +# https://github.com/codeplaysoftware/computecpp-sdk + +# Require CMake version 3.2.2 or higher +cmake_minimum_required(VERSION 3.2.2) + +# Check that a supported host compiler can be found +if(CMAKE_COMPILER_IS_GNUCXX) + # Require at least gcc 4.8 + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) + message(FATAL_ERROR + "host compiler - Not found! (gcc version must be at least 4.8)") + # Require the GCC dual ABI to be disabled for 5.1 or higher + elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1) + set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True") + message(STATUS + "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)") + else() + message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}") + endif() +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + # Require at least clang 3.6 + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) + message(FATAL_ERROR + "host compiler - Not found! (clang version must be at least 3.6)") + else() + message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}") + endif() +else() + message(WARNING + "host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)") +endif() + +set(COMPUTECPP_64_BIT_DEFAULT ON) +option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode" + ${COMPUTECPP_64_BIT_DEFAULT}) +mark_as_advanced(COMPUTECPP_64_BIT_CODE) + +# Find OpenCL package +find_package(OpenCL REQUIRED) + +# Find ComputeCpp packagee +if(NOT COMPUTECPP_PACKAGE_ROOT_DIR) + message(FATAL_ERROR + "ComputeCpp package - Not found! (please set COMPUTECPP_PACKAGE_ROOT_DIR") +else() + message(STATUS "ComputeCpp package - Found") +endif() +option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package") + +# Obtain the path to compute++ +find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS + ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin) +if (EXISTS ${COMPUTECPP_DEVICE_COMPILER}) + mark_as_advanced(COMPUTECPP_DEVICE_COMPILER) + message(STATUS "compute++ - Found") +else() + message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER})") +endif() + +# Obtain the path to computecpp_info +find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS + ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin) +if (EXISTS ${COMPUTECPP_INFO_TOOL}) + mark_as_advanced(${COMPUTECPP_INFO_TOOL}) + message(STATUS "computecpp_info - Found") +else() + message(FATAL_ERROR "computecpp_info - Not found! (${COMPUTECPP_INFO_TOOL})") +endif() + +# Obtain the path to the ComputeCpp runtime library +find_library(COMPUTECPP_RUNTIME_LIBRARY ComputeCpp PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR} + HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib PATH_SUFFIXES lib + DOC "ComputeCpp Runtime Library" NO_DEFAULT_PATH) + +if (EXISTS ${COMPUTECPP_RUNTIME_LIBRARY}) + mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY) + message(STATUS "libComputeCpp.so - Found") +else() + message(FATAL_ERROR "libComputeCpp.so - Not found!") +endif() + +# Obtain the ComputeCpp include directory +set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/) +if (NOT EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY}) + message(FATAL_ERROR "ComputeCpp includes - Not found!") +else() + message(STATUS "ComputeCpp includes - Found") +endif() + +# Obtain the package version +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version" + OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "Package version - Error obtaining version!") +else() + mark_as_advanced(COMPUTECPP_PACKAGE_VERSION) + message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}") +endif() + +# Obtain the device compiler flags +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags" + OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!") +else() + mark_as_advanced(COMPUTECPP_COMPILER_FLAGS) + message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}") +endif() + +set(COMPUTECPP_DEVICE_COMPILER_FLAGS ${COMPUTECPP_DEVICE_COMPILER_FLAGS} -sycl-compress-name -no-serial-memop -DEIGEN_NO_ASSERTION_CHECKING=1) + +# Check if the platform is supported +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported" + OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "platform - Error checking platform support!") +else() + mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED) + if (COMPUTECPP_PLATFORM_IS_SUPPORTED) + message(STATUS "platform - your system can support ComputeCpp") + else() + message(STATUS "platform - your system CANNOT support ComputeCpp") + endif() +endif() + +#################### +# __build_sycl +#################### +# +# Adds a custom target for running compute++ and adding a dependency for the +# resulting integration header. +# +# targetName : Name of the target. +# sourceFile : Source file to be compiled. +# binaryDir : Intermediate directory to output the integration header. +# +function(__build_spir targetName sourceFile binaryDir) + + # Retrieve source file name. + get_filename_component(sourceFileName ${sourceFile} NAME) + + # Set the path to the Sycl file. + set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl) + + # Add any user-defined include to the device compiler + get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY + INCLUDE_DIRECTORIES) + set(device_compiler_includes "") + foreach(directory ${includeDirectories}) + set(device_compiler_includes "-I${directory}" ${device_compiler_includes}) + endforeach() + if (CMAKE_INCLUDE_PATH) + foreach(directory ${CMAKE_INCLUDE_PATH}) + set(device_compiler_includes "-I${directory}" + ${device_compiler_includes}) + endforeach() + endif() + + # Convert argument list format + separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS) + + # Add custom command for running compute++ + add_custom_command( + OUTPUT ${outputSyclFile} + COMMAND ${COMPUTECPP_DEVICE_COMPILER} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + -isystem ${COMPUTECPP_INCLUDE_DIRECTORY} + ${COMPUTECPP_PLATFORM_SPECIFIC_ARGS} + ${device_compiler_includes} + -o ${outputSyclFile} + -c ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile} + DEPENDS ${sourceFile} + WORKING_DIRECTORY ${binaryDir} + COMMENT "Building ComputeCpp integration header file ${outputSyclFile}") + + # Add a custom target for the generated integration header + add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile}) + + # Add a dependency on the integration header + add_dependencies(${targetName} ${targetName}_integration_header) + + # Set the host compiler C++ standard to C++11 + set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11) + + # Disable GCC dual ABI on GCC 5.1 and higher + if(COMPUTECPP_DISABLE_GCC_DUAL_ABI) + set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS + "_GLIBCXX_USE_CXX11_ABI=0") + endif() + +endfunction() + +####################### +# add_sycl_to_target +####################### +# +# Adds a SYCL compilation custom command associated with an existing +# target and sets a dependancy on that new command. +# +# targetName : Name of the target to add a SYCL to. +# sourceFile : Source file to be compiled for SYCL. +# binaryDir : Intermediate directory to output the integration header. +# +function(add_sycl_to_target targetName sourceFile binaryDir) + + # Add custom target to run compute++ and generate the integration header + __build_spir(${targetName} ${sourceFile} ${binaryDir}) + + # Link with the ComputeCpp runtime library + target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY} + PUBLIC ${OpenCL_LIBRARIES}) + +endfunction(add_sycl_to_target) diff --git a/external/eigen3/cmake/FindEigen3.cmake b/external/eigen3/cmake/FindEigen3.cmake index 9c546a0..9e96978 100644 --- a/external/eigen3/cmake/FindEigen3.cmake +++ b/external/eigen3/cmake/FindEigen3.cmake @@ -9,6 +9,12 @@ # EIGEN3_FOUND - system has eigen lib with correct version # EIGEN3_INCLUDE_DIR - the eigen include directory # EIGEN3_VERSION - eigen version +# +# This module reads hints about search locations from +# the following enviroment variables: +# +# EIGEN3_ROOT +# EIGEN3_ROOT_DIR # Copyright (c) 2006, 2007 Montel Laurent, # Copyright (c) 2008, 2009 Gael Guennebaud, @@ -60,13 +66,23 @@ if (EIGEN3_INCLUDE_DIR) set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) else (EIGEN3_INCLUDE_DIR) - - find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library - PATHS - ${CMAKE_INSTALL_PREFIX}/include - ${KDE4_INCLUDE_DIR} - PATH_SUFFIXES eigen3 eigen - ) + + # search first if an Eigen3Config.cmake is available in the system, + # if successful this would set EIGEN3_INCLUDE_DIR and the rest of + # the script will work as usual + find_package(Eigen3 ${Eigen3_FIND_VERSION} NO_MODULE QUIET) + + if(NOT EIGEN3_INCLUDE_DIR) + find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library + HINTS + ENV EIGEN3_ROOT + ENV EIGEN3_ROOT_DIR + PATHS + ${CMAKE_INSTALL_PREFIX}/include + ${KDE4_INCLUDE_DIR} + PATH_SUFFIXES eigen3 eigen + ) + endif(NOT EIGEN3_INCLUDE_DIR) if(EIGEN3_INCLUDE_DIR) _eigen3_check_version() diff --git a/external/eigen3/cmake/FindHWLOC.cmake b/external/eigen3/cmake/FindHWLOC.cmake new file mode 100644 index 0000000..a831b5c --- /dev/null +++ b/external/eigen3/cmake/FindHWLOC.cmake @@ -0,0 +1,331 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find HWLOC include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(HWLOC +# [REQUIRED]) # Fail with error if hwloc is not found +# +# This module finds headers and hwloc library. +# Results are reported in variables: +# HWLOC_FOUND - True if headers and requested libraries were found +# HWLOC_INCLUDE_DIRS - hwloc include directories +# HWLOC_LIBRARY_DIRS - Link directories for hwloc libraries +# HWLOC_LIBRARIES - hwloc component libraries to be linked +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DHWLOC_DIR=path/to/hwloc): +# HWLOC_DIR - Where to find the base directory of hwloc +# HWLOC_INCDIR - Where to find the header files +# HWLOC_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: HWLOC_DIR, HWLOC_INCDIR, HWLOC_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +include(CheckStructHasMember) +include(CheckCSourceCompiles) + +if (NOT HWLOC_FOUND) + set(HWLOC_DIR "" CACHE PATH "Installation directory of HWLOC library") + if (NOT HWLOC_FIND_QUIETLY) + message(STATUS "A cache variable, namely HWLOC_DIR, has been set to specify the install directory of HWLOC") + endif() +endif() + +set(ENV_HWLOC_DIR "$ENV{HWLOC_DIR}") +set(ENV_HWLOC_INCDIR "$ENV{HWLOC_INCDIR}") +set(ENV_HWLOC_LIBDIR "$ENV{HWLOC_LIBDIR}") +set(HWLOC_GIVEN_BY_USER "FALSE") +if ( HWLOC_DIR OR ( HWLOC_INCDIR AND HWLOC_LIBDIR) OR ENV_HWLOC_DIR OR (ENV_HWLOC_INCDIR AND ENV_HWLOC_LIBDIR) ) + set(HWLOC_GIVEN_BY_USER "TRUE") +endif() + +# Optionally use pkg-config to detect include/library dirs (if pkg-config is available) +# ------------------------------------------------------------------------------------- +include(FindPkgConfig) +find_package(PkgConfig QUIET) +if( PKG_CONFIG_EXECUTABLE AND NOT HWLOC_GIVEN_BY_USER ) + + pkg_search_module(HWLOC hwloc) + if (NOT HWLOC_FIND_QUIETLY) + if (HWLOC_FOUND AND HWLOC_LIBRARIES) + message(STATUS "Looking for HWLOC - found using PkgConfig") + #if(NOT HWLOC_INCLUDE_DIRS) + # message("${Magenta}HWLOC_INCLUDE_DIRS is empty using PkgConfig." + # "Perhaps the path to hwloc headers is already present in your" + # "C(PLUS)_INCLUDE_PATH environment variable.${ColourReset}") + #endif() + else() + message(STATUS "${Magenta}Looking for HWLOC - not found using PkgConfig." + "\n Perhaps you should add the directory containing hwloc.pc to" + "\n the PKG_CONFIG_PATH environment variable.${ColourReset}") + endif() + endif() + +endif( PKG_CONFIG_EXECUTABLE AND NOT HWLOC_GIVEN_BY_USER ) + +if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HWLOC_FOUND) OR (HWLOC_GIVEN_BY_USER) ) + + if (NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for HWLOC - PkgConfig not used") + endif() + + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + if(ENV_HWLOC_INCDIR) + list(APPEND _inc_env "${ENV_HWLOC_INCDIR}") + elseif(ENV_HWLOC_DIR) + list(APPEND _inc_env "${ENV_HWLOC_DIR}") + list(APPEND _inc_env "${ENV_HWLOC_DIR}/include") + list(APPEND _inc_env "${ENV_HWLOC_DIR}/include/hwloc") + else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_inc_env}") + + # Try to find the hwloc header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(HWLOC_INCDIR) + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + NAMES hwloc.h + HINTS ${HWLOC_INCDIR}) + else() + if(HWLOC_DIR) + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + NAMES hwloc.h + HINTS ${HWLOC_DIR} + PATH_SUFFIXES "include" "include/hwloc") + else() + set(HWLOC_hwloc.h_DIRS "HWLOC_hwloc.h_DIRS-NOTFOUND") + find_path(HWLOC_hwloc.h_DIRS + NAMES hwloc.h + HINTS ${PATH_TO_LOOK_FOR} + PATH_SUFFIXES "hwloc") + endif() + endif() + mark_as_advanced(HWLOC_hwloc.h_DIRS) + + # Add path to cmake variable + # ------------------------------------ + if (HWLOC_hwloc.h_DIRS) + set(HWLOC_INCLUDE_DIRS "${HWLOC_hwloc.h_DIRS}") + else () + set(HWLOC_INCLUDE_DIRS "HWLOC_INCLUDE_DIRS-NOTFOUND") + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc -- hwloc.h not found") + endif() + endif () + + if (HWLOC_INCLUDE_DIRS) + list(REMOVE_DUPLICATES HWLOC_INCLUDE_DIRS) + endif () + + + # Looking for lib + # --------------- + + # Add system library paths to search lib + # -------------------------------------- + unset(_lib_env) + if(ENV_HWLOC_LIBDIR) + list(APPEND _lib_env "${ENV_HWLOC_LIBDIR}") + elseif(ENV_HWLOC_DIR) + list(APPEND _lib_env "${ENV_HWLOC_DIR}") + list(APPEND _lib_env "${ENV_HWLOC_DIR}/lib") + else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif() + list(REMOVE_DUPLICATES _lib_env) + + # set paths where to look for + set(PATH_TO_LOOK_FOR "${_lib_env}") + + # Try to find the hwloc lib in the given paths + # ---------------------------------------------- + + # call cmake macro to find the lib path + if(HWLOC_LIBDIR) + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${HWLOC_LIBDIR}) + else() + if(HWLOC_DIR) + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${HWLOC_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(HWLOC_hwloc_LIBRARY "HWLOC_hwloc_LIBRARY-NOTFOUND") + find_library(HWLOC_hwloc_LIBRARY + NAMES hwloc + HINTS ${PATH_TO_LOOK_FOR}) + endif() + endif() + mark_as_advanced(HWLOC_hwloc_LIBRARY) + + # If found, add path to cmake variable + # ------------------------------------ + if (HWLOC_hwloc_LIBRARY) + get_filename_component(hwloc_lib_path ${HWLOC_hwloc_LIBRARY} PATH) + # set cmake variables (respects naming convention) + set(HWLOC_LIBRARIES "${HWLOC_hwloc_LIBRARY}") + set(HWLOC_LIBRARY_DIRS "${hwloc_lib_path}") + else () + set(HWLOC_LIBRARIES "HWLOC_LIBRARIES-NOTFOUND") + set(HWLOC_LIBRARY_DIRS "HWLOC_LIBRARY_DIRS-NOTFOUND") + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc -- lib hwloc not found") + endif() + endif () + + if (HWLOC_LIBRARY_DIRS) + list(REMOVE_DUPLICATES HWLOC_LIBRARY_DIRS) + endif () + + # check a function to validate the find + if(HWLOC_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # HWLOC + if (HWLOC_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") + endif() + if (HWLOC_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${HWLOC_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${HWLOC_LIBRARIES}") + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(HWLOC_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(hwloc_topology_init HWLOC_WORKS) + mark_as_advanced(HWLOC_WORKS) + + if(NOT HWLOC_WORKS) + if(NOT HWLOC_FIND_QUIETLY) + message(STATUS "Looking for hwloc : test of hwloc_topology_init with hwloc library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) + endif(HWLOC_LIBRARIES) + +endif( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT HWLOC_FOUND) OR (HWLOC_GIVEN_BY_USER) ) + +if (HWLOC_LIBRARIES) + if (HWLOC_LIBRARY_DIRS) + list(GET HWLOC_LIBRARY_DIRS 0 first_lib_path) + else() + list(GET HWLOC_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + endif() + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(HWLOC_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of HWLOC library" FORCE) + else() + set(HWLOC_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of HWLOC library" FORCE) + endif() +endif() +mark_as_advanced(HWLOC_DIR) +mark_as_advanced(HWLOC_DIR_FOUND) + +# check that HWLOC has been found +# ------------------------------- +include(FindPackageHandleStandardArgs) +if (PKG_CONFIG_EXECUTABLE AND HWLOC_FOUND) + find_package_handle_standard_args(HWLOC DEFAULT_MSG + HWLOC_LIBRARIES) +else() + find_package_handle_standard_args(HWLOC DEFAULT_MSG + HWLOC_LIBRARIES + HWLOC_WORKS) +endif() + +if (HWLOC_FOUND) + set(HWLOC_SAVE_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) + list(APPEND CMAKE_REQUIRED_INCLUDES ${HWLOC_INCLUDE_DIRS}) + + # test headers to guess the version + check_struct_has_member( "struct hwloc_obj" parent hwloc.h HAVE_HWLOC_PARENT_MEMBER ) + check_struct_has_member( "struct hwloc_cache_attr_s" size hwloc.h HAVE_HWLOC_CACHE_ATTR ) + check_c_source_compiles( "#include + int main(void) { hwloc_obj_t o; o->type = HWLOC_OBJ_PU; return 0;}" HAVE_HWLOC_OBJ_PU) + include(CheckLibraryExists) + check_library_exists(${HWLOC_LIBRARIES} hwloc_bitmap_free "" HAVE_HWLOC_BITMAP) + + set(CMAKE_REQUIRED_INCLUDES ${HWLOC_SAVE_CMAKE_REQUIRED_INCLUDES}) +endif() diff --git a/external/eigen3/cmake/FindMetis.cmake b/external/eigen3/cmake/FindMetis.cmake index 6a0ce79..da2f1f1 100644 --- a/external/eigen3/cmake/FindMetis.cmake +++ b/external/eigen3/cmake/FindMetis.cmake @@ -1,59 +1,264 @@ -# Pastix requires METIS or METIS (partitioning and reordering tools) - -if (METIS_INCLUDES AND METIS_LIBRARIES) - set(METIS_FIND_QUIETLY TRUE) -endif (METIS_INCLUDES AND METIS_LIBRARIES) - -find_path(METIS_INCLUDES - NAMES - metis.h - PATHS - $ENV{METISDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - . - metis - include -) - -macro(_metis_check_version) - file(READ "${METIS_INCLUDES}/metis.h" _metis_version_header) - - string(REGEX MATCH "define[ \t]+METIS_VER_MAJOR[ \t]+([0-9]+)" _metis_major_version_match "${_metis_version_header}") - set(METIS_MAJOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+METIS_VER_MINOR[ \t]+([0-9]+)" _metis_minor_version_match "${_metis_version_header}") - set(METIS_MINOR_VERSION "${CMAKE_MATCH_1}") - string(REGEX MATCH "define[ \t]+METIS_VER_SUBMINOR[ \t]+([0-9]+)" _metis_subminor_version_match "${_metis_version_header}") - set(METIS_SUBMINOR_VERSION "${CMAKE_MATCH_1}") - if(NOT METIS_MAJOR_VERSION) - message(STATUS "Could not determine Metis version. Assuming version 4.0.0") - set(METIS_VERSION 4.0.0) +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find METIS include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(METIS +# [REQUIRED] # Fail with error if metis is not found +# ) +# +# This module finds headers and metis library. +# Results are reported in variables: +# METIS_FOUND - True if headers and requested libraries were found +# METIS_INCLUDE_DIRS - metis include directories +# METIS_LIBRARY_DIRS - Link directories for metis libraries +# METIS_LIBRARIES - metis component libraries to be linked +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DMETIS_DIR=path/to/metis): +# METIS_DIR - Where to find the base directory of metis +# METIS_INCDIR - Where to find the header files +# METIS_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: METIS_DIR, METIS_INCDIR, METIS_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT METIS_FOUND) + set(METIS_DIR "" CACHE PATH "Installation directory of METIS library") + if (NOT METIS_FIND_QUIETLY) + message(STATUS "A cache variable, namely METIS_DIR, has been set to specify the install directory of METIS") + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_METIS_DIR "$ENV{METIS_DIR}") +set(ENV_METIS_INCDIR "$ENV{METIS_INCDIR}") +if(ENV_METIS_INCDIR) + list(APPEND _inc_env "${ENV_METIS_INCDIR}") +elseif(ENV_METIS_DIR) + list(APPEND _inc_env "${ENV_METIS_DIR}") + list(APPEND _inc_env "${ENV_METIS_DIR}/include") + list(APPEND _inc_env "${ENV_METIS_DIR}/include/metis") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") else() - set(METIS_VERSION ${METIS_MAJOR_VERSION}.${METIS_MINOR_VERSION}.${METIS_SUBMINOR_VERSION}) + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") endif() - if(${METIS_VERSION} VERSION_LESS ${Metis_FIND_VERSION}) - set(METIS_VERSION_OK FALSE) +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the metis header in the given paths +# ------------------------------------------------- +# call cmake macro to find the header path +if(METIS_INCDIR) + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES metis.h + HINTS ${METIS_INCDIR}) +else() + if(METIS_DIR) + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES metis.h + HINTS ${METIS_DIR} + PATH_SUFFIXES "include" "include/metis") else() - set(METIS_VERSION_OK TRUE) + set(METIS_metis.h_DIRS "METIS_metis.h_DIRS-NOTFOUND") + find_path(METIS_metis.h_DIRS + NAMES metis.h + HINTS ${_inc_env}) endif() +endif() +mark_as_advanced(METIS_metis.h_DIRS) - if(NOT METIS_VERSION_OK) - message(STATUS "Metis version ${METIS_VERSION} found in ${METIS_INCLUDES}, " - "but at least version ${Metis_FIND_VERSION} is required") - endif(NOT METIS_VERSION_OK) -endmacro(_metis_check_version) - if(METIS_INCLUDES AND Metis_FIND_VERSION) - _metis_check_version() +# If found, add path to cmake variable +# ------------------------------------ +if (METIS_metis.h_DIRS) + set(METIS_INCLUDE_DIRS "${METIS_metis.h_DIRS}") +else () + set(METIS_INCLUDE_DIRS "METIS_INCLUDE_DIRS-NOTFOUND") + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for metis -- metis.h not found") + endif() +endif() + + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_METIS_LIBDIR "$ENV{METIS_LIBDIR}") +if(ENV_METIS_LIBDIR) + list(APPEND _lib_env "${ENV_METIS_LIBDIR}") +elseif(ENV_METIS_DIR) + list(APPEND _lib_env "${ENV_METIS_DIR}") + list(APPEND _lib_env "${ENV_METIS_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") else() - set(METIS_VERSION_OK TRUE) + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the metis lib in the given paths +# ---------------------------------------------- +# call cmake macro to find the lib path +if(METIS_LIBDIR) + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${METIS_LIBDIR}) +else() + if(METIS_DIR) + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${METIS_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(METIS_metis_LIBRARY "METIS_metis_LIBRARY-NOTFOUND") + find_library(METIS_metis_LIBRARY + NAMES metis + HINTS ${_lib_env}) + endif() +endif() +mark_as_advanced(METIS_metis_LIBRARY) -find_library(METIS_LIBRARIES metis PATHS $ENV{METISDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) +# If found, add path to cmake variable +# ------------------------------------ +if (METIS_metis_LIBRARY) + get_filename_component(metis_lib_path "${METIS_metis_LIBRARY}" PATH) + # set cmake variables + set(METIS_LIBRARIES "${METIS_metis_LIBRARY}") + set(METIS_LIBRARY_DIRS "${metis_lib_path}") +else () + set(METIS_LIBRARIES "METIS_LIBRARIES-NOTFOUND") + set(METIS_LIBRARY_DIRS "METIS_LIBRARY_DIRS-NOTFOUND") + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for metis -- lib metis not found") + endif() +endif () +# check a function to validate the find +if(METIS_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # METIS + if (METIS_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${METIS_INCLUDE_DIRS}") + endif() + if (METIS_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${METIS_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${METIS_LIBRARIES}") + # m + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(METIS_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(METIS_NodeND METIS_WORKS) + mark_as_advanced(METIS_WORKS) + + if(NOT METIS_WORKS) + if(NOT METIS_FIND_QUIETLY) + message(STATUS "Looking for METIS : test of METIS_NodeND with METIS library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(METIS_LIBRARIES) + +if (METIS_LIBRARIES) + list(GET METIS_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(METIS_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of METIS library" FORCE) + else() + set(METIS_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of METIS library" FORCE) + endif() +endif() +mark_as_advanced(METIS_DIR) +mark_as_advanced(METIS_DIR_FOUND) + +# check that METIS has been found +# --------------------------------- include(FindPackageHandleStandardArgs) find_package_handle_standard_args(METIS DEFAULT_MSG - METIS_INCLUDES METIS_LIBRARIES METIS_VERSION_OK) - -mark_as_advanced(METIS_INCLUDES METIS_LIBRARIES) + METIS_LIBRARIES + METIS_WORKS) +# +# TODO: Add possibility to check for specific functions in the library +# diff --git a/external/eigen3/cmake/FindPTSCOTCH.cmake b/external/eigen3/cmake/FindPTSCOTCH.cmake new file mode 100644 index 0000000..1396d05 --- /dev/null +++ b/external/eigen3/cmake/FindPTSCOTCH.cmake @@ -0,0 +1,423 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find PTSCOTCH include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(PTSCOTCH +# [REQUIRED] # Fail with error if ptscotch is not found +# [COMPONENTS ...] # dependencies +# ) +# +# PTSCOTCH depends on the following libraries: +# - Threads +# - MPI +# +# COMPONENTS can be some of the following: +# - ESMUMPS: to activate detection of PT-Scotch with the esmumps interface +# +# This module finds headers and ptscotch library. +# Results are reported in variables: +# PTSCOTCH_FOUND - True if headers and requested libraries were found +# PTSCOTCH_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# PTSCOTCH_INCLUDE_DIRS - ptscotch include directories +# PTSCOTCH_LIBRARY_DIRS - Link directories for ptscotch libraries +# PTSCOTCH_LIBRARIES - ptscotch component libraries to be linked +# PTSCOTCH_INCLUDE_DIRS_DEP - ptscotch + dependencies include directories +# PTSCOTCH_LIBRARY_DIRS_DEP - ptscotch + dependencies link directories +# PTSCOTCH_LIBRARIES_DEP - ptscotch libraries + dependencies +# PTSCOTCH_INTSIZE - Number of octets occupied by a SCOTCH_Num +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DPTSCOTCH=path/to/ptscotch): +# PTSCOTCH_DIR - Where to find the base directory of ptscotch +# PTSCOTCH_INCDIR - Where to find the header files +# PTSCOTCH_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: PTSCOTCH_DIR, PTSCOTCH_INCDIR, PTSCOTCH_LIBDIR + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT PTSCOTCH_FOUND) + set(PTSCOTCH_DIR "" CACHE PATH "Installation directory of PTSCOTCH library") + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "A cache variable, namely PTSCOTCH_DIR, has been set to specify the install directory of PTSCOTCH") + endif() +endif() + +# Set the version to find +set(PTSCOTCH_LOOK_FOR_ESMUMPS OFF) + +if( PTSCOTCH_FIND_COMPONENTS ) + foreach( component ${PTSCOTCH_FIND_COMPONENTS} ) + if (${component} STREQUAL "ESMUMPS") + # means we look for esmumps library + set(PTSCOTCH_LOOK_FOR_ESMUMPS ON) + endif() + endforeach() +endif() + +# PTSCOTCH depends on Threads, try to find it +if (NOT THREADS_FOUND) + if (PTSCOTCH_FIND_REQUIRED) + find_package(Threads REQUIRED) + else() + find_package(Threads) + endif() +endif() + +# PTSCOTCH depends on MPI, try to find it +if (NOT MPI_FOUND) + if (PTSCOTCH_FIND_REQUIRED) + find_package(MPI REQUIRED) + else() + find_package(MPI) + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_PTSCOTCH_DIR "$ENV{PTSCOTCH_DIR}") +set(ENV_PTSCOTCH_INCDIR "$ENV{PTSCOTCH_INCDIR}") +if(ENV_PTSCOTCH_INCDIR) + list(APPEND _inc_env "${ENV_PTSCOTCH_INCDIR}") +elseif(ENV_PTSCOTCH_DIR) + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}") + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}/include") + list(APPEND _inc_env "${ENV_PTSCOTCH_DIR}/include/ptscotch") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the ptscotch header in the given paths +# ------------------------------------------------- + +set(PTSCOTCH_hdrs_to_find "ptscotch.h;scotch.h") + +# call cmake macro to find the header path +if(PTSCOTCH_INCDIR) + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${PTSCOTCH_INCDIR}) + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() +else() + if(PTSCOTCH_DIR) + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${PTSCOTCH_DIR} + PATH_SUFFIXES "include" "include/scotch") + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() + else() + foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + set(PTSCOTCH_${ptscotch_hdr}_DIRS "PTSCOTCH_${ptscotch_hdr}_DIRS-NOTFOUND") + find_path(PTSCOTCH_${ptscotch_hdr}_DIRS + NAMES ${ptscotch_hdr} + HINTS ${_inc_env} + PATH_SUFFIXES "scotch") + mark_as_advanced(PTSCOTCH_${ptscotch_hdr}_DIRS) + endforeach() + endif() +endif() + +# If found, add path to cmake variable +# ------------------------------------ +foreach(ptscotch_hdr ${PTSCOTCH_hdrs_to_find}) + if (PTSCOTCH_${ptscotch_hdr}_DIRS) + list(APPEND PTSCOTCH_INCLUDE_DIRS "${PTSCOTCH_${ptscotch_hdr}_DIRS}") + else () + set(PTSCOTCH_INCLUDE_DIRS "PTSCOTCH_INCLUDE_DIRS-NOTFOUND") + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for ptscotch -- ${ptscotch_hdr} not found") + endif() + endif() +endforeach() +list(REMOVE_DUPLICATES PTSCOTCH_INCLUDE_DIRS) + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_PTSCOTCH_LIBDIR "$ENV{PTSCOTCH_LIBDIR}") +if(ENV_PTSCOTCH_LIBDIR) + list(APPEND _lib_env "${ENV_PTSCOTCH_LIBDIR}") +elseif(ENV_PTSCOTCH_DIR) + list(APPEND _lib_env "${ENV_PTSCOTCH_DIR}") + list(APPEND _lib_env "${ENV_PTSCOTCH_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the ptscotch lib in the given paths +# ---------------------------------------------- + +set(PTSCOTCH_libs_to_find "ptscotch;ptscotcherr") +if (PTSCOTCH_LOOK_FOR_ESMUMPS) + list(INSERT PTSCOTCH_libs_to_find 0 "ptesmumps") + list(APPEND PTSCOTCH_libs_to_find "esmumps" ) +endif() +list(APPEND PTSCOTCH_libs_to_find "scotch;scotcherr") + +# call cmake macro to find the lib path +if(PTSCOTCH_LIBDIR) + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${PTSCOTCH_LIBDIR}) + endforeach() +else() + if(PTSCOTCH_DIR) + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${PTSCOTCH_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + set(PTSCOTCH_${ptscotch_lib}_LIBRARY "PTSCOTCH_${ptscotch_lib}_LIBRARY-NOTFOUND") + find_library(PTSCOTCH_${ptscotch_lib}_LIBRARY + NAMES ${ptscotch_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +set(PTSCOTCH_LIBRARIES "") +set(PTSCOTCH_LIBRARY_DIRS "") +# If found, add path to cmake variable +# ------------------------------------ +foreach(ptscotch_lib ${PTSCOTCH_libs_to_find}) + + if (PTSCOTCH_${ptscotch_lib}_LIBRARY) + get_filename_component(${ptscotch_lib}_lib_path "${PTSCOTCH_${ptscotch_lib}_LIBRARY}" PATH) + # set cmake variables + list(APPEND PTSCOTCH_LIBRARIES "${PTSCOTCH_${ptscotch_lib}_LIBRARY}") + list(APPEND PTSCOTCH_LIBRARY_DIRS "${${ptscotch_lib}_lib_path}") + else () + list(APPEND PTSCOTCH_LIBRARIES "${PTSCOTCH_${ptscotch_lib}_LIBRARY}") + if (NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for ptscotch -- lib ${ptscotch_lib} not found") + endif() + endif () + + mark_as_advanced(PTSCOTCH_${ptscotch_lib}_LIBRARY) + +endforeach() +list(REMOVE_DUPLICATES PTSCOTCH_LIBRARY_DIRS) + +# check a function to validate the find +if(PTSCOTCH_LIBRARIES) + + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # PTSCOTCH + if (PTSCOTCH_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${PTSCOTCH_INCLUDE_DIRS}") + endif() + if (PTSCOTCH_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${PTSCOTCH_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${PTSCOTCH_LIBRARIES}") + # MPI + if (MPI_FOUND) + if (MPI_C_INCLUDE_PATH) + list(APPEND CMAKE_REQUIRED_INCLUDES "${MPI_C_INCLUDE_PATH}") + endif() + if (MPI_C_LINK_FLAGS) + if (${MPI_C_LINK_FLAGS} MATCHES " -") + string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + endif() + list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") + endif() + list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") + endif() + # THREADS + if(CMAKE_THREAD_LIBS_INIT) + list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") + endif() + set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") + find_library(Z_LIBRARY NAMES z) + mark_as_advanced(Z_LIBRARY) + if(Z_LIBRARY) + list(APPEND REQUIRED_LIBS "-lz") + endif() + set(M_LIBRARY "M_LIBRARY-NOTFOUND") + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") + find_library(RT_LIBRARY NAMES rt) + mark_as_advanced(RT_LIBRARY) + if(RT_LIBRARY) + list(APPEND REQUIRED_LIBS "-lrt") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(PTSCOTCH_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(SCOTCH_dgraphInit PTSCOTCH_WORKS) + mark_as_advanced(PTSCOTCH_WORKS) + + if(PTSCOTCH_WORKS) + # save link with dependencies + set(PTSCOTCH_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(PTSCOTCH_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(PTSCOTCH_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(PTSCOTCH_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES PTSCOTCH_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES PTSCOTCH_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES PTSCOTCH_LINKER_FLAGS) + else() + if(NOT PTSCOTCH_FIND_QUIETLY) + message(STATUS "Looking for PTSCOTCH : test of SCOTCH_dgraphInit with PTSCOTCH library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(PTSCOTCH_LIBRARIES) + +if (PTSCOTCH_LIBRARIES) + list(GET PTSCOTCH_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(PTSCOTCH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of PTSCOTCH library" FORCE) + else() + set(PTSCOTCH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of PTSCOTCH library" FORCE) + endif() +endif() +mark_as_advanced(PTSCOTCH_DIR) +mark_as_advanced(PTSCOTCH_DIR_FOUND) + +# Check the size of SCOTCH_Num +# --------------------------------- +set(CMAKE_REQUIRED_INCLUDES ${PTSCOTCH_INCLUDE_DIRS}) + +include(CheckCSourceRuns) +#stdio.h and stdint.h should be included by scotch.h directly +set(PTSCOTCH_C_TEST_SCOTCH_Num_4 " +#include +#include +#include +int main(int argc, char **argv) { + if (sizeof(SCOTCH_Num) == 4) + return 0; + else + return 1; +} +") + +set(PTSCOTCH_C_TEST_SCOTCH_Num_8 " +#include +#include +#include +int main(int argc, char **argv) { + if (sizeof(SCOTCH_Num) == 8) + return 0; + else + return 1; +} +") +check_c_source_runs("${PTSCOTCH_C_TEST_SCOTCH_Num_4}" PTSCOTCH_Num_4) +if(NOT PTSCOTCH_Num_4) + check_c_source_runs("${PTSCOTCH_C_TEST_SCOTCH_Num_8}" PTSCOTCH_Num_8) + if(NOT PTSCOTCH_Num_8) + set(PTSCOTCH_INTSIZE -1) + else() + set(PTSCOTCH_INTSIZE 8) + endif() +else() + set(PTSCOTCH_INTSIZE 4) +endif() +set(CMAKE_REQUIRED_INCLUDES "") + +# check that PTSCOTCH has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PTSCOTCH DEFAULT_MSG + PTSCOTCH_LIBRARIES + PTSCOTCH_WORKS) +# +# TODO: Add possibility to check for specific functions in the library +# diff --git a/external/eigen3/cmake/FindPastix.cmake b/external/eigen3/cmake/FindPastix.cmake index e2e6c81..470477f 100644 --- a/external/eigen3/cmake/FindPastix.cmake +++ b/external/eigen3/cmake/FindPastix.cmake @@ -1,25 +1,704 @@ -# Pastix lib requires linking to a blas library. -# It is up to the user of this module to find a BLAS and link to it. -# Pastix requires SCOTCH or METIS (partitioning and reordering tools) as well +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find PASTIX include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(PASTIX +# [REQUIRED] # Fail with error if pastix is not found +# [COMPONENTS ...] # dependencies +# ) +# +# PASTIX depends on the following libraries: +# - Threads, m, rt +# - MPI +# - HWLOC +# - BLAS +# +# COMPONENTS are optional libraries PASTIX could be linked with, +# Use it to drive detection of a specific compilation chain +# COMPONENTS can be some of the following: +# - MPI: to activate detection of the parallel MPI version (default) +# it looks for Threads, HWLOC, BLAS, MPI and ScaLAPACK libraries +# - SEQ: to activate detection of the sequential version (exclude MPI version) +# - STARPU: to activate detection of StarPU version +# it looks for MPI version of StarPU (default behaviour) +# if SEQ and STARPU are given, it looks for a StarPU without MPI +# - STARPU_CUDA: to activate detection of StarPU with CUDA +# - STARPU_FXT: to activate detection of StarPU with FxT +# - SCOTCH: to activate detection of PASTIX linked with SCOTCH +# - PTSCOTCH: to activate detection of PASTIX linked with SCOTCH +# - METIS: to activate detection of PASTIX linked with SCOTCH +# +# This module finds headers and pastix library. +# Results are reported in variables: +# PASTIX_FOUND - True if headers and requested libraries were found +# PASTIX_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# PASTIX_INCLUDE_DIRS - pastix include directories +# PASTIX_LIBRARY_DIRS - Link directories for pastix libraries +# PASTIX_LIBRARIES - pastix libraries +# PASTIX_INCLUDE_DIRS_DEP - pastix + dependencies include directories +# PASTIX_LIBRARY_DIRS_DEP - pastix + dependencies link directories +# PASTIX_LIBRARIES_DEP - pastix libraries + dependencies +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DPASTIX_DIR=path/to/pastix): +# PASTIX_DIR - Where to find the base directory of pastix +# PASTIX_INCDIR - Where to find the header files +# PASTIX_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: PASTIX_DIR, PASTIX_INCDIR, PASTIX_LIBDIR -if (PASTIX_INCLUDES AND PASTIX_LIBRARIES) - set(PASTIX_FIND_QUIETLY TRUE) -endif (PASTIX_INCLUDES AND PASTIX_LIBRARIES) +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) -find_path(PASTIX_INCLUDES - NAMES - pastix_nompi.h - PATHS - $ENV{PASTIXDIR} - ${INCLUDE_INSTALL_DIR} -) -find_library(PASTIX_LIBRARIES pastix PATHS $ENV{PASTIXDIR} ${LIB_INSTALL_DIR}) +if (NOT PASTIX_FOUND) + set(PASTIX_DIR "" CACHE PATH "Installation directory of PASTIX library") + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "A cache variable, namely PASTIX_DIR, has been set to specify the install directory of PASTIX") + endif() +endif() +# Set the version to find +set(PASTIX_LOOK_FOR_MPI ON) +set(PASTIX_LOOK_FOR_SEQ OFF) +set(PASTIX_LOOK_FOR_STARPU OFF) +set(PASTIX_LOOK_FOR_STARPU_CUDA OFF) +set(PASTIX_LOOK_FOR_STARPU_FXT OFF) +set(PASTIX_LOOK_FOR_SCOTCH ON) +set(PASTIX_LOOK_FOR_PTSCOTCH OFF) +set(PASTIX_LOOK_FOR_METIS OFF) +if( PASTIX_FIND_COMPONENTS ) + foreach( component ${PASTIX_FIND_COMPONENTS} ) + if (${component} STREQUAL "SEQ") + # means we look for the sequential version of PaStiX (without MPI) + set(PASTIX_LOOK_FOR_SEQ ON) + set(PASTIX_LOOK_FOR_MPI OFF) + endif() + if (${component} STREQUAL "MPI") + # means we look for the MPI version of PaStiX (default) + set(PASTIX_LOOK_FOR_SEQ OFF) + set(PASTIX_LOOK_FOR_MPI ON) + endif() + if (${component} STREQUAL "STARPU") + # means we look for PaStiX with StarPU + set(PASTIX_LOOK_FOR_STARPU ON) + endif() + if (${component} STREQUAL "STARPU_CUDA") + # means we look for PaStiX with StarPU + CUDA + set(PASTIX_LOOK_FOR_STARPU ON) + set(PASTIX_LOOK_FOR_STARPU_CUDA ON) + endif() + if (${component} STREQUAL "STARPU_FXT") + # means we look for PaStiX with StarPU + FxT + set(PASTIX_LOOK_FOR_STARPU_FXT ON) + endif() + if (${component} STREQUAL "SCOTCH") + set(PASTIX_LOOK_FOR_SCOTCH ON) + endif() + if (${component} STREQUAL "SCOTCH") + set(PASTIX_LOOK_FOR_PTSCOTCH ON) + endif() + if (${component} STREQUAL "METIS") + set(PASTIX_LOOK_FOR_METIS ON) + endif() + endforeach() +endif() +# Dependencies detection +# ---------------------- + + +# Required dependencies +# --------------------- + +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect pthread") +endif() +if (PASTIX_FIND_REQUIRED) + find_package(Threads REQUIRED QUIET) +else() + find_package(Threads QUIET) +endif() +set(PASTIX_EXTRA_LIBRARIES "") +if( THREADS_FOUND ) + list(APPEND PASTIX_EXTRA_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) +endif () + +# Add math library to the list of extra +# it normally exists on all common systems provided with a C compiler +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect libm") +endif() +set(PASTIX_M_LIBRARIES "") +if(UNIX OR WIN32) + find_library( + PASTIX_M_m_LIBRARY + NAMES m + ) + mark_as_advanced(PASTIX_M_m_LIBRARY) + if (PASTIX_M_m_LIBRARY) + list(APPEND PASTIX_M_LIBRARIES "${PASTIX_M_m_LIBRARY}") + list(APPEND PASTIX_EXTRA_LIBRARIES "${PASTIX_M_m_LIBRARY}") + else() + if (PASTIX_FIND_REQUIRED) + message(FATAL_ERROR "Could NOT find libm on your system." + "Are you sure to a have a C compiler installed?") + endif() + endif() +endif() + +# Try to find librt (libposix4 - POSIX.1b Realtime Extensions library) +# on Unix systems except Apple ones because it does not exist on it +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect librt") +endif() +set(PASTIX_RT_LIBRARIES "") +if(UNIX AND NOT APPLE) + find_library( + PASTIX_RT_rt_LIBRARY + NAMES rt + ) + mark_as_advanced(PASTIX_RT_rt_LIBRARY) + if (PASTIX_RT_rt_LIBRARY) + list(APPEND PASTIX_RT_LIBRARIES "${PASTIX_RT_rt_LIBRARY}") + list(APPEND PASTIX_EXTRA_LIBRARIES "${PASTIX_RT_rt_LIBRARY}") + else() + if (PASTIX_FIND_REQUIRED) + message(FATAL_ERROR "Could NOT find librt on your system") + endif() + endif() +endif() + +# PASTIX depends on HWLOC +#------------------------ +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect HWLOC") +endif() +if (PASTIX_FIND_REQUIRED) + find_package(HWLOC REQUIRED QUIET) +else() + find_package(HWLOC QUIET) +endif() + +# PASTIX depends on BLAS +#----------------------- +if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect BLAS") +endif() +if (PASTIX_FIND_REQUIRED) + find_package(BLASEXT REQUIRED QUIET) +else() + find_package(BLASEXT QUIET) +endif() + +# Optional dependencies +# --------------------- + +# PASTIX may depend on MPI +#------------------------- +if (NOT MPI_FOUND AND PASTIX_LOOK_FOR_MPI) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect MPI") + endif() + # allows to use an external mpi compilation by setting compilers with + # -DMPI_C_COMPILER=path/to/mpicc -DMPI_Fortran_COMPILER=path/to/mpif90 + # at cmake configure + if(NOT MPI_C_COMPILER) + set(MPI_C_COMPILER mpicc) + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_MPI) + find_package(MPI REQUIRED QUIET) + else() + find_package(MPI QUIET) + endif() + if (MPI_FOUND) + mark_as_advanced(MPI_LIBRARY) + mark_as_advanced(MPI_EXTRA_LIBRARY) + endif() +endif (NOT MPI_FOUND AND PASTIX_LOOK_FOR_MPI) + +# PASTIX may depend on STARPU +#---------------------------- +if( NOT STARPU_FOUND AND PASTIX_LOOK_FOR_STARPU) + + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect StarPU") + endif() + + set(PASTIX_STARPU_VERSION "1.1" CACHE STRING "oldest STARPU version desired") + + # create list of components in order to make a single call to find_package(starpu...) + # we explicitly need a StarPU version built with hwloc + set(STARPU_COMPONENT_LIST "HWLOC") + + # StarPU may depend on MPI + # allows to use an external mpi compilation by setting compilers with + # -DMPI_C_COMPILER=path/to/mpicc -DMPI_Fortran_COMPILER=path/to/mpif90 + # at cmake configure + if (PASTIX_LOOK_FOR_MPI) + if(NOT MPI_C_COMPILER) + set(MPI_C_COMPILER mpicc) + endif() + list(APPEND STARPU_COMPONENT_LIST "MPI") + endif() + if (PASTIX_LOOK_FOR_STARPU_CUDA) + list(APPEND STARPU_COMPONENT_LIST "CUDA") + endif() + if (PASTIX_LOOK_FOR_STARPU_FXT) + list(APPEND STARPU_COMPONENT_LIST "FXT") + endif() + # set the list of optional dependencies we may discover + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_STARPU) + find_package(STARPU ${PASTIX_STARPU_VERSION} REQUIRED + COMPONENTS ${STARPU_COMPONENT_LIST}) + else() + find_package(STARPU ${PASTIX_STARPU_VERSION} + COMPONENTS ${STARPU_COMPONENT_LIST}) + endif() + +endif( NOT STARPU_FOUND AND PASTIX_LOOK_FOR_STARPU) + +# PASTIX may depends on SCOTCH +#----------------------------- +if (NOT SCOTCH_FOUND AND PASTIX_LOOK_FOR_SCOTCH) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect SCOTCH") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_SCOTCH) + find_package(SCOTCH REQUIRED QUIET) + else() + find_package(SCOTCH QUIET) + endif() +endif() + +# PASTIX may depends on PTSCOTCH +#------------------------------- +if (NOT PTSCOTCH_FOUND AND PASTIX_LOOK_FOR_PTSCOTCH) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect PTSCOTCH") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_PTSCOTCH) + find_package(PTSCOTCH REQUIRED QUIET) + else() + find_package(PTSCOTCH QUIET) + endif() +endif() + +# PASTIX may depends on METIS +#---------------------------- +if (NOT METIS_FOUND AND PASTIX_LOOK_FOR_METIS) + if (NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX - Try to detect METIS") + endif() + if (PASTIX_FIND_REQUIRED AND PASTIX_FIND_REQUIRED_METIS) + find_package(METIS REQUIRED QUIET) + else() + find_package(METIS QUIET) + endif() +endif() + +# Error if pastix required and no partitioning lib found +if (PASTIX_FIND_REQUIRED AND NOT SCOTCH_FOUND AND NOT PTSCOTCH_FOUND AND NOT METIS_FOUND) + message(FATAL_ERROR "Could NOT find any partitioning library on your system" + " (install scotch, ptscotch or metis)") +endif() + + +# Looking for PaStiX +# ------------------ + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_PASTIX_DIR "$ENV{PASTIX_DIR}") +set(ENV_PASTIX_INCDIR "$ENV{PASTIX_INCDIR}") +if(ENV_PASTIX_INCDIR) + list(APPEND _inc_env "${ENV_PASTIX_INCDIR}") +elseif(ENV_PASTIX_DIR) + list(APPEND _inc_env "${ENV_PASTIX_DIR}") + list(APPEND _inc_env "${ENV_PASTIX_DIR}/include") + list(APPEND _inc_env "${ENV_PASTIX_DIR}/include/pastix") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the pastix header in the given paths +# --------------------------------------------------- +# call cmake macro to find the header path +if(PASTIX_INCDIR) + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${PASTIX_INCDIR}) +else() + if(PASTIX_DIR) + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${PASTIX_DIR} + PATH_SUFFIXES "include" "include/pastix") + else() + set(PASTIX_pastix.h_DIRS "PASTIX_pastix.h_DIRS-NOTFOUND") + find_path(PASTIX_pastix.h_DIRS + NAMES pastix.h + HINTS ${_inc_env} + PATH_SUFFIXES "pastix") + endif() +endif() +mark_as_advanced(PASTIX_pastix.h_DIRS) + +# If found, add path to cmake variable +# ------------------------------------ +if (PASTIX_pastix.h_DIRS) + set(PASTIX_INCLUDE_DIRS "${PASTIX_pastix.h_DIRS}") +else () + set(PASTIX_INCLUDE_DIRS "PASTIX_INCLUDE_DIRS-NOTFOUND") + if(NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for pastix -- pastix.h not found") + endif() +endif() + + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_PASTIX_LIBDIR "$ENV{PASTIX_LIBDIR}") +if(ENV_PASTIX_LIBDIR) + list(APPEND _lib_env "${ENV_PASTIX_LIBDIR}") +elseif(ENV_PASTIX_DIR) + list(APPEND _lib_env "${ENV_PASTIX_DIR}") + list(APPEND _lib_env "${ENV_PASTIX_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the pastix lib in the given paths +# ------------------------------------------------ + +# create list of libs to find +set(PASTIX_libs_to_find "pastix_murge;pastix") + +# call cmake macro to find the lib path +if(PASTIX_LIBDIR) + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${PASTIX_LIBDIR}) + endforeach() +else() + if(PASTIX_DIR) + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${PASTIX_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(pastix_lib ${PASTIX_libs_to_find}) + set(PASTIX_${pastix_lib}_LIBRARY "PASTIX_${pastix_lib}_LIBRARY-NOTFOUND") + find_library(PASTIX_${pastix_lib}_LIBRARY + NAMES ${pastix_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +# If found, add path to cmake variable +# ------------------------------------ +foreach(pastix_lib ${PASTIX_libs_to_find}) + + get_filename_component(${pastix_lib}_lib_path ${PASTIX_${pastix_lib}_LIBRARY} PATH) + # set cmake variables (respects naming convention) + if (PASTIX_LIBRARIES) + list(APPEND PASTIX_LIBRARIES "${PASTIX_${pastix_lib}_LIBRARY}") + else() + set(PASTIX_LIBRARIES "${PASTIX_${pastix_lib}_LIBRARY}") + endif() + if (PASTIX_LIBRARY_DIRS) + list(APPEND PASTIX_LIBRARY_DIRS "${${pastix_lib}_lib_path}") + else() + set(PASTIX_LIBRARY_DIRS "${${pastix_lib}_lib_path}") + endif() + mark_as_advanced(PASTIX_${pastix_lib}_LIBRARY) + +endforeach(pastix_lib ${PASTIX_libs_to_find}) + +# check a function to validate the find +if(PASTIX_LIBRARIES) + + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # PASTIX + if (PASTIX_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${PASTIX_INCLUDE_DIRS}") + endif() + foreach(libdir ${PASTIX_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + set(REQUIRED_LIBS "${PASTIX_LIBRARIES}") + # STARPU + if (PASTIX_LOOK_FOR_STARPU AND STARPU_FOUND) + if (STARPU_INCLUDE_DIRS_DEP) + list(APPEND REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS_DEP}") + elseif (STARPU_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${STARPU_INCLUDE_DIRS}") + endif() + if(STARPU_LIBRARY_DIRS_DEP) + list(APPEND REQUIRED_LIBDIRS "${STARPU_LIBRARY_DIRS_DEP}") + elseif(STARPU_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${STARPU_LIBRARY_DIRS}") + endif() + if (STARPU_LIBRARIES_DEP) + list(APPEND REQUIRED_LIBS "${STARPU_LIBRARIES_DEP}") + elseif (STARPU_LIBRARIES) + foreach(lib ${STARPU_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + endif() + # CUDA + if (PASTIX_LOOK_FOR_STARPU_CUDA AND CUDA_FOUND) + if (CUDA_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${CUDA_INCLUDE_DIRS}") + endif() + foreach(libdir ${CUDA_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${CUDA_CUBLAS_LIBRARIES};${CUDA_LIBRARIES}") + endif() + # MPI + if (PASTIX_LOOK_FOR_MPI AND MPI_FOUND) + if (MPI_C_INCLUDE_PATH) + list(APPEND REQUIRED_INCDIRS "${MPI_C_INCLUDE_PATH}") + endif() + if (MPI_C_LINK_FLAGS) + if (${MPI_C_LINK_FLAGS} MATCHES " -") + string(REGEX REPLACE " -" "-" MPI_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + endif() + list(APPEND REQUIRED_LDFLAGS "${MPI_C_LINK_FLAGS}") + endif() + list(APPEND REQUIRED_LIBS "${MPI_C_LIBRARIES}") + endif() + # HWLOC + if (HWLOC_FOUND) + if (HWLOC_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${HWLOC_INCLUDE_DIRS}") + endif() + foreach(libdir ${HWLOC_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + foreach(lib ${HWLOC_LIBRARIES}) + if (EXISTS ${lib} OR ${lib} MATCHES "^-") + list(APPEND REQUIRED_LIBS "${lib}") + else() + list(APPEND REQUIRED_LIBS "-l${lib}") + endif() + endforeach() + endif() + # BLAS + if (BLAS_FOUND) + if (BLAS_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${BLAS_INCLUDE_DIRS}") + endif() + foreach(libdir ${BLAS_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${BLAS_LIBRARIES}") + if (BLAS_LINKER_FLAGS) + list(APPEND REQUIRED_LDFLAGS "${BLAS_LINKER_FLAGS}") + endif() + endif() + # SCOTCH + if (PASTIX_LOOK_FOR_SCOTCH AND SCOTCH_FOUND) + if (SCOTCH_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${SCOTCH_INCLUDE_DIRS}") + endif() + foreach(libdir ${SCOTCH_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${SCOTCH_LIBRARIES}") + endif() + # PTSCOTCH + if (PASTIX_LOOK_FOR_PTSCOTCH AND PTSCOTCH_FOUND) + if (PTSCOTCH_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${PTSCOTCH_INCLUDE_DIRS}") + endif() + foreach(libdir ${PTSCOTCH_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${PTSCOTCH_LIBRARIES}") + endif() + # METIS + if (PASTIX_LOOK_FOR_METIS AND METIS_FOUND) + if (METIS_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${METIS_INCLUDE_DIRS}") + endif() + foreach(libdir ${METIS_LIBRARY_DIRS}) + if (libdir) + list(APPEND REQUIRED_LIBDIRS "${libdir}") + endif() + endforeach() + list(APPEND REQUIRED_LIBS "${METIS_LIBRARIES}") + endif() + # Fortran + if (CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_ifcore_LIBRARY}") + endif() + endif() + # EXTRA LIBS such that pthread, m, rt + list(APPEND REQUIRED_LIBS ${PASTIX_EXTRA_LIBRARIES}) + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + list(APPEND CMAKE_REQUIRED_FLAGS "${REQUIRED_FLAGS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(PASTIX_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(pastix PASTIX_WORKS) + mark_as_advanced(PASTIX_WORKS) + + if(PASTIX_WORKS) + # save link with dependencies + set(PASTIX_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(PASTIX_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(PASTIX_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(PASTIX_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES PASTIX_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES PASTIX_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES PASTIX_LINKER_FLAGS) + else() + if(NOT PASTIX_FIND_QUIETLY) + message(STATUS "Looking for PASTIX : test of pastix() fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + message(STATUS "Maybe PASTIX is linked with specific libraries. " + "Have you tried with COMPONENTS (MPI/SEQ, STARPU, STARPU_CUDA, SCOTCH, PTSCOTCH, METIS)? " + "See the explanation in FindPASTIX.cmake.") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(PASTIX_LIBRARIES) + +if (PASTIX_LIBRARIES) + list(GET PASTIX_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(PASTIX_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of PASTIX library" FORCE) + else() + set(PASTIX_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of PASTIX library" FORCE) + endif() +endif() +mark_as_advanced(PASTIX_DIR) +mark_as_advanced(PASTIX_DIR_FOUND) + +# check that PASTIX has been found +# --------------------------------- include(FindPackageHandleStandardArgs) find_package_handle_standard_args(PASTIX DEFAULT_MSG - PASTIX_INCLUDES PASTIX_LIBRARIES) - -mark_as_advanced(PASTIX_INCLUDES PASTIX_LIBRARIES) + PASTIX_LIBRARIES + PASTIX_WORKS) diff --git a/external/eigen3/cmake/FindScotch.cmake b/external/eigen3/cmake/FindScotch.cmake index 530340b..89d295a 100644 --- a/external/eigen3/cmake/FindScotch.cmake +++ b/external/eigen3/cmake/FindScotch.cmake @@ -1,24 +1,369 @@ -# Pastix requires SCOTCH or METIS (partitioning and reordering tools) +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find SCOTCH include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(SCOTCH +# [REQUIRED] # Fail with error if scotch is not found +# [COMPONENTS ...] # dependencies +# ) +# +# COMPONENTS can be some of the following: +# - ESMUMPS: to activate detection of Scotch with the esmumps interface +# +# This module finds headers and scotch library. +# Results are reported in variables: +# SCOTCH_FOUND - True if headers and requested libraries were found +# SCOTCH_INCLUDE_DIRS - scotch include directories +# SCOTCH_LIBRARY_DIRS - Link directories for scotch libraries +# SCOTCH_LIBRARIES - scotch component libraries to be linked +# SCOTCH_INTSIZE - Number of octets occupied by a SCOTCH_Num +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DSCOTCH=path/to/scotch): +# SCOTCH_DIR - Where to find the base directory of scotch +# SCOTCH_INCDIR - Where to find the header files +# SCOTCH_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: SCOTCH_DIR, SCOTCH_INCDIR, SCOTCH_LIBDIR -if (SCOTCH_INCLUDES AND SCOTCH_LIBRARIES) - set(SCOTCH_FIND_QUIETLY TRUE) -endif (SCOTCH_INCLUDES AND SCOTCH_LIBRARIES) +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) -find_path(SCOTCH_INCLUDES - NAMES - scotch.h - PATHS - $ENV{SCOTCHDIR} - ${INCLUDE_INSTALL_DIR} - PATH_SUFFIXES - scotch -) +if (NOT SCOTCH_FOUND) + set(SCOTCH_DIR "" CACHE PATH "Installation directory of SCOTCH library") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "A cache variable, namely SCOTCH_DIR, has been set to specify the install directory of SCOTCH") + endif() +endif() +# Set the version to find +set(SCOTCH_LOOK_FOR_ESMUMPS OFF) -find_library(SCOTCH_LIBRARIES scotch PATHS $ENV{SCOTCHDIR} ${LIB_INSTALL_DIR}) +if( SCOTCH_FIND_COMPONENTS ) + foreach( component ${SCOTCH_FIND_COMPONENTS} ) + if (${component} STREQUAL "ESMUMPS") + # means we look for esmumps library + set(SCOTCH_LOOK_FOR_ESMUMPS ON) + endif() + endforeach() +endif() +# SCOTCH may depend on Threads, try to find it +if (NOT THREADS_FOUND) + if (SCOTCH_FIND_REQUIRED) + find_package(Threads REQUIRED) + else() + find_package(Threads) + endif() +endif() + +# Looking for include +# ------------------- + +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_SCOTCH_DIR "$ENV{SCOTCH_DIR}") +set(ENV_SCOTCH_INCDIR "$ENV{SCOTCH_INCDIR}") +if(ENV_SCOTCH_INCDIR) + list(APPEND _inc_env "${ENV_SCOTCH_INCDIR}") +elseif(ENV_SCOTCH_DIR) + list(APPEND _inc_env "${ENV_SCOTCH_DIR}") + list(APPEND _inc_env "${ENV_SCOTCH_DIR}/include") + list(APPEND _inc_env "${ENV_SCOTCH_DIR}/include/scotch") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + + +# Try to find the scotch header in the given paths +# ------------------------------------------------- +# call cmake macro to find the header path +if(SCOTCH_INCDIR) + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${SCOTCH_INCDIR}) +else() + if(SCOTCH_DIR) + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${SCOTCH_DIR} + PATH_SUFFIXES "include" "include/scotch") + else() + set(SCOTCH_scotch.h_DIRS "SCOTCH_scotch.h_DIRS-NOTFOUND") + find_path(SCOTCH_scotch.h_DIRS + NAMES scotch.h + HINTS ${_inc_env} + PATH_SUFFIXES "scotch") + endif() +endif() +mark_as_advanced(SCOTCH_scotch.h_DIRS) + +# If found, add path to cmake variable +# ------------------------------------ +if (SCOTCH_scotch.h_DIRS) + set(SCOTCH_INCLUDE_DIRS "${SCOTCH_scotch.h_DIRS}") +else () + set(SCOTCH_INCLUDE_DIRS "SCOTCH_INCLUDE_DIRS-NOTFOUND") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "Looking for scotch -- scotch.h not found") + endif() +endif() +list(REMOVE_DUPLICATES SCOTCH_INCLUDE_DIRS) + +# Looking for lib +# --------------- + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_SCOTCH_LIBDIR "$ENV{SCOTCH_LIBDIR}") +if(ENV_SCOTCH_LIBDIR) + list(APPEND _lib_env "${ENV_SCOTCH_LIBDIR}") +elseif(ENV_SCOTCH_DIR) + list(APPEND _lib_env "${ENV_SCOTCH_DIR}") + list(APPEND _lib_env "${ENV_SCOTCH_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + +# Try to find the scotch lib in the given paths +# ---------------------------------------------- + +set(SCOTCH_libs_to_find "scotch;scotcherrexit") +if (SCOTCH_LOOK_FOR_ESMUMPS) + list(INSERT SCOTCH_libs_to_find 0 "esmumps") +endif() + +# call cmake macro to find the lib path +if(SCOTCH_LIBDIR) + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${SCOTCH_LIBDIR}) + endforeach() +else() + if(SCOTCH_DIR) + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${SCOTCH_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() + else() + foreach(scotch_lib ${SCOTCH_libs_to_find}) + set(SCOTCH_${scotch_lib}_LIBRARY "SCOTCH_${scotch_lib}_LIBRARY-NOTFOUND") + find_library(SCOTCH_${scotch_lib}_LIBRARY + NAMES ${scotch_lib} + HINTS ${_lib_env}) + endforeach() + endif() +endif() + +set(SCOTCH_LIBRARIES "") +set(SCOTCH_LIBRARY_DIRS "") +# If found, add path to cmake variable +# ------------------------------------ +foreach(scotch_lib ${SCOTCH_libs_to_find}) + + if (SCOTCH_${scotch_lib}_LIBRARY) + get_filename_component(${scotch_lib}_lib_path "${SCOTCH_${scotch_lib}_LIBRARY}" PATH) + # set cmake variables + list(APPEND SCOTCH_LIBRARIES "${SCOTCH_${scotch_lib}_LIBRARY}") + list(APPEND SCOTCH_LIBRARY_DIRS "${${scotch_lib}_lib_path}") + else () + list(APPEND SCOTCH_LIBRARIES "${SCOTCH_${scotch_lib}_LIBRARY}") + if (NOT SCOTCH_FIND_QUIETLY) + message(STATUS "Looking for scotch -- lib ${scotch_lib} not found") + endif() + endif () + + mark_as_advanced(SCOTCH_${scotch_lib}_LIBRARY) + +endforeach() +list(REMOVE_DUPLICATES SCOTCH_LIBRARY_DIRS) + +# check a function to validate the find +if(SCOTCH_LIBRARIES) + + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # SCOTCH + if (SCOTCH_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${SCOTCH_INCLUDE_DIRS}") + endif() + if (SCOTCH_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${SCOTCH_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${SCOTCH_LIBRARIES}") + # THREADS + if(CMAKE_THREAD_LIBS_INIT) + list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") + endif() + set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") + find_library(Z_LIBRARY NAMES z) + mark_as_advanced(Z_LIBRARY) + if(Z_LIBRARY) + list(APPEND REQUIRED_LIBS "-lz") + endif() + set(M_LIBRARY "M_LIBRARY-NOTFOUND") + find_library(M_LIBRARY NAMES m) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") + find_library(RT_LIBRARY NAMES rt) + mark_as_advanced(RT_LIBRARY) + if(RT_LIBRARY) + list(APPEND REQUIRED_LIBS "-lrt") + endif() + + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(SCOTCH_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(SCOTCH_graphInit SCOTCH_WORKS) + mark_as_advanced(SCOTCH_WORKS) + + if(SCOTCH_WORKS) + # save link with dependencies + set(SCOTCH_LIBRARIES "${REQUIRED_LIBS}") + else() + if(NOT SCOTCH_FIND_QUIETLY) + message(STATUS "Looking for SCOTCH : test of SCOTCH_graphInit with SCOTCH library fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) +endif(SCOTCH_LIBRARIES) + +if (SCOTCH_LIBRARIES) + list(GET SCOTCH_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "/lib(32|64)?$") + string(REGEX REPLACE "/lib(32|64)?$" "" not_cached_dir "${first_lib_path}") + set(SCOTCH_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of SCOTCH library" FORCE) + else() + set(SCOTCH_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of SCOTCH library" FORCE) + endif() +endif() +mark_as_advanced(SCOTCH_DIR) +mark_as_advanced(SCOTCH_DIR_FOUND) + +# Check the size of SCOTCH_Num +# --------------------------------- +set(CMAKE_REQUIRED_INCLUDES ${SCOTCH_INCLUDE_DIRS}) + +include(CheckCSourceRuns) +#stdio.h and stdint.h should be included by scotch.h directly +set(SCOTCH_C_TEST_SCOTCH_Num_4 " +#include +#include +#include +int main(int argc, char **argv) { + if (sizeof(SCOTCH_Num) == 4) + return 0; + else + return 1; +} +") + +set(SCOTCH_C_TEST_SCOTCH_Num_8 " +#include +#include +#include +int main(int argc, char **argv) { + if (sizeof(SCOTCH_Num) == 8) + return 0; + else + return 1; +} +") +check_c_source_runs("${SCOTCH_C_TEST_SCOTCH_Num_4}" SCOTCH_Num_4) +if(NOT SCOTCH_Num_4) + check_c_source_runs("${SCOTCH_C_TEST_SCOTCH_Num_8}" SCOTCH_Num_8) + if(NOT SCOTCH_Num_8) + set(SCOTCH_INTSIZE -1) + else() + set(SCOTCH_INTSIZE 8) + endif() +else() + set(SCOTCH_INTSIZE 4) +endif() +set(CMAKE_REQUIRED_INCLUDES "") + +# check that SCOTCH has been found +# --------------------------------- include(FindPackageHandleStandardArgs) find_package_handle_standard_args(SCOTCH DEFAULT_MSG - SCOTCH_INCLUDES SCOTCH_LIBRARIES) - -mark_as_advanced(SCOTCH_INCLUDES SCOTCH_LIBRARIES) + SCOTCH_LIBRARIES + SCOTCH_WORKS) +# +# TODO: Add possibility to check for specific functions in the library +# diff --git a/external/eigen3/cmake/FindSuperLU.cmake b/external/eigen3/cmake/FindSuperLU.cmake index 8a3df36..f38146e 100644 --- a/external/eigen3/cmake/FindSuperLU.cmake +++ b/external/eigen3/cmake/FindSuperLU.cmake @@ -17,10 +17,81 @@ find_path(SUPERLU_INCLUDES SRC ) -find_library(SUPERLU_LIBRARIES superlu PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) - +find_library(SUPERLU_LIBRARIES + NAMES "superlu_5.2.1" "superlu_5.2" "superlu_5.1.1" "superlu_5.1" "superlu_5.0" "superlu_4.3" "superlu_4.2" "superlu_4.1" "superlu_4.0" "superlu_3.1" "superlu_3.0" "superlu" + PATHS $ENV{SUPERLUDIR} ${LIB_INSTALL_DIR} + PATH_SUFFIXES lib) + +if(SUPERLU_INCLUDES AND SUPERLU_LIBRARIES) + +include(CheckCXXSourceCompiles) +include(CMakePushCheckState) +cmake_push_check_state() + +set(CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES} ${SUPERLU_INCLUDES}) + +# check whether struct mem_usage_t is globally defined +check_cxx_source_compiles(" +typedef int int_t; +#include +#include +int main() { + mem_usage_t mem; + return 0; +}" +SUPERLU_HAS_GLOBAL_MEM_USAGE_T) + + +check_cxx_source_compiles(" +typedef int int_t; +#include +#include +int main() { + return SLU_SINGLE; +}" +SUPERLU_HAS_CLEAN_ENUMS) + +check_cxx_source_compiles(" +typedef int int_t; +#include +#include +int main(void) +{ + GlobalLU_t glu; + return 0; +}" +SUPERLU_HAS_GLOBALLU_T) + +if(SUPERLU_HAS_GLOBALLU_T) + # at least 5.0 + set(SUPERLU_VERSION_VAR "5.0") +elseif(SUPERLU_HAS_CLEAN_ENUMS) + # at least 4.3 + set(SUPERLU_VERSION_VAR "4.3") +elseif(SUPERLU_HAS_GLOBAL_MEM_USAGE_T) + # at least 4.0 + set(SUPERLU_VERSION_VAR "4.0") +else() + set(SUPERLU_VERSION_VAR "3.0") +endif() + +cmake_pop_check_state() + +if(SuperLU_FIND_VERSION) + if(${SUPERLU_VERSION_VAR} VERSION_LESS ${SuperLU_FIND_VERSION}) + set(SUPERLU_VERSION_OK FALSE) + else() + set(SUPERLU_VERSION_OK TRUE) + endif() +else() + set(SUPERLU_VERSION_OK TRUE) +endif() + +endif() + include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(SUPERLU DEFAULT_MSG - SUPERLU_INCLUDES SUPERLU_LIBRARIES) +find_package_handle_standard_args(SUPERLU + REQUIRED_VARS SUPERLU_INCLUDES SUPERLU_LIBRARIES SUPERLU_VERSION_OK + VERSION_VAR SUPERLU_VERSION_VAR) mark_as_advanced(SUPERLU_INCLUDES SUPERLU_LIBRARIES) diff --git a/external/eigen3/cmake/UseEigen3.cmake b/external/eigen3/cmake/UseEigen3.cmake new file mode 100644 index 0000000..a38bac8 --- /dev/null +++ b/external/eigen3/cmake/UseEigen3.cmake @@ -0,0 +1,6 @@ +# -*- cmake -*- +# +# UseEigen3.cmake + +add_definitions ( ${EIGEN3_DEFINITIONS} ) +include_directories ( ${EIGEN3_INCLUDE_DIRS} ) diff --git a/external/eigen3/cmake/language_support.cmake b/external/eigen3/cmake/language_support.cmake index 231f7ff..2f14f30 100644 --- a/external/eigen3/cmake/language_support.cmake +++ b/external/eigen3/cmake/language_support.cmake @@ -43,7 +43,7 @@ function(workaround_9220 language language_works) if(return_code EQUAL 0) # Second run execute_process ( - COMMAND ${CMAKE_COMMAND} . + COMMAND ${CMAKE_COMMAND} . -G "${CMAKE_GENERATOR}" WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/language_tests/${language} RESULT_VARIABLE return_code OUTPUT_QUIET @@ -64,3 +64,4 @@ endfunction(workaround_9220) #message("CXX_language_works = ${CXX_language_works}") #workaround_9220(CXXp CXXp_language_works) #message("CXXp_language_works = ${CXXp_language_works}") + diff --git a/external/eigen3/debug/gdb/printers.py b/external/eigen3/debug/gdb/printers.py index 86996a4..0d67a5f 100644 --- a/external/eigen3/debug/gdb/printers.py +++ b/external/eigen3/debug/gdb/printers.py @@ -49,7 +49,7 @@ class EigenMatrixPrinter: regex = re.compile('\<.*\>') m = regex.findall(tag)[0][1:-1] template_params = m.split(',') - template_params = map(lambda x:x.replace(" ", ""), template_params) + template_params = [x.replace(" ", "") for x in template_params] if template_params[1] == '-0x00000000000000001' or template_params[1] == '-0x000000001' or template_params[1] == '-1': self.rows = val['m_storage']['m_rows'] @@ -88,8 +88,11 @@ class EigenMatrixPrinter: def __iter__ (self): return self - + def next(self): + return self.__next__() # Python 2.x compatibility + + def __next__(self): row = self.currentRow col = self.currentCol @@ -151,8 +154,11 @@ class EigenQuaternionPrinter: def __iter__ (self): return self - + def next(self): + return self.__next__() # Python 2.x compatibility + + def __next__(self): element = self.currentElement if self.currentElement >= 4: #there are 4 elements in a quanternion diff --git a/external/eigen3/debug/msvc/eigen.natvis b/external/eigen3/debug/msvc/eigen.natvis index da89857..22cf346 100644 --- a/external/eigen3/debug/msvc/eigen.natvis +++ b/external/eigen3/debug/msvc/eigen.natvis @@ -1,235 +1,235 @@ - - - - - - - - [{$T2}, {$T3}] (fixed matrix) - - - 2 - $i==0 ? $T2 : $T3 - m_storage.m_data.array - - - Backward - 2 - $i==0 ? $T2 : $T3 - m_storage.m_data.array - - - - - - - - [2, 2] (fixed matrix) - - - ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}) - - - ({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]}) - - - ({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]}) - - - ({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]}) - - - - - - - - [3, 3] (fixed matrix) - - - ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}) - - - ({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]}) - - - ({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}) - - - ({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]}) - - - ({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]}) - - - ({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]}) - - - - - - - - [4, 4] (fixed matrix) - - - ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]}) - - - ({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]}) - - - ({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}) - - - ({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]}) - - - ({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]}) - - - ({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]}) - - - ({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]}) - - - ({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]}) - - - - - - - - empty - [{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix) - - - 2 - $i==0 ? m_storage.m_rows : m_storage.m_cols - m_storage.m_data - - - Backward - 2 - $i==0 ? m_storage.m_rows : m_storage.m_cols - m_storage.m_data - - - - - - - - empty - [{$T2}, {m_storage.m_cols}] (dynamic column matrix) - - - 2 - $i==0 ? $T2 : m_storage.m_cols - m_storage.m_data - - - Backward - 2 - $i==0 ? $T2 : m_storage.m_cols - m_storage.m_data - - - - - - - - empty - [{m_storage.m_rows}, {$T2}] (dynamic row matrix) - - - 2 - $i==0 ? m_storage.m_rows : $T2 - m_storage.m_data - - - Backward - 2 - $i==0 ? m_storage.m_rows : $T2 - m_storage.m_data - - - - - - - - empty - [{m_storage.m_cols}] (dynamic column vector) - - m_storage.m_cols - - m_storage.m_cols - m_storage.m_data - - - - - - - - empty - [{m_storage.m_rows}] (dynamic row vector) - - m_storage.m_rows - - m_storage.m_rows - m_storage.m_data - - - - - - - - [1] ({m_storage.m_data.array[0]}) - - m_storage.m_data.array[0] - - - - - - - - [2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}) - - m_storage.m_data.array[0] - m_storage.m_data.array[1] - - - - - - - - [3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}) - - m_storage.m_data.array[0] - m_storage.m_data.array[1] - m_storage.m_data.array[2] - - - - - - - - [4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]}) - - m_storage.m_data.array[0] - m_storage.m_data.array[1] - m_storage.m_data.array[2] - m_storage.m_data.array[3] - - - - + + + + + + + + [{$T2}, {$T3}] (fixed matrix) + + + 2 + $i==0 ? $T2 : $T3 + m_storage.m_data.array + + + Backward + 2 + $i==0 ? $T2 : $T3 + m_storage.m_data.array + + + + + + + + [2, 2] (fixed matrix) + + + ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}) + + + ({m_storage.m_data.array[0]}, {m_storage.m_data.array[2]}) + + + ({m_storage.m_data.array[2]}, {m_storage.m_data.array[3]}) + + + ({m_storage.m_data.array[1]}, {m_storage.m_data.array[3]}) + + + + + + + + [3, 3] (fixed matrix) + + + ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}) + + + ({m_storage.m_data.array[0]}, {m_storage.m_data.array[3]}, {m_storage.m_data.array[6]}) + + + ({m_storage.m_data.array[3]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}) + + + ({m_storage.m_data.array[1]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[7]}) + + + ({m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[8]}) + + + ({m_storage.m_data.array[2]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[8]}) + + + + + + + + [4, 4] (fixed matrix) + + + ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]}) + + + ({m_storage.m_data.array[0]}, {m_storage.m_data.array[4]}, {m_storage.m_data.array[8]}, {m_storage.m_data.array[12]}) + + + ({m_storage.m_data.array[4]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[7]}) + + + ({m_storage.m_data.array[1]}, {m_storage.m_data.array[5]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[13]}) + + + ({m_storage.m_data.array[8]}, {m_storage.m_data.array[9]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[11]}) + + + ({m_storage.m_data.array[2]}, {m_storage.m_data.array[6]}, {m_storage.m_data.array[10]}, {m_storage.m_data.array[14]}) + + + ({m_storage.m_data.array[12]}, {m_storage.m_data.array[13]}, {m_storage.m_data.array[14]}, {m_storage.m_data.array[15]}) + + + ({m_storage.m_data.array[3]}, {m_storage.m_data.array[7]}, {m_storage.m_data.array[11]}, {m_storage.m_data.array[15]}) + + + + + + + + empty + [{m_storage.m_rows}, {m_storage.m_cols}] (dynamic matrix) + + + 2 + $i==0 ? m_storage.m_rows : m_storage.m_cols + m_storage.m_data + + + Backward + 2 + $i==0 ? m_storage.m_rows : m_storage.m_cols + m_storage.m_data + + + + + + + + empty + [{$T2}, {m_storage.m_cols}] (dynamic column matrix) + + + 2 + $i==0 ? $T2 : m_storage.m_cols + m_storage.m_data + + + Backward + 2 + $i==0 ? $T2 : m_storage.m_cols + m_storage.m_data + + + + + + + + empty + [{m_storage.m_rows}, {$T2}] (dynamic row matrix) + + + 2 + $i==0 ? m_storage.m_rows : $T2 + m_storage.m_data + + + Backward + 2 + $i==0 ? m_storage.m_rows : $T2 + m_storage.m_data + + + + + + + + empty + [{m_storage.m_cols}] (dynamic column vector) + + m_storage.m_cols + + m_storage.m_cols + m_storage.m_data + + + + + + + + empty + [{m_storage.m_rows}] (dynamic row vector) + + m_storage.m_rows + + m_storage.m_rows + m_storage.m_data + + + + + + + + [1] ({m_storage.m_data.array[0]}) + + m_storage.m_data.array[0] + + + + + + + + [2] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}) + + m_storage.m_data.array[0] + m_storage.m_data.array[1] + + + + + + + + [3] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}) + + m_storage.m_data.array[0] + m_storage.m_data.array[1] + m_storage.m_data.array[2] + + + + + + + + [4] ({m_storage.m_data.array[0]}, {m_storage.m_data.array[1]}, {m_storage.m_data.array[2]}, {m_storage.m_data.array[3]}) + + m_storage.m_data.array[0] + m_storage.m_data.array[1] + m_storage.m_data.array[2] + m_storage.m_data.array[3] + + + + diff --git a/external/eigen3/debug/msvc/eigen_autoexp_part.dat b/external/eigen3/debug/msvc/eigen_autoexp_part.dat index 07aa437..ba7eefc 100644 --- a/external/eigen3/debug/msvc/eigen_autoexp_part.dat +++ b/external/eigen3/debug/msvc/eigen_autoexp_part.dat @@ -1,295 +1,295 @@ -; *************************************************************** -; * Eigen Visualizer -; * -; * Author: Hauke Heibel -; * -; * Support the enhanced debugging of the following Eigen -; * types (*: any, +:fixed dimension) : -; * -; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*> -; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*> -; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*> -; * - Eigen::Matrix<*,-1,-1,*,*,*> -; * - Eigen::Matrix<*,+,-1,*,*,*> -; * - Eigen::Matrix<*,-1,+,*,*,*> -; * - Eigen::Matrix<*,+,+,*,*,*> -; * -; * Matrices are displayed properly independantly of the memory -; * alignment (RowMajor vs. ColMajor). -; * -; * This file is distributed WITHOUT ANY WARRANTY. Please ensure -; * that your original autoexp.dat file is copied to a safe -; * place before proceeding with its modification. -; *************************************************************** - -[Visualizer] - -; Fixed size 4-vectors -Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1], - z : ($c.m_storage.m_data.array)[2], - w : ($c.m_storage.m_data.array)[3] - ) - ) - - preview - ( - #( - "[", - 4, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 4), - ")" - ) - ) -} - -; Fixed size 3-vectors -Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1], - z : ($c.m_storage.m_data.array)[2] - ) - ) - - preview - ( - #( - "[", - 3, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 3), - ")" - ) - ) -} - -; Fixed size 2-vectors -Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0], - y : ($c.m_storage.m_data.array)[1] - ) - ) - - preview - ( - #( - "[", - 2, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 2), - ")" - ) - ) -} - -; Fixed size 1-vectors -Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - x : ($c.m_storage.m_data.array)[0] - ) - ) - - preview - ( - #( - "[", - 1, - "](", - #array(expr: $e.m_storage.m_data.array[$i], size: 1), - ")" - ) - ) -} - -; Dynamic matrices (ColMajor and RowMajor support) -Eigen::Matrix<*,-1,-1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.m_storage.m_rows, - cols: $c.m_storage.m_cols, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.m_storage.m_rows, - ",", - $c.m_storage.m_cols, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : $c.m_storage.m_rows*$c.m_storage.m_cols - ), - ")" - ) - ) -} - -; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,*,-1,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.RowsAtCompileTime, - cols: $c.m_storage.m_cols, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.RowsAtCompileTime, - ",", - $c.m_storage.m_cols, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : $c.RowsAtCompileTime*$c.m_storage.m_cols - ), - ")" - ) - ) -} - -; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,-1,*,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.m_storage.m_rows, - cols: $c.ColsAtCompileTime, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data)[$i], - size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.m_storage.m_rows, - ",", - $c.ColsAtCompileTime, - "](", - #array( - expr : [($c.m_storage.m_data)[$i],g], - size : $c.m_storage.m_rows*$c.ColsAtCompileTime - ), - ")" - ) - ) -} - -; Fixed size matrix (ColMajor and RowMajor support) -Eigen::Matrix<*,*,*,*,*,*>{ - children - ( - #( - [internals]: [$c,!], - rows: $c.RowsAtCompileTime, - cols: $c.ColsAtCompileTime, - ; Check for RowMajorBit - #if ($c.Flags & 0x1) ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime - ) - ) #else ( - #array( - rank: 2, - base: 0, - expr: ($c.m_storage.m_data.array)[$i], - size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime - ) - ) - ) - ) - - preview - ( - #( - "[", - $c.RowsAtCompileTime, - ",", - $c.ColsAtCompileTime, - "](", - #array( - expr : [($c.m_storage.m_data.array)[$i],g], - size : $c.RowsAtCompileTime*$c.ColsAtCompileTime - ), - ")" - ) - ) -} +; *************************************************************** +; * Eigen Visualizer +; * +; * Author: Hauke Heibel +; * +; * Support the enhanced debugging of the following Eigen +; * types (*: any, +:fixed dimension) : +; * +; * - Eigen::Matrix<*,4,1,*,*,*> and Eigen::Matrix<*,1,4,*,*,*> +; * - Eigen::Matrix<*,3,1,*,*,*> and Eigen::Matrix<*,1,3,*,*,*> +; * - Eigen::Matrix<*,2,1,*,*,*> and Eigen::Matrix<*,1,2,*,*,*> +; * - Eigen::Matrix<*,-1,-1,*,*,*> +; * - Eigen::Matrix<*,+,-1,*,*,*> +; * - Eigen::Matrix<*,-1,+,*,*,*> +; * - Eigen::Matrix<*,+,+,*,*,*> +; * +; * Matrices are displayed properly independantly of the memory +; * alignment (RowMajor vs. ColMajor). +; * +; * This file is distributed WITHOUT ANY WARRANTY. Please ensure +; * that your original autoexp.dat file is copied to a safe +; * place before proceeding with its modification. +; *************************************************************** + +[Visualizer] + +; Fixed size 4-vectors +Eigen::Matrix<*,4,1,*,*,*>|Eigen::Matrix<*,1,4,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1], + z : ($c.m_storage.m_data.array)[2], + w : ($c.m_storage.m_data.array)[3] + ) + ) + + preview + ( + #( + "[", + 4, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 4), + ")" + ) + ) +} + +; Fixed size 3-vectors +Eigen::Matrix<*,3,1,*,*,*>|Eigen::Matrix<*,1,3,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1], + z : ($c.m_storage.m_data.array)[2] + ) + ) + + preview + ( + #( + "[", + 3, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 3), + ")" + ) + ) +} + +; Fixed size 2-vectors +Eigen::Matrix<*,2,1,*,*,*>|Eigen::Matrix<*,1,2,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0], + y : ($c.m_storage.m_data.array)[1] + ) + ) + + preview + ( + #( + "[", + 2, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 2), + ")" + ) + ) +} + +; Fixed size 1-vectors +Eigen::Matrix<*,1,1,*,*,*>|Eigen::Matrix<*,1,1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + x : ($c.m_storage.m_data.array)[0] + ) + ) + + preview + ( + #( + "[", + 1, + "](", + #array(expr: $e.m_storage.m_data.array[$i], size: 1), + ")" + ) + ) +} + +; Dynamic matrices (ColMajor and RowMajor support) +Eigen::Matrix<*,-1,-1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.m_storage.m_rows, + cols: $c.m_storage.m_cols, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.m_storage.m_cols + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.m_storage.m_cols + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.m_storage.m_rows, + ",", + $c.m_storage.m_cols, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.m_storage.m_rows*$c.m_storage.m_cols + ), + ")" + ) + ) +} + +; Fixed rows, dynamic columns matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,*,-1,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.RowsAtCompileTime, + cols: $c.m_storage.m_cols, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.RowsAtCompileTime)*$c.m_storage.m_cols + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.m_storage.m_cols + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.RowsAtCompileTime, + ",", + $c.m_storage.m_cols, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.RowsAtCompileTime*$c.m_storage.m_cols + ), + ")" + ) + ) +} + +; Dynamic rows, fixed columns matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,-1,*,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.m_storage.m_rows, + cols: $c.ColsAtCompileTime, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[($i % $c.m_storage.m_rows)*$c.ColsAtCompileTime + (($i- $i % $c.m_storage.m_rows)/$c.m_storage.m_rows)], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data)[$i], + size: ($r==1)*$c.m_storage.m_rows+($r==0)*$c.ColsAtCompileTime + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.m_storage.m_rows, + ",", + $c.ColsAtCompileTime, + "](", + #array( + expr : [($c.m_storage.m_data)[$i],g], + size : $c.m_storage.m_rows*$c.ColsAtCompileTime + ), + ")" + ) + ) +} + +; Fixed size matrix (ColMajor and RowMajor support) +Eigen::Matrix<*,*,*,*,*,*>{ + children + ( + #( + [internals]: [$c,!], + rows: $c.RowsAtCompileTime, + cols: $c.ColsAtCompileTime, + ; Check for RowMajorBit + #if ($c.Flags & 0x1) ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data.array)[($i % $c.RowsAtCompileTime)*$c.ColsAtCompileTime + (($i- $i % $c.RowsAtCompileTime)/$c.RowsAtCompileTime)], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime + ) + ) #else ( + #array( + rank: 2, + base: 0, + expr: ($c.m_storage.m_data.array)[$i], + size: ($r==1)*$c.RowsAtCompileTime+($r==0)*$c.ColsAtCompileTime + ) + ) + ) + ) + + preview + ( + #( + "[", + $c.RowsAtCompileTime, + ",", + $c.ColsAtCompileTime, + "](", + #array( + expr : [($c.m_storage.m_data.array)[$i],g], + size : $c.RowsAtCompileTime*$c.ColsAtCompileTime + ), + ")" + ) + ) +} diff --git a/external/eigen3/demos/opengl/quaternion_demo.cpp b/external/eigen3/demos/opengl/quaternion_demo.cpp index 0416561..dd323a4 100644 --- a/external/eigen3/demos/opengl/quaternion_demo.cpp +++ b/external/eigen3/demos/opengl/quaternion_demo.cpp @@ -234,7 +234,7 @@ void RenderingWidget::drawScene() gpu.drawVector(Vector3f::Zero(), length*Vector3f::UnitZ(), Color(0,0,1,1)); // draw the fractal object - float sqrt3 = internal::sqrt(3.); + float sqrt3 = std::sqrt(3.); glLightfv(GL_LIGHT0, GL_AMBIENT, Vector4f(0.5,0.5,0.5,1).data()); glLightfv(GL_LIGHT0, GL_DIFFUSE, Vector4f(0.5,1,0.5,1).data()); glLightfv(GL_LIGHT0, GL_SPECULAR, Vector4f(1,1,1,1).data()); diff --git a/external/eigen3/demos/opengl/trackball.cpp b/external/eigen3/demos/opengl/trackball.cpp index 77ac790..7c2da8e 100644 --- a/external/eigen3/demos/opengl/trackball.cpp +++ b/external/eigen3/demos/opengl/trackball.cpp @@ -23,7 +23,7 @@ void Trackball::track(const Vector2i& point2D) { Vector3f axis = mLastPoint3D.cross(newPoint3D).normalized(); float cos_angle = mLastPoint3D.dot(newPoint3D); - if ( internal::abs(cos_angle) < 1.0 ) + if ( std::abs(cos_angle) < 1.0 ) { float angle = 2. * acos(cos_angle); if (mMode==Around) diff --git a/external/eigen3/doc/A05_PortingFrom2To3.dox b/external/eigen3/doc/A05_PortingFrom2To3.dox index 4d5f3ae..51555f9 100644 --- a/external/eigen3/doc/A05_PortingFrom2To3.dox +++ b/external/eigen3/doc/A05_PortingFrom2To3.dox @@ -2,8 +2,6 @@ namespace Eigen { /** \page Eigen2ToEigen3 Porting from Eigen2 to Eigen3 -
Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3.
- This page lists the most important API changes between Eigen2 and Eigen3, and gives tips to help porting your application from Eigen2 to Eigen3. @@ -11,11 +9,8 @@ and gives tips to help porting your application from Eigen2 to Eigen3. \section CompatibilitySupport Eigen2 compatibility support -In order to ease the switch from Eigen2 to Eigen3, Eigen3 features \subpage Eigen2SupportModes "Eigen2 support modes". - -The quick way to enable this is to define the \c EIGEN2_SUPPORT preprocessor token \b before including any Eigen header (typically it should be set in your project options). - -A more powerful, \em staged migration path is also provided, which may be useful to migrate larger projects from Eigen2 to Eigen3. This is explained in the \ref Eigen2SupportModes "Eigen 2 support modes" page. +Up to version 3.2 %Eigen provides Eigen2 support modes. These are removed now, because they were barely used anymore and became hard to maintain after internal re-designs. +You can still use them by first porting your code to Eigen 3.2. \section Using The USING_PART_OF_NAMESPACE_EIGEN macro @@ -228,7 +223,7 @@ triangular part to work on \section GeometryModule Changes in the Geometry module -The Geometry module is the one that changed the most. If you rely heavily on it, it's probably a good idea to use the \ref Eigen2SupportModes "Eigen 2 support modes" to perform your migration. +The Geometry module is the one that changed the most. If you rely heavily on it, it's probably a good idea to use the "Eigen 2 support modes" to perform your migration. \section Transform The Transform class @@ -266,7 +261,7 @@ use it unless you are sure of what you are doing, i.e., you have rigourosly meas The EIGEN_ALIGN_128 macro has been renamed to EIGEN_ALIGN16. Don't be surprised, it's just that we switched to counting in bytes ;-) -The EIGEN_DONT_ALIGN option still exists in Eigen 3, but it has a new cousin: EIGEN_DONT_ALIGN_STATICALLY. It allows to get rid of all static alignment issues while keeping alignment of dynamic-size heap-allocated arrays, thus keeping vectorization for dynamic-size objects. +The \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN \endlink option still exists in Eigen 3, but it has a new cousin: \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN_STATICALLY.\endlink It allows to get rid of all static alignment issues while keeping alignment of dynamic-size heap-allocated arrays. Vectorization of statically allocated arrays is still preserved (unless you define \link TopicPreprocessorDirectivesPerformance EIGEN_UNALIGNED_VECTORIZE \endlink =0), at the cost of unaligned memory stores. \section AlignedMap Aligned Map objects @@ -283,7 +278,7 @@ result = Vector4f::MapAligned(some_aligned_array); \section StdContainers STL Containers -In Eigen2, #include tweaked std::vector to automatically align elements. The problem was that that was quite invasive. In Eigen3, we only override standard behavior if you use Eigen::aligned_allocator as your allocator type. So for example, if you use std::vector, you need to do the following change (note that aligned_allocator is under namespace Eigen): +In Eigen2, \#include\ tweaked std::vector to automatically align elements. The problem was that that was quite invasive. In Eigen3, we only override standard behavior if you use Eigen::aligned_allocator as your allocator type. So for example, if you use std::vector, you need to do the following change (note that aligned_allocator is under namespace Eigen): diff --git a/external/eigen3/doc/A10_Eigen2SupportModes.dox b/external/eigen3/doc/A10_Eigen2SupportModes.dox deleted file mode 100644 index f3df915..0000000 --- a/external/eigen3/doc/A10_Eigen2SupportModes.dox +++ /dev/null @@ -1,95 +0,0 @@ -namespace Eigen { - -/** \page Eigen2SupportModes Eigen 2 support modes - -
Eigen2 support is deprecated in Eigen 3.2.x and it will be removed in Eigen 3.3.
- -This page documents the Eigen2 support modes, a powerful tool to help migrating your project from Eigen 2 to Eigen 3. -Don't miss our page on \ref Eigen2ToEigen3 "API changes" between Eigen 2 and Eigen 3. - -\eigenAutoToc - -\section EIGEN2_SUPPORT_Macro The quick way: define EIGEN2_SUPPORT - -By defining EIGEN2_SUPPORT before including any Eigen 3 header, you get back a large part of the Eigen 2 API, while keeping the Eigen 3 API and ABI unchanged. - -This defaults to the \ref Stage30 "stage 30" described below. - -The rest of this page describes an optional, more powerful \em staged migration path. - -\section StagedMigrationPathOverview Overview of the staged migration path - -The primary reason why EIGEN2_SUPPORT alone may not be enough to migrate a large project from Eigen 2 to Eigen 3 is that some of the Eigen 2 API is inherently incompatible with the Eigen 3 API. This happens when the same identifier is used in Eigen 2 and in Eigen 3 with different meanings. To help migrate projects that rely on such API, we provide a staged migration path allowing to perform the migration \em incrementally. - -It goes as follows: -\li Step 0: start with a project using Eigen 2. -\li Step 1: build your project against Eigen 3 with \ref Stage10 "Eigen 2 support stage 10". This mode enables maximum compatibility with the Eigen 2 API, with just a few exceptions. -\li Step 2: build your project against Eigen 3 with \ref Stage20 "Eigen 2 support stage 20". This mode forces you to add eigen2_ prefixes to the Eigen2 identifiers that conflict with Eigen 3 API. -\li Step 3: build your project against Eigen 3 with \ref Stage30 "Eigen 2 support stage 30". This mode enables the full Eigen 3 API. -\li Step 4: build your project against Eigen 3 with \ref Stage40 "Eigen 2 support stage 40". This mode enables the full Eigen 3 strictness on matters, such as const-correctness, where Eigen 2 was looser. -\li Step 5: build your project against Eigen 3 without any Eigen 2 support mode. - -\section Stage10 Stage 10: define EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API - -Enable this mode by defining the EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API preprocessor macro before including any Eigen 3 header. - -This mode maximizes support for the Eigen 2 API. As a result, it does not offer the full Eigen 3 API. Also, it doesn't offer quite 100% of the Eigen 2 API. - -The part of the Eigen 3 API that is not present in this mode, is Eigen 3's Geometry module. Indeed, this mode completely replaces it by a copy of Eigen 2's Geometry module. - -The parts of the API that are still not 100% Eigen 2 compatible in this mode are: -\li Dot products over complex numbers. Eigen 2's dot product was linear in the first variable. Eigen 3's dot product is linear in the second variable. In other words, the Eigen 2 code \code x.dot(y) \endcode is equivalent to the Eigen 3 code \code y.dot(x) \endcode In yet other words, dot products are complex-conjugated in Eigen 3 compared to Eigen 2. The switch to the new convention was commanded by common usage, especially with the notation \f$ x^Ty \f$ for dot products of column-vectors. -\li The Sparse module. -\li Certain fine details of linear algebraic decompositions. For example, LDLT decomposition is now pivoting in Eigen 3 whereas it wasn't in Eigen 2, so code that was relying on its underlying matrix structure will break. -\li Usage of Eigen types in STL containers, \ref Eigen2ToEigen3 "as explained on this page". - -\section Stage20 Stage 20: define EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS - -Enable this mode by defining the EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API preprocessor macro before including any Eigen 3 header. - -This mode removes the Eigen 2 API that is directly conflicting with Eigen 3 API. Instead, these bits of Eigen 2 API remain available with eigen2_ prefixes. The main examples of such API are: -\li the whole Geometry module. For example, replace \c Quaternion by \c eigen2_Quaternion, replace \c Transform3f by \c eigen2_Transform3f, etc. -\li the lu() method to obtain a LU decomposition. Replace by eigen2_lu(). - -There is also one more eigen2_-prefixed identifier that you should know about, even though its use is not checked at compile time by this mode: the dot() method. As was discussed above, over complex numbers, its meaning is different between Eigen 2 and Eigen 3. You can use eigen2_dot() to get the Eigen 2 behavior. - -\section Stage30 Stage 30: define EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API - -Enable this mode by defining the EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API preprocessor macro before including any Eigen 3 header. Also, this mode is what you get by default when you just define EIGEN2_SUPPORT. - -This mode gives you the full unaltered Eigen 3 API, while still keeping as much support as possible for the Eigen 2 API. - -The eigen2_-prefixed identifiers are still available, but at this stage you should now replace them by Eigen 3 identifiers. Have a look at our page on \ref Eigen2ToEigen3 "API changes" between Eigen 2 and Eigen 3. - -\section Stage40 Stage 40: define EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS - -Enable this mode by defining the EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS preprocessor macro before including any Eigen 3 header. - -This mode tightens the last bits of strictness, especially const-correctness, that had to be loosened to support what Eigen 2 allowed. For example, this code compiled in Eigen 2: -\code -const float array[4]; -x = Map(array); -\endcode -That allowed to circumvent constness. This is no longer allowed in Eigen 3. If you have to map const data in Eigen 3, map it as a const-qualified type. However, rather than explictly constructing Map objects, we strongly encourage you to use the static Map methods instead, as they take care of all of this for you: -\code -const float array[4]; -x = Vector4f::Map(array); -\endcode -This lets Eigen do the right thing for you and works equally well in Eigen 2 and in Eigen 3. - -\section FinallyDropAllEigen2Support Finally drop all Eigen 2 support - -Stage 40 is the first where it's "comfortable" to stay for a little longer period, since it preserves 100% Eigen 3 compatibility. However, we still encourage you to complete your migration as quickly as possible. While we do run the Eigen 2 test suite against Eigen 3's stage 10 support mode, we can't guarantee the same level of support and quality assurance for Eigen 2 support as we do for Eigen 3 itself, especially not in the long term. \ref Eigen2ToEigen3 "This page" describes a large part of the changes that you may need to perform. - -\section ABICompatibility What about ABI compatibility? - -It goes as follows: -\li Stage 10 already is ABI compatible with Eigen 3 for the basic (Matrix, Array, SparseMatrix...) types. However, since this stage uses a copy of Eigen 2's Geometry module instead of Eigen 3's own Geometry module, the ABI in the Geometry module is not Eigen 3 compatible. -\li Stage 20 removes the Eigen 3-incompatible Eigen 2 Geometry module (it remains available with eigen2_ prefix). So at this stage, all the identifiers that exist in Eigen 3 have the Eigen 3 ABI (and API). -\li Stage 30 introduces the remaining Eigen 3 identifiers. So at this stage, you have the full Eigen 3 ABI. -\li Stage 40 is no different than Stage 30 in these matters. - - -*/ - -} diff --git a/external/eigen3/doc/AsciiQuickReference.txt b/external/eigen3/doc/AsciiQuickReference.txt index b9f497f..0ca54ce 100644 --- a/external/eigen3/doc/AsciiQuickReference.txt +++ b/external/eigen3/doc/AsciiQuickReference.txt @@ -32,17 +32,19 @@ A << 1, 2, 3, // Initialize A. The elements can also be B << A, A, A; // B is three horizontally stacked A's. A.fill(10); // Fill A with all 10's. -// Eigen // Matlab -MatrixXd::Identity(rows,cols) // eye(rows,cols) -C.setIdentity(rows,cols) // C = eye(rows,cols) -MatrixXd::Zero(rows,cols) // zeros(rows,cols) -C.setZero(rows,cols) // C = ones(rows,cols) -MatrixXd::Ones(rows,cols) // ones(rows,cols) -C.setOnes(rows,cols) // C = ones(rows,cols) -MatrixXd::Random(rows,cols) // rand(rows,cols)*2-1 // MatrixXd::Random returns uniform random numbers in (-1, 1). -C.setRandom(rows,cols) // C = rand(rows,cols)*2-1 -VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)' -v.setLinSpaced(size,low,high) // v = linspace(low,high,size)' +// Eigen // Matlab +MatrixXd::Identity(rows,cols) // eye(rows,cols) +C.setIdentity(rows,cols) // C = eye(rows,cols) +MatrixXd::Zero(rows,cols) // zeros(rows,cols) +C.setZero(rows,cols) // C = zeros(rows,cols) +MatrixXd::Ones(rows,cols) // ones(rows,cols) +C.setOnes(rows,cols) // C = ones(rows,cols) +MatrixXd::Random(rows,cols) // rand(rows,cols)*2-1 // MatrixXd::Random returns uniform random numbers in (-1, 1). +C.setRandom(rows,cols) // C = rand(rows,cols)*2-1 +VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)' +v.setLinSpaced(size,low,high) // v = linspace(low,high,size)' +VectorXi::LinSpaced(((hi-low)/step)+1, // low:step:hi + low,low+step*(size-1)) // // Matrix slicing and blocks. All expressions listed here are read/write. @@ -82,17 +84,20 @@ P.bottomRightCorner() // P(end-rows+1:end, end-cols+1:end) // Of particular note is Eigen's swap function which is highly optimized. // Eigen // Matlab -R.row(i) = P.col(j); // R(i, :) = P(:, i) +R.row(i) = P.col(j); // R(i, :) = P(:, j) R.col(j1).swap(mat1.col(j2)); // R(:, [j1 j2]) = R(:, [j2, j1]) -// Views, transpose, etc; all read-write except for .adjoint(). +// Views, transpose, etc; // Eigen // Matlab R.adjoint() // R' -R.transpose() // R.' or conj(R') -R.diagonal() // diag(R) +R.transpose() // R.' or conj(R') // Read-write +R.diagonal() // diag(R) // Read-write x.asDiagonal() // diag(x) -R.transpose().colwise().reverse(); // rot90(R) -R.conjugate() // conj(R) +R.transpose().colwise().reverse() // rot90(R) // Read-write +R.rowwise().reverse() // fliplr(R) +R.colwise().reverse() // flipud(R) +R.replicate(i,j) // repmat(P,i,j) + // All the same as Matlab, but matlab doesn't have *= style operators. // Matrix-vector. Matrix-matrix. Matrix-scalar. @@ -104,37 +109,40 @@ a *= M; R = P + Q; R = P/s; R -= Q; R /= s; // Vectorized operations on each element independently -// Eigen // Matlab -R = P.cwiseProduct(Q); // R = P .* Q -R = P.array() * s.array();// R = P .* s -R = P.cwiseQuotient(Q); // R = P ./ Q -R = P.array() / Q.array();// R = P ./ Q -R = P.array() + s.array();// R = P + s -R = P.array() - s.array();// R = P - s -R.array() += s; // R = R + s -R.array() -= s; // R = R - s -R.array() < Q.array(); // R < Q -R.array() <= Q.array(); // R <= Q -R.cwiseInverse(); // 1 ./ P -R.array().inverse(); // 1 ./ P -R.array().sin() // sin(P) -R.array().cos() // cos(P) -R.array().pow(s) // P .^ s -R.array().square() // P .^ 2 -R.array().cube() // P .^ 3 -R.cwiseSqrt() // sqrt(P) -R.array().sqrt() // sqrt(P) -R.array().exp() // exp(P) -R.array().log() // log(P) -R.cwiseMax(P) // max(R, P) -R.array().max(P.array()) // max(R, P) -R.cwiseMin(P) // min(R, P) -R.array().min(P.array()) // min(R, P) -R.cwiseAbs() // abs(P) -R.array().abs() // abs(P) -R.cwiseAbs2() // abs(P.^2) -R.array().abs2() // abs(P.^2) -(R.array() < s).select(P,Q); // (R < s ? P : Q) +// Eigen // Matlab +R = P.cwiseProduct(Q); // R = P .* Q +R = P.array() * s.array(); // R = P .* s +R = P.cwiseQuotient(Q); // R = P ./ Q +R = P.array() / Q.array(); // R = P ./ Q +R = P.array() + s.array(); // R = P + s +R = P.array() - s.array(); // R = P - s +R.array() += s; // R = R + s +R.array() -= s; // R = R - s +R.array() < Q.array(); // R < Q +R.array() <= Q.array(); // R <= Q +R.cwiseInverse(); // 1 ./ P +R.array().inverse(); // 1 ./ P +R.array().sin() // sin(P) +R.array().cos() // cos(P) +R.array().pow(s) // P .^ s +R.array().square() // P .^ 2 +R.array().cube() // P .^ 3 +R.cwiseSqrt() // sqrt(P) +R.array().sqrt() // sqrt(P) +R.array().exp() // exp(P) +R.array().log() // log(P) +R.cwiseMax(P) // max(R, P) +R.array().max(P.array()) // max(R, P) +R.cwiseMin(P) // min(R, P) +R.array().min(P.array()) // min(R, P) +R.cwiseAbs() // abs(P) +R.array().abs() // abs(P) +R.cwiseAbs2() // abs(P.^2) +R.array().abs2() // abs(P.^2) +(R.array() < s).select(P,Q ); // (R < s ? P : Q) +R = (Q.array()==0).select(P,R) // R(Q==0) = P(Q==0) +R = P.unaryExpr(ptr_fun(func)) // R = arrayfun(func, P) // with: scalar func(const scalar &x); + // Reductions. int r, c; @@ -165,12 +173,12 @@ x.dot(y) // dot(x, y) x.cross(y) // cross(x, y) Requires #include //// Type conversion -// Eigen // Matlab -A.cast(); // double(A) -A.cast(); // single(A) -A.cast(); // int32(A) -A.real(); // real(A) -A.imag(); // imag(A) +// Eigen // Matlab +A.cast(); // double(A) +A.cast(); // single(A) +A.cast(); // int32(A) +A.real(); // real(A) +A.imag(); // imag(A) // if the original type equals destination type, no work is done // Note that for most operations Eigen requires all operands to have the same type: diff --git a/external/eigen3/doc/B01_Experimental.dox b/external/eigen3/doc/B01_Experimental.dox index 5fc0ccd..e1f031d 100644 --- a/external/eigen3/doc/B01_Experimental.dox +++ b/external/eigen3/doc/B01_Experimental.dox @@ -4,7 +4,7 @@ namespace Eigen { \eigenAutoToc -\section summary Summary +\section Experimental_summary Summary With the 2.0 release, Eigen's API is, to a large extent, stable. However, we wish to retain the freedom to make API incompatible changes. To that effect, we call many parts of Eigen "experimental" which means that they are not subject to API stability guarantee. @@ -17,7 +17,7 @@ Experimental features may at any time: \li be subject to an API incompatible change; \li introduce API or ABI incompatible changes in your own code if you let them affect your API or ABI. -\section modules Experimental modules +\section Experimental_modules Experimental modules The following modules are considered entirely experimental, and we make no firm API stability guarantee about them for the time being: \li SVD @@ -26,7 +26,7 @@ The following modules are considered entirely experimental, and we make no firm \li Sparse \li Geometry (this one should be mostly stable, but it's a little too early to make a formal guarantee) -\section core Experimental parts of the Core module +\section Experimental_core Experimental parts of the Core module In the Core module, the only classes subject to ABI stability guarantee (meaning that you can use it for data members in your public ABI) is: \li Matrix diff --git a/external/eigen3/doc/CMakeLists.txt b/external/eigen3/doc/CMakeLists.txt index 02790ee..db413bc 100644 --- a/external/eigen3/doc/CMakeLists.txt +++ b/external/eigen3/doc/CMakeLists.txt @@ -10,12 +10,20 @@ if(CMAKE_COMPILER_IS_GNUCXX) endif(CMAKE_SYSTEM_NAME MATCHES Linux) endif(CMAKE_COMPILER_IS_GNUCXX) +option(EIGEN_INTERNAL_DOCUMENTATION "Build internal documentation" OFF) + + # Set some Doxygen flags set(EIGEN_DOXY_PROJECT_NAME "Eigen") set(EIGEN_DOXY_OUTPUT_DIRECTORY_SUFFIX "") set(EIGEN_DOXY_INPUT "\"${Eigen_SOURCE_DIR}/Eigen\" \"${Eigen_SOURCE_DIR}/doc\"") set(EIGEN_DOXY_HTML_COLORSTYLE_HUE "220") set(EIGEN_DOXY_TAGFILES "") +if(EIGEN_INTERNAL_DOCUMENTATION) + set(EIGEN_DOXY_INTERNAL "YES") +else(EIGEN_INTERNAL_DOCUMENTATION) + set(EIGEN_DOXY_INTERNAL "NO") +endif(EIGEN_INTERNAL_DOCUMENTATION) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in @@ -70,6 +78,8 @@ add_custom_target( COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/html/ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/eigen_navtree_hacks.js ${CMAKE_CURRENT_BINARY_DIR}/html/ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/Eigen_Silly_Professor_64x64.png ${CMAKE_CURRENT_BINARY_DIR}/html/ + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ftv2pnode.png ${CMAKE_CURRENT_BINARY_DIR}/html/ + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ftv2node.png ${CMAKE_CURRENT_BINARY_DIR}/html/ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/AsciiQuickReference.txt ${CMAKE_CURRENT_BINARY_DIR}/html/ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) @@ -80,6 +90,8 @@ add_custom_target( COMMAND ${CMAKE_COMMAND} -E make_directory ${Eigen_BINARY_DIR}/doc/html/unsupported COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/eigen_navtree_hacks.js ${CMAKE_CURRENT_BINARY_DIR}/html/unsupported/ COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/Eigen_Silly_Professor_64x64.png ${CMAKE_CURRENT_BINARY_DIR}/html/unsupported/ + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ftv2pnode.png ${CMAKE_CURRENT_BINARY_DIR}/html/unsupported/ + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/ftv2node.png ${CMAKE_CURRENT_BINARY_DIR}/html/unsupported/ WORKING_DIRECTORY ${Eigen_BINARY_DIR}/doc ) diff --git a/external/eigen3/doc/CoeffwiseMathFunctionsTable.dox b/external/eigen3/doc/CoeffwiseMathFunctionsTable.dox new file mode 100644 index 0000000..3ae9420 --- /dev/null +++ b/external/eigen3/doc/CoeffwiseMathFunctionsTable.dox @@ -0,0 +1,525 @@ +namespace Eigen { + +/** \eigenManualPage CoeffwiseMathFunctions Catalog of coefficient-wise math functions + + + + +This table presents a catalog of the coefficient-wise math functions supported by %Eigen. +In this table, \c a, \c b, refer to Array objects or expressions, and \c m refers to a linear algebra Matrix/Vector object. Standard scalar types are abbreviated as follows: + - \c int: \c i32 + - \c float: \c f + - \c double: \c d + - \c std::complex: \c cf + - \c std::complex: \c cd + +For each row, the first column list the equivalent calls for arrays, and matrices when supported. Of course, all functions are available for matrices by first casting it as an array: \c m.array(). + +The third column gives some hints in the underlying scalar implementation. In most cases, %Eigen does not implement itself the math function but relies on the STL for standard scalar types, or user-provided functions for custom scalar types. +For instance, some simply calls the respective function of the STL while preserving argument-dependent lookup for custom types. +The following: +\code +using std::foo; +foo(a[i]); +\endcode +means that the STL's function \c std::foo will be potentially called if it is compatible with the underlying scalar type. If not, then the user must ensure that an overload of the function foo is available for the given scalar type (usually defined in the same namespace as the given scalar type). +This also means that, unless specified, if the function \c std::foo is available only in some recent c++ versions (e.g., c++11), then the respective %Eigen's function/method will be usable on standard types only if the compiler support the required c++ version. + +
Eigen 2Eigen 3
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
APIDescriptionDefault scalar implementationSIMD
Basic operations
+ \anchor cwisetable_abs + a.\link ArrayBase::abs abs\endlink(); \n + \link Eigen::abs abs\endlink(a); \n + m.\link MatrixBase::cwiseAbs cwiseAbs\endlink(); + absolute value (\f$ |a_i| \f$) + using std::abs; \n + abs(a[i]); + SSE2, AVX (i32,f,d)
+ \anchor cwisetable_inverse + a.\link ArrayBase::inverse inverse\endlink(); \n + \link Eigen::inverse inverse\endlink(a); \n + m.\link MatrixBase::cwiseInverse cwiseInverse\endlink(); + inverse value (\f$ 1/a_i \f$) + 1/a[i]; + All engines (f,d,fc,fd)
+ \anchor cwisetable_conj + a.\link ArrayBase::conjugate conjugate\endlink(); \n + \link Eigen::conj conj\endlink(a); \n + m.\link MatrixBase::conjugate conjugate(); + complex conjugate (\f$ \bar{a_i} \f$),\n + no-op for real + using std::conj; \n + conj(a[i]); + All engines (fc,fd)
Exponential functions
+ \anchor cwisetable_exp + a.\link ArrayBase::exp exp\endlink(); \n + \link Eigen::exp exp\endlink(a); + \f$ e \f$ raised to the given power (\f$ e^{a_i} \f$) + using std::exp; \n + exp(a[i]); + SSE2, AVX (f,d)
+ \anchor cwisetable_log + a.\link ArrayBase::log log\endlink(); \n + \link Eigen::log log\endlink(a); + natural (base \f$ e \f$) logarithm (\f$ \ln({a_i}) \f$) + using std::log; \n + log(a[i]); + SSE2, AVX (f)
+ \anchor cwisetable_log1p + a.\link ArrayBase::log1p log1p\endlink(); \n + \link Eigen::log1p log1p\endlink(a); + natural (base \f$ e \f$) logarithm of 1 plus \n the given number (\f$ \ln({1+a_i}) \f$)built-in generic implementation based on \c log,\n + plus \c using \c std::log1p ; \cpp11
+ \anchor cwisetable_log10 + a.\link ArrayBase::log10 log10\endlink(); \n + \link Eigen::log10 log10\endlink(a); + base 10 logarithm (\f$ \log_{10}({a_i}) \f$) + using std::log10; \n + log10(a[i]); +
Power functions
+ \anchor cwisetable_pow + a.\link ArrayBase::pow pow\endlink(b); \n + \link Eigen::pow pow\endlink(a,b); + raises a number to the given power (\f$ a_i ^ {b_i} \f$) \n \c a and \c b can be either an array or scalar. + using std::pow; \n + pow(a[i],b[i]);\n + (plus builtin for integer types)
+ \anchor cwisetable_sqrt + a.\link ArrayBase::sqrt sqrt\endlink(); \n + \link Eigen::sqrt sqrt\endlink(a);\n + m.\link MatrixBase::cwiseSqrt cwiseSqrt\endlink(); + computes square root (\f$ \sqrt a_i \f$) + using std::sqrt; \n + sqrt(a[i]);SSE2, AVX (f,d)
+ \anchor cwisetable_rsqrt + a.\link ArrayBase::rsqrt rsqrt\endlink(); \n + \link Eigen::rsqrt rsqrt\endlink(a); + reciprocal square root (\f$ 1/{\sqrt a_i} \f$) + using std::sqrt; \n + 1/sqrt(a[i]); \n + SSE2, AVX, AltiVec, ZVector (f,d)\n + (approx + 1 Newton iteration)
+ \anchor cwisetable_square + a.\link ArrayBase::square square\endlink(); \n + \link Eigen::square square\endlink(a); + computes square power (\f$ a_i^2 \f$) + a[i]*a[i]All (i32,f,d,cf,cd)
+ \anchor cwisetable_cube + a.\link ArrayBase::cube cube\endlink(); \n + \link Eigen::cube cube\endlink(a); + computes cubic power (\f$ a_i^3 \f$) + a[i]*a[i]*a[i]All (i32,f,d,cf,cd)
+ \anchor cwisetable_abs2 + a.\link ArrayBase::abs2 abs2\endlink(); \n + \link Eigen::abs2 abs2\endlink(a);\n + m.\link MatrixBase::cwiseAbs2 cwiseAbs2\endlink(); + computes the squared absolute value (\f$ |a_i|^2 \f$) + real: a[i]*a[i] \n + complex: real(a[i])*real(a[i]) \n +        + imag(a[i])*imag(a[i])All (i32,f,d)
Trigonometric functions
+ \anchor cwisetable_sin + a.\link ArrayBase::sin sin\endlink(); \n + \link Eigen::sin sin\endlink(a); + computes sine + using std::sin; \n + sin(a[i]);SSE2, AVX (f)
+ \anchor cwisetable_cos + a.\link ArrayBase::cos cos\endlink(); \n + \link Eigen::cos cos\endlink(a); + computes cosine + using std::cos; \n + cos(a[i]);SSE2, AVX (f)
+ \anchor cwisetable_tan + a.\link ArrayBase::tan tan\endlink(); \n + \link Eigen::tan tan\endlink(a); + computes tangent + using std::tan; \n + tan(a[i]);
+ \anchor cwisetable_asin + a.\link ArrayBase::asin asin\endlink(); \n + \link Eigen::asin asin\endlink(a); + computes arc sine (\f$ \sin^{-1} a_i \f$) + using std::asin; \n + asin(a[i]);
+ \anchor cwisetable_acos + a.\link ArrayBase::acos acos\endlink(); \n + \link Eigen::acos acos\endlink(a); + computes arc cosine (\f$ \cos^{-1} a_i \f$) + using std::acos; \n + acos(a[i]);
+ \anchor cwisetable_atan + a.\link ArrayBase::atan tan\endlink(); \n + \link Eigen::atan atan\endlink(a); + computes arc tangent (\f$ \tan^{-1} a_i \f$) + using std::atan; \n + atan(a[i]);
Hyperbolic functions
+ \anchor cwisetable_sinh + a.\link ArrayBase::sinh sinh\endlink(); \n + \link Eigen::sinh sinh\endlink(a); + computes hyperbolic sine + using std::sinh; \n + sinh(a[i]);
+ \anchor cwisetable_cosh + a.\link ArrayBase::cosh cohs\endlink(); \n + \link Eigen::cosh cosh\endlink(a); + computes hyperbolic cosine + using std::cosh; \n + cosh(a[i]);
+ \anchor cwisetable_tanh + a.\link ArrayBase::tanh tanh\endlink(); \n + \link Eigen::tanh tanh\endlink(a); + computes hyperbolic tangent + using std::tanh; \n + tanh(a[i]);
Nearest integer floating point operations
+ \anchor cwisetable_ceil + a.\link ArrayBase::ceil ceil\endlink(); \n + \link Eigen::ceil ceil\endlink(a); + nearest integer not less than the given value + using std::ceil; \n + ceil(a[i]);SSE4,AVX,ZVector (f,d)
+ \anchor cwisetable_floor + a.\link ArrayBase::floor floor\endlink(); \n + \link Eigen::floor floor\endlink(a); + nearest integer not greater than the given value + using std::floor; \n + floor(a[i]);SSE4,AVX,ZVector (f,d)
+ \anchor cwisetable_round + a.\link ArrayBase::round round\endlink(); \n + \link Eigen::round round\endlink(a); + nearest integer, \n rounding away from zero in halfway casesbuilt-in generic implementation \n based on \c floor and \c ceil,\n + plus \c using \c std::round ; \cpp11SSE4,AVX,ZVector (f,d)
Floating point manipulation functions
Classification and comparison
+ \anchor cwisetable_isfinite + a.\link ArrayBase::isFinite isFinite\endlink(); \n + \link Eigen::isfinite isfinite\endlink(a); + checks if the given number has finite valuebuilt-in generic implementation,\n + plus \c using \c std::isfinite ; \cpp11
+ \anchor cwisetable_isinf + a.\link ArrayBase::isInf isInf\endlink(); \n + \link Eigen::isinf isinf\endlink(a); + checks if the given number is infinitebuilt-in generic implementation,\n + plus \c using \c std::isinf ; \cpp11
+ \anchor cwisetable_isnan + a.\link ArrayBase::isNaN isNaN\endlink(); \n + \link Eigen::isnan isnan\endlink(a); + checks if the given number is not a numberbuilt-in generic implementation,\n + plus \c using \c std::isnan ; \cpp11
Error and gamma functions
Require \c \#include \c
+ \anchor cwisetable_erf + a.\link ArrayBase::erf erf\endlink(); \n + \link Eigen::erf erf\endlink(a); + error function + using std::erf; \cpp11 \n + erf(a[i]); +
+ \anchor cwisetable_erfc + a.\link ArrayBase::erfc erfc\endlink(); \n + \link Eigen::erfc erfc\endlink(a); + complementary error function + using std::erfc; \cpp11 \n + erfc(a[i]); +
+ \anchor cwisetable_lgamma + a.\link ArrayBase::lgamma lgamma\endlink(); \n + \link Eigen::lgamma lgamma\endlink(a); + natural logarithm of the gamma function + using std::lgamma; \cpp11 \n + lgamma(a[i]); +
+ \anchor cwisetable_digamma + a.\link ArrayBase::digamma digamma\endlink(); \n + \link Eigen::digamma digamma\endlink(a); + logarithmic derivative of the gamma function + built-in for float and double +
+ \anchor cwisetable_igamma + \link Eigen::igamma igamma\endlink(a,x); + lower incomplete gamma integral + \n \f$ \gamma(a_i,x_i)= \frac{1}{|a_i|} \int_{0}^{x_i}e^{\text{-}t} t^{a_i-1} \mathrm{d} t \f$ + built-in for float and double,\n but requires \cpp11 +
+ \anchor cwisetable_igammac + \link Eigen::igammac igammac\endlink(a,x); + upper incomplete gamma integral + \n \f$ \Gamma(a_i,x_i) = \frac{1}{|a_i|} \int_{x_i}^{\infty}e^{\text{-}t} t^{a_i-1} \mathrm{d} t \f$ + built-in for float and double,\n but requires \cpp11 +
Special functions
Require \c \#include \c
+ \anchor cwisetable_polygamma + \link Eigen::polygamma polygamma\endlink(n,x); + n-th derivative of digamma at x + built-in generic based on\n \c lgamma , + \c digamma + and \c zeta . +
+ \anchor cwisetable_betainc + \link Eigen::betainc betainc\endlink(a,b,x); + Incomplete beta function + built-in for float and double,\n but requires \cpp11 +
+ \anchor cwisetable_zeta + \link Eigen::zeta zeta\endlink(a,b); + Hurwitz zeta function + \n \f$ \zeta(a_i,b_i)=\sum_{k=0}^{\infty}(b_i+k)^{\text{-}a_i} \f$ + built-in for float and double +
+ +\n + +*/ + +} diff --git a/external/eigen3/doc/CustomizingEigen.dox b/external/eigen3/doc/CustomizingEigen.dox deleted file mode 100644 index 5a0890e..0000000 --- a/external/eigen3/doc/CustomizingEigen.dox +++ /dev/null @@ -1,188 +0,0 @@ -namespace Eigen { - -/** \page TopicCustomizingEigen Customizing/Extending Eigen - -Eigen can be extended in several ways, for instance, by defining global methods, \ref ExtendingMatrixBase "by adding custom methods to MatrixBase", adding support to \ref CustomScalarType "custom types" etc. - -\eigenAutoToc - -\section ExtendingMatrixBase Extending MatrixBase (and other classes) - -In this section we will see how to add custom methods to MatrixBase. Since all expressions and matrix types inherit MatrixBase, adding a method to MatrixBase make it immediately available to all expressions ! A typical use case is, for instance, to make Eigen compatible with another API. - -You certainly know that in C++ it is not possible to add methods to an existing class. So how that's possible ? Here the trick is to include in the declaration of MatrixBase a file defined by the preprocessor token \c EIGEN_MATRIXBASE_PLUGIN: -\code -class MatrixBase { - // ... - #ifdef EIGEN_MATRIXBASE_PLUGIN - #include EIGEN_MATRIXBASE_PLUGIN - #endif -}; -\endcode -Therefore to extend MatrixBase with your own methods you just have to create a file with your method declaration and define EIGEN_MATRIXBASE_PLUGIN before you include any Eigen's header file. - -You can extend many of the other classes used in Eigen by defining similarly named preprocessor symbols. For instance, define \c EIGEN_ARRAYBASE_PLUGIN if you want to extend the ArrayBase class. A full list of classes that can be extended in this way and the corresponding preprocessor symbols can be found on our page \ref TopicPreprocessorDirectives. - -Here is an example of an extension file for adding methods to MatrixBase: \n -\b MatrixBaseAddons.h -\code -inline Scalar at(uint i, uint j) const { return this->operator()(i,j); } -inline Scalar& at(uint i, uint j) { return this->operator()(i,j); } -inline Scalar at(uint i) const { return this->operator[](i); } -inline Scalar& at(uint i) { return this->operator[](i); } - -inline RealScalar squaredLength() const { return squaredNorm(); } -inline RealScalar length() const { return norm(); } -inline RealScalar invLength(void) const { return fast_inv_sqrt(squaredNorm()); } - -template -inline Scalar squaredDistanceTo(const MatrixBase& other) const -{ return (derived() - other.derived()).squaredNorm(); } - -template -inline RealScalar distanceTo(const MatrixBase& other) const -{ return internal::sqrt(derived().squaredDistanceTo(other)); } - -inline void scaleTo(RealScalar l) { RealScalar vl = norm(); if (vl>1e-9) derived() *= (l/vl); } - -inline Transpose transposed() {return this->transpose();} -inline const Transpose transposed() const {return this->transpose();} - -inline uint minComponentId(void) const { int i; this->minCoeff(&i); return i; } -inline uint maxComponentId(void) const { int i; this->maxCoeff(&i); return i; } - -template -void makeFloor(const MatrixBase& other) { derived() = derived().cwiseMin(other.derived()); } -template -void makeCeil(const MatrixBase& other) { derived() = derived().cwiseMax(other.derived()); } - -const CwiseUnaryOp, Derived> -operator+(const Scalar& scalar) const -{ return CwiseUnaryOp, Derived>(derived(), internal::scalar_add_op(scalar)); } - -friend const CwiseUnaryOp, Derived> -operator+(const Scalar& scalar, const MatrixBase& mat) -{ return CwiseUnaryOp, Derived>(mat.derived(), internal::scalar_add_op(scalar)); } -\endcode - -Then one can the following declaration in the config.h or whatever prerequisites header file of his project: -\code -#define EIGEN_MATRIXBASE_PLUGIN "MatrixBaseAddons.h" -\endcode - -\section InheritingFromMatrix Inheriting from Matrix - -Before inheriting from Matrix, be really, i mean REALLY sure that using -EIGEN_MATRIX_PLUGIN is not what you really want (see previous section). -If you just need to add few members to Matrix, this is the way to go. - -An example of when you actually need to inherit Matrix, is when you have -several layers of heritage such as MyVerySpecificVector1,MyVerySpecificVector1 -> MyVector1 -> Matrix and. -MyVerySpecificVector3,MyVerySpecificVector4 -> MyVector2 -> Matrix. - -In order for your object to work within the %Eigen framework, you need to -define a few members in your inherited class. - -Here is a minimalistic example:\n -\code -class MyVectorType : public Eigen::VectorXd -{ -public: - MyVectorType(void):Eigen::VectorXd() {} - - typedef Eigen::VectorXd Base; - - // This constructor allows you to construct MyVectorType from Eigen expressions - template - MyVectorType(const Eigen::MatrixBase& other) - : Eigen::Vector3d(other) - { } - - // This method allows you to assign Eigen expressions to MyVectorType - template - MyVectorType & operator= (const Eigen::MatrixBase & other) - { - this->Base::operator=(other); - return *this; - } -}; -\endcode - -This is the kind of error you can get if you don't provide those methods -\code -error: no match for ‘operator=’ in ‘delta = -(((Eigen::MatrixBase, 10000, 1, 2, 10000, -1> >*)(& delta)) + 8u)->Eigen::MatrixBase::cwise [with Derived = -Eigen::Matrix, 10000, 1, 2, 10000, -1>]().Eigen::Cwise::operator* [with OtherDerived = -Eigen::Matrix, 10000, 1, 2, 10000, 1>, ExpressionType = -Eigen::Matrix, 10000, 1, 2, 10000, 1>](((const -Eigen::MatrixBase, 10000, 1, 2, 10000, 1> ->&)(((const Eigen::MatrixBase, 10000, 1, ->2, 10000, 1> >*)((const spectral1d*)where)) + 8u)))’ -\endcode - -\anchor user_defined_scalars \section CustomScalarType Using custom scalar types - -By default, Eigen currently supports standard floating-point types (\c float, \c double, \c std::complex, \c std::complex, \c long \c double), as well as all native integer types (e.g., \c int, \c unsigned \c int, \c short, etc.), and \c bool. -On x86-64 systems, \c long \c double permits to locally enforces the use of x87 registers with extended accuracy (in comparison to SSE). - -In order to add support for a custom type \c T you need: --# make sure the common operator (+,-,*,/,etc.) are supported by the type \c T --# add a specialization of struct Eigen::NumTraits (see \ref NumTraits) --# define the math functions that makes sense for your type. This includes standard ones like sqrt, pow, sin, tan, conj, real, imag, etc, as well as abs2 which is Eigen specific. - (see the file Eigen/src/Core/MathFunctions.h) - -The math function should be defined in the same namespace than \c T, or in the \c std namespace though that second approach is not recommended. - -Here is a concrete example adding support for the Adolc's \c adouble type. Adolc is an automatic differentiation library. The type \c adouble is basically a real value tracking the values of any number of partial derivatives. - -\code -#ifndef ADOLCSUPPORT_H -#define ADOLCSUPPORT_H - -#define ADOLC_TAPELESS -#include -#include - -namespace Eigen { - -template<> struct NumTraits - : NumTraits // permits to get the epsilon, dummy_precision, lowest, highest functions -{ - typedef adtl::adouble Real; - typedef adtl::adouble NonInteger; - typedef adtl::adouble Nested; - - enum { - IsComplex = 0, - IsInteger = 0, - IsSigned = 1, - RequireInitialization = 1, - ReadCost = 1, - AddCost = 3, - MulCost = 3 - }; -}; - -} - -namespace adtl { - -inline const adouble& conj(const adouble& x) { return x; } -inline const adouble& real(const adouble& x) { return x; } -inline adouble imag(const adouble&) { return 0.; } -inline adouble abs(const adouble& x) { return fabs(x); } -inline adouble abs2(const adouble& x) { return x*x; } - -} - -#endif // ADOLCSUPPORT_H -\endcode - - -\sa \ref TopicPreprocessorDirectives - -*/ - -} diff --git a/external/eigen3/doc/CustomizingEigen_CustomScalar.dox b/external/eigen3/doc/CustomizingEigen_CustomScalar.dox new file mode 100644 index 0000000..1ee78cb --- /dev/null +++ b/external/eigen3/doc/CustomizingEigen_CustomScalar.dox @@ -0,0 +1,120 @@ +namespace Eigen { + +/** \page TopicCustomizing_CustomScalar Using custom scalar types +\anchor user_defined_scalars + +By default, Eigen currently supports standard floating-point types (\c float, \c double, \c std::complex, \c std::complex, \c long \c double), as well as all native integer types (e.g., \c int, \c unsigned \c int, \c short, etc.), and \c bool. +On x86-64 systems, \c long \c double permits to locally enforces the use of x87 registers with extended accuracy (in comparison to SSE). + +In order to add support for a custom type \c T you need: +-# make sure the common operator (+,-,*,/,etc.) are supported by the type \c T +-# add a specialization of struct Eigen::NumTraits (see \ref NumTraits) +-# define the math functions that makes sense for your type. This includes standard ones like sqrt, pow, sin, tan, conj, real, imag, etc, as well as abs2 which is Eigen specific. + (see the file Eigen/src/Core/MathFunctions.h) + +The math function should be defined in the same namespace than \c T, or in the \c std namespace though that second approach is not recommended. + +Here is a concrete example adding support for the Adolc's \c adouble type. Adolc is an automatic differentiation library. The type \c adouble is basically a real value tracking the values of any number of partial derivatives. + +\code +#ifndef ADOLCSUPPORT_H +#define ADOLCSUPPORT_H + +#define ADOLC_TAPELESS +#include +#include + +namespace Eigen { + +template<> struct NumTraits + : NumTraits // permits to get the epsilon, dummy_precision, lowest, highest functions +{ + typedef adtl::adouble Real; + typedef adtl::adouble NonInteger; + typedef adtl::adouble Nested; + + enum { + IsComplex = 0, + IsInteger = 0, + IsSigned = 1, + RequireInitialization = 1, + ReadCost = 1, + AddCost = 3, + MulCost = 3 + }; +}; + +} + +namespace adtl { + +inline const adouble& conj(const adouble& x) { return x; } +inline const adouble& real(const adouble& x) { return x; } +inline adouble imag(const adouble&) { return 0.; } +inline adouble abs(const adouble& x) { return fabs(x); } +inline adouble abs2(const adouble& x) { return x*x; } + +} + +#endif // ADOLCSUPPORT_H +\endcode + +This other example adds support for the \c mpq_class type from GMP. It shows in particular how to change the way Eigen picks the best pivot during LU factorization. It selects the coefficient with the highest score, where the score is by default the absolute value of a number, but we can define a different score, for instance to prefer pivots with a more compact representation (this is an example, not a recommendation). Note that the scores should always be non-negative and only zero is allowed to have a score of zero. Also, this can interact badly with thresholds for inexact scalar types. + +\code +#include +#include +#include + +namespace Eigen { + template<> struct NumTraits : GenericNumTraits + { + typedef mpq_class Real; + typedef mpq_class NonInteger; + typedef mpq_class Nested; + + static inline Real epsilon() { return 0; } + static inline Real dummy_precision() { return 0; } + static inline Real digits10() { return 0; } + + enum { + IsInteger = 0, + IsSigned = 1, + IsComplex = 0, + RequireInitialization = 1, + ReadCost = 6, + AddCost = 150, + MulCost = 100 + }; + }; + + namespace internal { + + template<> struct scalar_score_coeff_op { + struct result_type : boost::totally_ordered1 { + std::size_t len; + result_type(int i = 0) : len(i) {} // Eigen uses Score(0) and Score() + result_type(mpq_class const& q) : + len(mpz_size(q.get_num_mpz_t())+ + mpz_size(q.get_den_mpz_t())-1) {} + friend bool operator<(result_type x, result_type y) { + // 0 is the worst possible pivot + if (x.len == 0) return y.len > 0; + if (y.len == 0) return false; + // Prefer a pivot with a small representation + return x.len > y.len; + } + friend bool operator==(result_type x, result_type y) { + // Only used to test if the score is 0 + return x.len == y.len; + } + }; + result_type operator()(mpq_class const& x) const { return x; } + }; + } +} +\endcode + +*/ + +} diff --git a/external/eigen3/doc/CustomizingEigen_InheritingMatrix.dox b/external/eigen3/doc/CustomizingEigen_InheritingMatrix.dox new file mode 100644 index 0000000..b21e554 --- /dev/null +++ b/external/eigen3/doc/CustomizingEigen_InheritingMatrix.dox @@ -0,0 +1,34 @@ +namespace Eigen { + +/** \page TopicCustomizing_InheritingMatrix Inheriting from Matrix + +Before inheriting from Matrix, be really, I mean REALLY, sure that using +EIGEN_MATRIX_PLUGIN is not what you really want (see previous section). +If you just need to add few members to Matrix, this is the way to go. + +An example of when you actually need to inherit Matrix, is when you +have several layers of heritage such as +MyVerySpecificVector1, MyVerySpecificVector2 -> MyVector1 -> Matrix and +MyVerySpecificVector3, MyVerySpecificVector4 -> MyVector2 -> Matrix. + +In order for your object to work within the %Eigen framework, you need to +define a few members in your inherited class. + +Here is a minimalistic example: + +\include CustomizingEigen_Inheritance.cpp + +Output: \verbinclude CustomizingEigen_Inheritance.out + +This is the kind of error you can get if you don't provide those methods +\verbatim +error: no match for ‘operator=’ in ‘v = Eigen::operator*( +const Eigen::MatrixBase >::Scalar&, +const Eigen::MatrixBase >::StorageBaseType&) +(((const Eigen::MatrixBase >::StorageBaseType&) +((const Eigen::MatrixBase >::StorageBaseType*)(& v))))’ +\endverbatim + +*/ + +} diff --git a/external/eigen3/doc/CustomizingEigen_NullaryExpr.dox b/external/eigen3/doc/CustomizingEigen_NullaryExpr.dox new file mode 100644 index 0000000..37c8dcd --- /dev/null +++ b/external/eigen3/doc/CustomizingEigen_NullaryExpr.dox @@ -0,0 +1,86 @@ +namespace Eigen { + +/** \page TopicCustomizing_NullaryExpr Matrix manipulation via nullary-expressions + + +The main purpose of the class CwiseNullaryOp is to define \em procedural matrices such as constant or random matrices as returned by the Ones(), Zero(), Constant(), Identity() and Random() methods. +Nevertheless, with some imagination it is possible to accomplish very sophisticated matrix manipulation with minimal efforts such that \ref TopicNewExpressionType "implementing new expression" is rarely needed. + +\section NullaryExpr_Circulant Example 1: circulant matrix + +To explore these possibilities let us start with the \em circulant example of the \ref TopicNewExpressionType "implementing new expression" topic. +Let us recall that a circulant matrix is a matrix where each column is the same as the +column to the left, except that it is cyclically shifted downwards. +For example, here is a 4-by-4 circulant matrix: +\f[ \begin{bmatrix} + 1 & 8 & 4 & 2 \\ + 2 & 1 & 8 & 4 \\ + 4 & 2 & 1 & 8 \\ + 8 & 4 & 2 & 1 +\end{bmatrix} \f] +A circulant matrix is uniquely determined by its first column. We wish +to write a function \c makeCirculant which, given the first column, +returns an expression representing the circulant matrix. + +For this exercise, the return type of \c makeCirculant will be a CwiseNullaryOp that we need to instantiate with: +1 - a proper \c circulant_functor storing the input vector and implementing the adequate coefficient accessor \c operator(i,j) +2 - a template instantiation of class Matrix conveying compile-time information such as the scalar type, sizes, and preferred storage layout. + +Calling \c ArgType the type of the input vector, we can construct the equivalent squared Matrix type as follows: + +\snippet make_circulant2.cpp square + +This little helper structure will help us to implement our \c makeCirculant function as follows: + +\snippet make_circulant2.cpp makeCirculant + +As usual, our function takes as argument a \c MatrixBase (see this \ref TopicFunctionTakingEigenTypes "page" for more details). +Then, the CwiseNullaryOp object is constructed through the DenseBase::NullaryExpr static method with the adequate runtime sizes. + +Then, we need to implement our \c circulant_functor, which is a straightforward exercise: + +\snippet make_circulant2.cpp circulant_func + +We are now all set to try our new feature: + +\snippet make_circulant2.cpp main + + +If all the fragments are combined, the following output is produced, +showing that the program works as expected: + +\include make_circulant2.out + +This implementation of \c makeCirculant is much simpler than \ref TopicNewExpressionType "defining a new expression" from scratch. + + +\section NullaryExpr_Indexing Example 2: indexing rows and columns + +The goal here is to mimic MatLab's ability to index a matrix through two vectors of indices referencing the rows and columns to be picked respectively, like this: + +\snippet nullary_indexing.out main1 + +To this end, let us first write a nullary-functor storing references to the input matrix and to the two arrays of indices, and implementing the required \c operator()(i,j): + +\snippet nullary_indexing.cpp functor + +Then, let's create an \c indexing(A,rows,cols) function creating the nullary expression: + +\snippet nullary_indexing.cpp function + +Finally, here is an example of how this function can be used: + +\snippet nullary_indexing.cpp main1 + +This straightforward implementation is already quite powerful as the row or column index arrays can also be expressions to perform offsetting, modulo, striding, reverse, etc. + +\snippet nullary_indexing.cpp main2 + +and the output is: + +\snippet nullary_indexing.out main2 + +*/ + +} + diff --git a/external/eigen3/doc/CustomizingEigen_Plugins.dox b/external/eigen3/doc/CustomizingEigen_Plugins.dox new file mode 100644 index 0000000..d88f240 --- /dev/null +++ b/external/eigen3/doc/CustomizingEigen_Plugins.dox @@ -0,0 +1,69 @@ +namespace Eigen { + +/** \page TopicCustomizing_Plugins Extending MatrixBase (and other classes) + +In this section we will see how to add custom methods to MatrixBase. Since all expressions and matrix types inherit MatrixBase, adding a method to MatrixBase make it immediately available to all expressions ! A typical use case is, for instance, to make Eigen compatible with another API. + +You certainly know that in C++ it is not possible to add methods to an existing class. So how that's possible ? Here the trick is to include in the declaration of MatrixBase a file defined by the preprocessor token \c EIGEN_MATRIXBASE_PLUGIN: +\code +class MatrixBase { + // ... + #ifdef EIGEN_MATRIXBASE_PLUGIN + #include EIGEN_MATRIXBASE_PLUGIN + #endif +}; +\endcode +Therefore to extend MatrixBase with your own methods you just have to create a file with your method declaration and define EIGEN_MATRIXBASE_PLUGIN before you include any Eigen's header file. + +You can extend many of the other classes used in Eigen by defining similarly named preprocessor symbols. For instance, define \c EIGEN_ARRAYBASE_PLUGIN if you want to extend the ArrayBase class. A full list of classes that can be extended in this way and the corresponding preprocessor symbols can be found on our page \ref TopicPreprocessorDirectives. + +Here is an example of an extension file for adding methods to MatrixBase: \n +\b MatrixBaseAddons.h +\code +inline Scalar at(uint i, uint j) const { return this->operator()(i,j); } +inline Scalar& at(uint i, uint j) { return this->operator()(i,j); } +inline Scalar at(uint i) const { return this->operator[](i); } +inline Scalar& at(uint i) { return this->operator[](i); } + +inline RealScalar squaredLength() const { return squaredNorm(); } +inline RealScalar length() const { return norm(); } +inline RealScalar invLength(void) const { return fast_inv_sqrt(squaredNorm()); } + +template +inline Scalar squaredDistanceTo(const MatrixBase& other) const +{ return (derived() - other.derived()).squaredNorm(); } + +template +inline RealScalar distanceTo(const MatrixBase& other) const +{ return internal::sqrt(derived().squaredDistanceTo(other)); } + +inline void scaleTo(RealScalar l) { RealScalar vl = norm(); if (vl>1e-9) derived() *= (l/vl); } + +inline Transpose transposed() {return this->transpose();} +inline const Transpose transposed() const {return this->transpose();} + +inline uint minComponentId(void) const { int i; this->minCoeff(&i); return i; } +inline uint maxComponentId(void) const { int i; this->maxCoeff(&i); return i; } + +template +void makeFloor(const MatrixBase& other) { derived() = derived().cwiseMin(other.derived()); } +template +void makeCeil(const MatrixBase& other) { derived() = derived().cwiseMax(other.derived()); } + +const CwiseBinaryOp, const Derived, const ConstantReturnType> +operator+(const Scalar& scalar) const +{ return CwiseBinaryOp, const Derived, const ConstantReturnType>(derived(), Constant(rows(),cols(),scalar)); } + +friend const CwiseBinaryOp, const ConstantReturnType, Derived> +operator+(const Scalar& scalar, const MatrixBase& mat) +{ return CwiseBinaryOp, const ConstantReturnType, Derived>(Constant(rows(),cols(),scalar), mat.derived()); } +\endcode + +Then one can the following declaration in the config.h or whatever prerequisites header file of his project: +\code +#define EIGEN_MATRIXBASE_PLUGIN "MatrixBaseAddons.h" +\endcode + +*/ + +} diff --git a/external/eigen3/doc/DenseDecompositionBenchmark.dox b/external/eigen3/doc/DenseDecompositionBenchmark.dox new file mode 100644 index 0000000..7be9c70 --- /dev/null +++ b/external/eigen3/doc/DenseDecompositionBenchmark.dox @@ -0,0 +1,42 @@ +namespace Eigen { + +/** \eigenManualPage DenseDecompositionBenchmark Benchmark of dense decompositions + +This page presents a speed comparison of the dense matrix decompositions offered by %Eigen for a wide range of square matrices and overconstrained problems. + +For a more general overview on the features and numerical robustness of linear solvers and decompositions, check this \link TopicLinearAlgebraDecompositions table \endlink. + +This benchmark has been run on a laptop equipped with an Intel core i7 \@ 2,6 GHz, and compiled with clang with \b AVX and \b FMA instruction sets enabled but without multi-threading. +It uses \b single \b precision \b float numbers. For double, you can get a good estimate by multiplying the timings by a factor 2. + +The square matrices are symmetric, and for the overconstrained matrices, the reported timmings include the cost to compute the symmetric covariance matrix \f$ A^T A \f$ for the first four solvers based on Cholesky and LU, as denoted by the \b * symbol (top-right corner part of the table). +Timings are in \b milliseconds, and factors are relative to the LLT decomposition which is the fastest but also the least general and robust. + + + + + + + + + + + + + + +
solver/size8x8 100x100 1000x1000 4000x4000 10000x8 10000x100 10000x1000 10000x4000
LLT0.050.425.83374.556.79 *30.15 *236.34 *3847.17 *
LDLT0.07 (x1.3)0.65 (x1.5)26.86 (x4.6)2361.18 (x6.3)6.81 (x1) *31.91 (x1.1) *252.61 (x1.1) *5807.66 (x1.5) *
PartialPivLU0.08 (x1.5)0.69 (x1.6)15.63 (x2.7)709.32 (x1.9)6.81 (x1) *31.32 (x1) *241.68 (x1) *4270.48 (x1.1) *
FullPivLU0.1 (x1.9)4.48 (x10.6)281.33 (x48.2)-6.83 (x1) *32.67 (x1.1) *498.25 (x2.1) *-
HouseholderQR0.19 (x3.5)2.18 (x5.2)23.42 (x4)1337.52 (x3.6)34.26 (x5)129.01 (x4.3)377.37 (x1.6)4839.1 (x1.3)
ColPivHouseholderQR0.23 (x4.3)2.23 (x5.3)103.34 (x17.7)9987.16 (x26.7)36.05 (x5.3)163.18 (x5.4)2354.08 (x10)37860.5 (x9.8)
CompleteOrthogonalDecomposition0.23 (x4.3)2.22 (x5.2)99.44 (x17.1)10555.3 (x28.2)35.75 (x5.3)169.39 (x5.6)2150.56 (x9.1)36981.8 (x9.6)
FullPivHouseholderQR0.23 (x4.3)4.64 (x11)289.1 (x49.6)-69.38 (x10.2)446.73 (x14.8)4852.12 (x20.5)-
JacobiSVD1.01 (x18.6)71.43 (x168.4)--113.81 (x16.7)1179.66 (x39.1)--
BDCSVD1.07 (x19.7)21.83 (x51.5)331.77 (x56.9)18587.9 (x49.6)110.53 (x16.3)397.67 (x13.2)2975 (x12.6)48593.2 (x12.6)
+ +\b *: This decomposition do not support direct least-square solving for over-constrained problems, and the reported timing include the cost to form the symmetric covariance matrix \f$ A^T A \f$. + +\b Observations: + + LLT is always the fastest solvers. + + For largely over-constrained problems, the cost of Cholesky/LU decompositions is dominated by the computation of the symmetric covariance matrix. + + For large problem sizes, only the decomposition implementing a cache-friendly blocking strategy scale well. Those include LLT, PartialPivLU, HouseholderQR, and BDCSVD. This explain why for a 4k x 4k matrix, HouseholderQR is faster than LDLT. In the future, LDLT and ColPivHouseholderQR will also implement blocking strategies. + + CompleteOrthogonalDecomposition is based on ColPivHouseholderQR and they thus achieve the same level of performance. + +The above table has been generated by the bench/dense_solvers.cpp file, feel-free to hack it to generate a table matching your hardware, compiler, and favorite problem sizes. + +*/ + +} diff --git a/external/eigen3/doc/Doxyfile.in b/external/eigen3/doc/Doxyfile.in index 696dd2a..48bb0a8 100644 --- a/external/eigen3/doc/Doxyfile.in +++ b/external/eigen3/doc/Doxyfile.in @@ -125,7 +125,7 @@ ALWAYS_DETAILED_SEC = NO # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. -INLINE_INHERITED_MEMB = YES +INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set @@ -206,6 +206,7 @@ TAB_SIZE = 8 # You can put \n's in the value part of an alias to insert newlines. ALIASES = "only_for_vectors=This is only for vectors (either row-vectors or column-vectors), i.e. matrices which are known at compile-time to have either one row or one column." \ + "not_reentrant=\warning This function is not re-entrant." \ "array_module=This is defined in the %Array module. \code #include \endcode" \ "cholesky_module=This is defined in the %Cholesky module. \code #include \endcode" \ "eigenvalues_module=This is defined in the %Eigenvalues module. \code #include \endcode" \ @@ -215,6 +216,7 @@ ALIASES = "only_for_vectors=This is only for vectors (either row- "lu_module=This is defined in the %LU module. \code #include \endcode" \ "qr_module=This is defined in the %QR module. \code #include \endcode" \ "svd_module=This is defined in the %SVD module. \code #include \endcode" \ + "specialfunctions_module=This is defined in the \b unsupported SpecialFunctions module. \code #include \endcode" \ "label=\bug" \ "matrixworld=*" \ "arrayworld=*" \ @@ -222,7 +224,13 @@ ALIASES = "only_for_vectors=This is only for vectors (either row- "note_about_using_kernel_to_study_multiple_solutions=If you need a complete analysis of the space of solutions, take the one solution obtained by this method and add to it elements of the kernel, as determined by kernel()." \ "note_about_checking_solutions=This method just tries to find as good a solution as possible. If you want to check whether a solution exists or if it is accurate, just call this function to get a result and then compute the error of this result, or use MatrixBase::isApprox() directly, for instance like this: \code bool a_solution_exists = (A*result).isApprox(b, precision); \endcode This method avoids dividing by zero, so that the non-existence of a solution doesn't by itself mean that you'll get \c inf or \c nan values." \ "note_try_to_help_rvo=This function returns the result by value. In order to make that efficient, it is implemented as just a return statement using a special constructor, hopefully allowing the compiler to perform a RVO (return value optimization)." \ - "nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\"" + "nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\"" \ + "implsparsesolverconcept=This class follows the \link TutorialSparseSolverConcept sparse solver concept \endlink." \ + "blank= " \ + "cpp11=[c++11]" \ + "cpp14=[c++14]" \ + "cpp17=[c++17]" + ALIASES += "eigenAutoToc= " ALIASES += "eigenManualPage=\defgroup" @@ -270,7 +278,7 @@ OPTIMIZE_OUTPUT_VHDL = NO # (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions # you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. -EXTENSION_MAPPING = +EXTENSION_MAPPING = .h=C++ no_extension=C++ # If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all # comments according to the Markdown format, which allows for more readable @@ -458,7 +466,7 @@ HIDE_IN_BODY_DOCS = NO # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. -INTERNAL_DOCS = NO +INTERNAL_DOCS = ${EIGEN_DOXY_INTERNAL} # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also @@ -472,13 +480,13 @@ CASE_SENSE_NAMES = YES # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. -HIDE_SCOPE_NAMES = YES +HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. -SHOW_INCLUDE_FILES = NO +SHOW_INCLUDE_FILES = ${EIGEN_DOXY_INTERNAL} # If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen # will list include files with double quotes in the documentation @@ -544,7 +552,7 @@ STRICT_PROTO_MATCHING = NO # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. -GENERATE_TODOLIST = NO +GENERATE_TODOLIST = ${EIGEN_DOXY_INTERNAL} # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test @@ -556,7 +564,7 @@ GENERATE_TESTLIST = NO # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. -GENERATE_BUGLIST = NO +GENERATE_BUGLIST = ${EIGEN_DOXY_INTERNAL} # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting @@ -719,7 +727,8 @@ RECURSIVE = YES # Note that relative paths are relative to the directory from which doxygen is # run. -EXCLUDE = "${Eigen_SOURCE_DIR}/Eigen/Eigen2Support" \ +EXCLUDE = "${Eigen_SOURCE_DIR}/Eigen/src/Core/products" \ + "${Eigen_SOURCE_DIR}/Eigen/Eigen2Support" \ "${Eigen_SOURCE_DIR}/Eigen/src/Eigen2Support" \ "${Eigen_SOURCE_DIR}/doc/examples" \ "${Eigen_SOURCE_DIR}/doc/special_examples" \ @@ -800,7 +809,7 @@ EXAMPLE_RECURSIVE = NO # directories that contain image that are included in the documentation (see # the \image command). -IMAGE_PATH = +IMAGE_PATH = ${Eigen_BINARY_DIR}/doc/html # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program @@ -864,13 +873,13 @@ STRIP_CODE_COMMENTS = YES # then for each documented function all documented # functions referencing it will be listed. -REFERENCED_BY_RELATION = YES +REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES # then for each documented function all documented entities # called/used by that function will be listed. -REFERENCES_RELATION = YES +REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from @@ -1581,9 +1590,14 @@ PREDEFINED = EIGEN_EMPTY_STRUCT \ EIGEN_VECTORIZE \ EIGEN_QT_SUPPORT \ EIGEN_STRONG_INLINE=inline \ - "EIGEN2_SUPPORT_STAGE=99" \ + EIGEN_DEVICE_FUNC= \ "EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR)=template const CwiseBinaryOp, const Derived, const OtherDerived> METHOD(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const;" \ - "EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS)=CwiseBinaryOp, const LHS, const RHS>" + "EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS)=CwiseBinaryOp, const LHS, const RHS>"\ + "EIGEN_CAT2(a,b)= a ## b"\ + "EIGEN_CAT(a,b)=EIGEN_CAT2(a,b)"\ + "EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME)=CwiseBinaryOp, const LHS, const RHS>"\ + DOXCOMMA=, + # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. @@ -1599,7 +1613,15 @@ EXPAND_AS_DEFINED = EIGEN_MAKE_TYPEDEFS \ EIGEN_CURRENT_STORAGE_BASE_CLASS \ EIGEN_MATHFUNC_IMPL \ _EIGEN_GENERIC_PUBLIC_INTERFACE \ - EIGEN2_SUPPORT + EIGEN_ARRAY_DECLARE_GLOBAL_UNARY \ + EIGEN_EMPTY \ + EIGEN_EULER_ANGLES_TYPEDEFS \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF \ + EIGEN_EULER_SYSTEM_TYPEDEF \ + EIGEN_DOC_UNARY_ADDONS \ + EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL \ + EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF + # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all references to function-like macros diff --git a/external/eigen3/doc/FixedSizeVectorizable.dox b/external/eigen3/doc/FixedSizeVectorizable.dox index 8ae1351..49e38af 100644 --- a/external/eigen3/doc/FixedSizeVectorizable.dox +++ b/external/eigen3/doc/FixedSizeVectorizable.dox @@ -4,7 +4,7 @@ namespace Eigen { The goal of this page is to explain what we mean by "fixed-size vectorizable". -\section summary Executive Summary +\section FixedSizeVectorizable_summary Executive Summary An Eigen object is called "fixed-size vectorizable" if it has fixed size and that size is a multiple of 16 bytes. @@ -21,7 +21,7 @@ Examples include: \li Eigen::Quaterniond \li Eigen::Quaternionf -\section explanation Explanation +\section FixedSizeVectorizable_explanation Explanation First, "fixed-size" should be clear: an Eigen object has fixed size if its number of rows and its number of columns are fixed at compile-time. So for example Matrix3f has fixed size, but MatrixXf doesn't (the opposite of fixed-size is dynamic-size). diff --git a/external/eigen3/doc/InplaceDecomposition.dox b/external/eigen3/doc/InplaceDecomposition.dox new file mode 100644 index 0000000..cb1c6d4 --- /dev/null +++ b/external/eigen3/doc/InplaceDecomposition.dox @@ -0,0 +1,115 @@ +namespace Eigen { + +/** \eigenManualPage InplaceDecomposition Inplace matrix decompositions + +Starting from %Eigen 3.3, the LU, Cholesky, and QR decompositions can operate \em inplace, that is, directly within the given input matrix. +This feature is especially useful when dealing with huge matrices, and or when the available memory is very limited (embedded systems). + +To this end, the respective decomposition class must be instantiated with a Ref<> matrix type, and the decomposition object must be constructed with the input matrix as argument. As an example, let us consider an inplace LU decomposition with partial pivoting. + +Let's start with the basic inclusions, and declaration of a 2x2 matrix \c A: + + + + + + + +
codeoutput
\snippet TutorialInplaceLU.cpp init + \snippet TutorialInplaceLU.out init +
+ +No surprise here! Then, let's declare our inplace LU object \c lu, and check the content of the matrix \c A: + + + + + + +
\snippet TutorialInplaceLU.cpp declaration + \snippet TutorialInplaceLU.out declaration +
+ +Here, the \c lu object computes and stores the \c L and \c U factors within the memory held by the matrix \c A. +The coefficients of \c A have thus been destroyed during the factorization, and replaced by the L and U factors as one can verify: + + + + + + +
\snippet TutorialInplaceLU.cpp matrixLU + \snippet TutorialInplaceLU.out matrixLU +
+ +Then, one can use the \c lu object as usual, for instance to solve the Ax=b problem: + + + + + +
\snippet TutorialInplaceLU.cpp solve + \snippet TutorialInplaceLU.out solve +
+ +Here, since the content of the original matrix \c A has been lost, we had to declared a new matrix \c A0 to verify the result. + +Since the memory is shared between \c A and \c lu, modifying the matrix \c A will make \c lu invalid. +This can easily be verified by modifying the content of \c A and trying to solve the initial problem again: + + + + + + +
\snippet TutorialInplaceLU.cpp modifyA + \snippet TutorialInplaceLU.out modifyA +
+ +Note that there is no shared pointer under the hood, it is the \b responsibility \b of \b the \b user to keep the input matrix \c A in life as long as \c lu is living. + +If one wants to update the factorization with the modified A, one has to call the compute method as usual: + + + + + +
\snippet TutorialInplaceLU.cpp recompute + \snippet TutorialInplaceLU.out recompute +
+ +Note that calling compute does not change the memory which is referenced by the \c lu object. Therefore, if the compute method is called with another matrix \c A1 different than \c A, then the content of \c A1 won't be modified. This is still the content of \c A that will be used to store the L and U factors of the matrix \c A1. +This can easily be verified as follows: + + + + + +
\snippet TutorialInplaceLU.cpp recompute_bis0 + \snippet TutorialInplaceLU.out recompute_bis0 +
+The matrix \c A1 is unchanged, and one can thus solve A1*x=b, and directly check the residual without any copy of \c A1: + + + + + +
\snippet TutorialInplaceLU.cpp recompute_bis1 + \snippet TutorialInplaceLU.out recompute_bis1 +
+ + +Here is the list of matrix decompositions supporting this inplace mechanism: + +- class LLT +- class LDLT +- class PartialPivLU +- class FullPivLU +- class HouseholderQR +- class ColPivHouseholderQR +- class FullPivHouseholderQR +- class CompleteOrthogonalDecomposition + +*/ + +} \ No newline at end of file diff --git a/external/eigen3/doc/LeastSquares.dox b/external/eigen3/doc/LeastSquares.dox new file mode 100644 index 0000000..e2191a2 --- /dev/null +++ b/external/eigen3/doc/LeastSquares.dox @@ -0,0 +1,70 @@ +namespace Eigen { + +/** \eigenManualPage LeastSquares Solving linear least squares systems + +This page describes how to solve linear least squares systems using %Eigen. An overdetermined system +of equations, say \a Ax = \a b, has no solutions. In this case, it makes sense to search for the +vector \a x which is closest to being a solution, in the sense that the difference \a Ax - \a b is +as small as possible. This \a x is called the least square solution (if the Euclidean norm is used). + +The three methods discussed on this page are the SVD decomposition, the QR decomposition and normal +equations. Of these, the SVD decomposition is generally the most accurate but the slowest, normal +equations is the fastest but least accurate, and the QR decomposition is in between. + +\eigenAutoToc + + +\section LeastSquaresSVD Using the SVD decomposition + +The \link JacobiSVD::solve() solve() \endlink method in the JacobiSVD class can be directly used to +solve linear squares systems. It is not enough to compute only the singular values (the default for +this class); you also need the singular vectors but the thin SVD decomposition suffices for +computing least squares solutions: + + + + + + + +
Example:Output:
\include TutorialLinAlgSVDSolve.cpp \verbinclude TutorialLinAlgSVDSolve.out
+ +This is example from the page \link TutorialLinearAlgebra Linear algebra and decompositions \endlink. + + +\section LeastSquaresQR Using the QR decomposition + +The solve() method in QR decomposition classes also computes the least squares solution. There are +three QR decomposition classes: HouseholderQR (no pivoting, so fast but unstable), +ColPivHouseholderQR (column pivoting, thus a bit slower but more accurate) and FullPivHouseholderQR +(full pivoting, so slowest and most stable). Here is an example with column pivoting: + + + + + + + +
Example:Output:
\include LeastSquaresQR.cpp \verbinclude LeastSquaresQR.out
+ + +\section LeastSquaresNormalEquations Using normal equations + +Finding the least squares solution of \a Ax = \a b is equivalent to solving the normal equation +ATAx = ATb. This leads to the following code + + + + + + + +
Example:Output:
\include LeastSquaresNormalEquations.cpp \verbinclude LeastSquaresNormalEquations.out
+ +If the matrix \a A is ill-conditioned, then this is not a good method, because the condition number +of ATA is the square of the condition number of \a A. This means that you +lose twice as many digits using normal equation than if you use the other methods. + +*/ + +} \ No newline at end of file diff --git a/external/eigen3/doc/Manual.dox b/external/eigen3/doc/Manual.dox index 52427f0..342b145 100644 --- a/external/eigen3/doc/Manual.dox +++ b/external/eigen3/doc/Manual.dox @@ -3,19 +3,32 @@ namespace Eigen { +/** \page UserManual_CustomizingEigen Extending/Customizing Eigen + %Eigen can be extended in several ways, for instance, by defining global methods, by inserting custom methods within main %Eigen's classes through the \ref TopicCustomizing_Plugins "plugin" mechanism, by adding support to \ref TopicCustomizing_CustomScalar "custom scalar types" etc. See below for the respective sub-topics. + - \subpage TopicCustomizing_Plugins + - \subpage TopicCustomizing_InheritingMatrix + - \subpage TopicCustomizing_CustomScalar + - \subpage TopicCustomizing_NullaryExpr + - \subpage TopicNewExpressionType + \sa \ref TopicPreprocessorDirectives +*/ + + /** \page UserManual_Generalities General topics - \subpage Eigen2ToEigen3 - \subpage TopicFunctionTakingEigenTypes - \subpage TopicPreprocessorDirectives - \subpage TopicAssertions - - \subpage TopicCustomizingEigen - \subpage TopicMultiThreading + - \subpage TopicUsingBlasLapack - \subpage TopicUsingIntelMKL + - \subpage TopicCUDA - \subpage TopicPitfalls - \subpage TopicTemplateKeyword - \subpage UserManual_UnderstandingEigen + - \subpage TopicCMakeGuide */ - + /** \page UserManual_UnderstandingEigen Understanding Eigen - \subpage TopicInsideEigenExample - \subpage TopicClassHierarchy @@ -57,6 +70,8 @@ namespace Eigen { \ingroup DenseMatrixManipulation_chapter */ /** \addtogroup TutorialMapClass \ingroup DenseMatrixManipulation_chapter */ +/** \addtogroup TutorialReshapeSlicing + \ingroup DenseMatrixManipulation_chapter */ /** \addtogroup TopicAliasing \ingroup DenseMatrixManipulation_chapter */ /** \addtogroup TopicStorageOrders @@ -86,6 +101,9 @@ namespace Eigen { /** \addtogroup Householder_Module \ingroup DenseMatrixManipulation_Reference */ +/** \addtogroup CoeffwiseMathFunctions + \ingroup DenseMatrixManipulation_chapter */ + /** \addtogroup QuickRefPage \ingroup DenseMatrixManipulation_chapter */ @@ -97,6 +115,12 @@ namespace Eigen { \ingroup DenseLinearSolvers_chapter */ /** \addtogroup TopicLinearAlgebraDecompositions \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup LeastSquares + \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup InplaceDecomposition + \ingroup DenseLinearSolvers_chapter */ +/** \addtogroup DenseDecompositionBenchmark + \ingroup DenseLinearSolvers_chapter */ /** \addtogroup DenseLinearSolvers_Reference \ingroup DenseLinearSolvers_chapter */ @@ -159,4 +183,7 @@ namespace Eigen { \ingroup Geometry_Reference */ /** \addtogroup Splines_Module \ingroup Geometry_Reference */ + +/** \internal \brief Namespace containing low-level routines from the %Eigen library. */ +namespace internal {} } diff --git a/external/eigen3/doc/MatrixfreeSolverExample.dox b/external/eigen3/doc/MatrixfreeSolverExample.dox index 9921c72..3efa292 100644 --- a/external/eigen3/doc/MatrixfreeSolverExample.dox +++ b/external/eigen3/doc/MatrixfreeSolverExample.dox @@ -6,13 +6,12 @@ namespace Eigen { \eigenManualPage MatrixfreeSolverExample Matrix-free solvers Iterative solvers such as ConjugateGradient and BiCGSTAB can be used in a matrix free context. To this end, user must provide a wrapper class inheriting EigenBase<> and implementing the following methods: - - Index rows() and Index cols(): returns number of rows and columns respectively - - operator* with and Eigen dense column vector - - resize(rows,cols): needed for source compatibility (can stay empty) + - \c Index \c rows() and \c Index \c cols(): returns number of rows and columns respectively + - \c operator* with your type and an %Eigen dense column vector (its actual implementation goes in a specialization of the internal::generic_product_impl class) -Eigen::internal::traits<> must also be specialized for the wrapper type. +\c Eigen::internal::traits<> must also be specialized for the wrapper type. -For efficiency purpose, one might also want to provide a custom preconditioner. Here is an example using ConjugateGradient and implementing also a custom Jacobi preconditioner: +Here is a complete example wrapping an Eigen::SparseMatrix: \include matrixfree_cg.cpp Output: \verbinclude matrixfree_cg.out diff --git a/external/eigen3/doc/NewExpressionType.dox b/external/eigen3/doc/NewExpressionType.dox new file mode 100644 index 0000000..c2f2433 --- /dev/null +++ b/external/eigen3/doc/NewExpressionType.dox @@ -0,0 +1,143 @@ +namespace Eigen { + +/** \page TopicNewExpressionType Adding a new expression type + + +\warning +Disclaimer: this page is tailored to very advanced users who are not afraid of dealing with some %Eigen's internal aspects. +In most cases, a custom expression can be avoided by either using custom \ref MatrixBase::unaryExpr "unary" or \ref MatrixBase::binaryExpr "binary" functors, +while extremely complex matrix manipulations can be achieved by a nullary functors as described in the \ref TopicCustomizing_NullaryExpr "previous page". + +This page describes with the help of an example how to implement a new +light-weight expression type in %Eigen. This consists of three parts: +the expression type itself, a traits class containing compile-time +information about the expression, and the evaluator class which is +used to evaluate the expression to a matrix. + +\b TO \b DO: Write a page explaining the design, with details on +vectorization etc., and refer to that page here. + + +\eigenAutoToc + +\section TopicSetting The setting + +A circulant matrix is a matrix where each column is the same as the +column to the left, except that it is cyclically shifted downwards. +For example, here is a 4-by-4 circulant matrix: +\f[ \begin{bmatrix} + 1 & 8 & 4 & 2 \\ + 2 & 1 & 8 & 4 \\ + 4 & 2 & 1 & 8 \\ + 8 & 4 & 2 & 1 +\end{bmatrix} \f] +A circulant matrix is uniquely determined by its first column. We wish +to write a function \c makeCirculant which, given the first column, +returns an expression representing the circulant matrix. + +For simplicity, we restrict the \c makeCirculant function to dense +matrices. It may make sense to also allow arrays, or sparse matrices, +but we will not do so here. We also do not want to support +vectorization. + + +\section TopicPreamble Getting started + +We will present the file implementing the \c makeCirculant function +part by part. We start by including the appropriate header files and +forward declaring the expression class, which we will call +\c Circulant. The \c makeCirculant function will return an object of +this type. The class \c Circulant is in fact a class template; the +template argument \c ArgType refers to the type of the vector passed +to the \c makeCirculant function. + +\include make_circulant.cpp.preamble + + +\section TopicTraits The traits class + +For every expression class \c X, there should be a traits class +\c Traits in the \c Eigen::internal namespace containing +information about \c X known as compile time. + +As explained in \ref TopicSetting, we designed the \c Circulant +expression class to refer to dense matrices. The entries of the +circulant matrix have the same type as the entries of the vector +passed to the \c makeCirculant function. The type used to index the +entries is also the same. Again for simplicity, we will only return +column-major matrices. Finally, the circulant matrix is a square +matrix (number of rows equals number of columns), and the number of +rows equals the number of rows of the column vector passed to the +\c makeCirculant function. If this is a dynamic-size vector, then the +size of the circulant matrix is not known at compile-time. + +This leads to the following code: + +\include make_circulant.cpp.traits + + +\section TopicExpression The expression class + +The next step is to define the expression class itself. In our case, +we want to inherit from \c MatrixBase in order to expose the interface +for dense matrices. In the constructor, we check that we are passed a +column vector (see \ref TopicAssertions) and we store the vector from +which we are going to build the circulant matrix in the member +variable \c m_arg. Finally, the expression class should compute the +size of the corresponding circulant matrix. As explained above, this +is a square matrix with as many columns as the vector used to +construct the matrix. + +\b TO \b DO: What about the \c Nested typedef? It seems to be +necessary; is this only temporary? + +\include make_circulant.cpp.expression + + +\section TopicEvaluator The evaluator + +The last big fragment implements the evaluator for the \c Circulant +expression. The evaluator computes the entries of the circulant +matrix; this is done in the \c .coeff() member function. The entries +are computed by finding the corresponding entry of the vector from +which the circulant matrix is constructed. Getting this entry may +actually be non-trivial when the circulant matrix is constructed from +a vector which is given by a complicated expression, so we use the +evaluator which corresponds to the vector. + +The \c CoeffReadCost constant records the cost of computing an entry +of the circulant matrix; we ignore the index computation and say that +this is the same as the cost of computing an entry of the vector from +which the circulant matrix is constructed. + +In the constructor, we save the evaluator for the column vector which +defined the circulant matrix. We also save the size of that vector; +remember that we can query an expression object to find the size but +not the evaluator. + +\include make_circulant.cpp.evaluator + + +\section TopicEntry The entry point + +After all this, the \c makeCirculant function is very simple. It +simply creates an expression object and returns it. + +\include make_circulant.cpp.entry + + +\section TopicMain A simple main function for testing + +Finally, a short \c main function that shows how the \c makeCirculant +function can be called. + +\include make_circulant.cpp.main + +If all the fragments are combined, the following output is produced, +showing that the program works as expected: + +\include make_circulant.out + +*/ +} + diff --git a/external/eigen3/doc/Overview.dox b/external/eigen3/doc/Overview.dox index 9ab9623..dbb49bd 100644 --- a/external/eigen3/doc/Overview.dox +++ b/external/eigen3/doc/Overview.dox @@ -17,7 +17,9 @@ You're a MatLab user? There is also a short AS The \b main \b documentation is organized into \em chapters covering different domains of features. They are themselves composed of \em user \em manual pages describing the different features in a comprehensive way, and \em reference pages that gives you access to the API documentation through the related Eigen's \em modules and \em classes. -Under the \subpage UserManual_Generalities section, you will find documentation on more general topics such as preprocessor directives, controlling assertions, multi-threading, MKL support, some Eigen's internal insights, and much more... +Under the \subpage UserManual_CustomizingEigen section, you will find discussions and examples on extending %Eigen's features and supporting custom scalar types. + +Under the \subpage UserManual_Generalities section, you will find documentation on more general topics such as preprocessor directives, controlling assertions, multi-threading, MKL support, some Eigen's internal insights, and much more... Finally, do not miss the search engine, useful to quickly get to the documentation of a given class or function. diff --git a/external/eigen3/doc/PreprocessorDirectives.dox b/external/eigen3/doc/PreprocessorDirectives.dox index cfaba35..f01b39a 100644 --- a/external/eigen3/doc/PreprocessorDirectives.dox +++ b/external/eigen3/doc/PreprocessorDirectives.dox @@ -5,7 +5,7 @@ namespace Eigen { You can control some aspects of %Eigen by defining the preprocessor tokens using \c \#define. These macros should be defined before any %Eigen headers are included. Often they are best set in the project options. -This page lists the preprocesor tokens recognised by %Eigen. +This page lists the preprocessor tokens recognized by %Eigen. \eigenAutoToc @@ -18,25 +18,67 @@ one option, and other parts (or libraries that you use) are compiled with anothe fail to link or exhibit subtle bugs. Nevertheless, these options can be useful for people who know what they are doing. - - \b EIGEN2_SUPPORT - if defined, enables the Eigen2 compatibility mode. This is meant to ease the transition - of Eigen2 to Eigen3 (see \ref Eigen2ToEigen3). Not defined by default. - - \b EIGEN2_SUPPORT_STAGEnn_xxx (for various values of nn and xxx) - staged migration path from Eigen2 to - Eigen3; see \ref Eigen2SupportModes. + - \b EIGEN2_SUPPORT and \b EIGEN2_SUPPORT_STAGEnn_xxx are disabled starting from the 3.3 release. + Defining one of these will raise a compile-error. If you need to compile Eigen2 code, + check this site. - \b EIGEN_DEFAULT_DENSE_INDEX_TYPE - the type for column and row indices in matrices, vectors and array (DenseBase::Index). Set to \c std::ptrdiff_t by default. - \b EIGEN_DEFAULT_IO_FORMAT - the IOFormat to use when printing a matrix if no %IOFormat is specified. Defaults to the %IOFormat constructed by the default constructor IOFormat::IOFormat(). - \b EIGEN_INITIALIZE_MATRICES_BY_ZERO - if defined, all entries of newly constructed matrices and arrays are initialized to zero, as are new entries in matrices and arrays after resizing. Not defined by default. + \warning The unary (resp. binary) constructor of \c 1x1 (resp. \c 2x1 or \c 1x2) fixed size matrices is + always interpreted as an initialization constructor where the argument(s) are the coefficient values + and not the sizes. For instance, \code Vector2d v(2,1); \endcode will create a vector with coeficients [2,1], + and \b not a \c 2x1 vector initialized with zeros (i.e., [0,0]). If such cases might occur, then it is + recommended to use the default constructor with a explicit call to resize: + \code + Matrix v; + v.resize(size); + Matrix m; + m.resize(rows,cols); + \endcode - \b EIGEN_INITIALIZE_MATRICES_BY_NAN - if defined, all entries of newly constructed matrices and arrays are initialized to NaN, as are new entries in matrices and arrays after resizing. This option is especially useful for debugging purpose, though a memory tool like valgrind is preferable. Not defined by default. + \warning See the documentation of \c EIGEN_INITIALIZE_MATRICES_BY_ZERO for a discussion on a limitations + of these macros when applied to \c 1x1, \c 1x2, and \c 2x1 fixed-size matrices. - \b EIGEN_NO_AUTOMATIC_RESIZING - if defined, the matrices (or arrays) on both sides of an assignment a = b have to be of the same size; otherwise, %Eigen automatically resizes \c a so that it is of the correct size. Not defined by default. +\section TopicPreprocessorDirectivesCppVersion C++ standard features + +By default, %Eigen strive to automatically detect and enable langage features at compile-time based on +the information provided by the compiler. + + - \b EIGEN_MAX_CPP_VER - disables usage of C++ features requiring a version greater than EIGEN_MAX_CPP_VER. + Possible values are: 03, 11, 14, 17, etc. If not defined (the default), %Eigen enables all features supported + by the compiler. + +Individual features can be explicitly enabled or disabled by defining the following token to 0 or 1 respectively. +For instance, one might limit the C++ version to C++03 by defining EIGEN_MAX_CPP_VER=03, but still enable C99 math +functions by defining EIGEN_HAS_C99_MATH=1. + + - \b EIGEN_HAS_C99_MATH - controls the usage of C99 math functions such as erf, erfc, lgamma, etc. + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_CXX11_MATH - controls the implementation of some functions such as round, logp1, isinf, isnan, etc. + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_RVALUE_REFERENCES - defines whetehr rvalue references are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_STD_RESULT_OF - defines whether std::result_of is supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_VARIADIC_TEMPLATES - defines whether variadic templates are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_CONSTEXPR - defines whether relaxed const expression are supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<14. + - \b EIGEN_HAS_CXX11_CONTAINERS - defines whether STL's containers follows C++11 specifications + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + - \b EIGEN_HAS_CXX11_NOEXCEPT - defines whether noexcept is supported + Automatic detection disabled if EIGEN_MAX_CPP_VER<11. + \section TopicPreprocessorDirectivesAssertions Assertions The %Eigen library contains many assertions to guard against programming errors, both at compile time and at @@ -55,32 +97,39 @@ run time. However, these assertions do cost time and can thus be turned off. \section TopicPreprocessorDirectivesPerformance Alignment, vectorization and performance tweaking - - \b EIGEN_MALLOC_ALREADY_ALIGNED - Can be set to 0 or 1 to tell whether default system malloc already + - \b \c EIGEN_MALLOC_ALREADY_ALIGNED - Can be set to 0 or 1 to tell whether default system \c malloc already returns aligned buffers. In not defined, then this information is automatically deduced from the compiler and system preprocessor tokens. - - \b EIGEN_DONT_ALIGN - disables alignment completely. %Eigen will not try to align its objects and does not - expect that any objects passed to it are aligned. This will turn off vectorization. Not defined by default. - - \b EIGEN_DONT_ALIGN_STATICALLY - disables alignment of arrays on the stack. Not defined by default, unless - \c EIGEN_DONT_ALIGN is defined. - - \b EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP. + - \b \c EIGEN_MAX_ALIGN_BYTES - Must be a power of two, or 0. Defines an upper bound on the memory boundary in bytes on which dynamically and statically allocated data may be aligned by %Eigen. If not defined, a default value is automatically computed based on architecture, compiler, and OS. + This option is typically used to enforce binary compatibility between code/libraries compiled with different SIMD options. For instance, one may compile AVX code and enforce ABI compatibility with existing SSE code by defining \c EIGEN_MAX_ALIGN_BYTES=16. In the other way round, since by default AVX implies 32 bytes alignment for best performance, one can compile SSE code to be ABI compatible with AVX code by defining \c EIGEN_MAX_ALIGN_BYTES=32. + - \b \c EIGEN_MAX_STATIC_ALIGN_BYTES - Same as \c EIGEN_MAX_ALIGN_BYTES but for statically allocated data only. By default, if only \c EIGEN_MAX_ALIGN_BYTES is defined, then \c EIGEN_MAX_STATIC_ALIGN_BYTES == \c EIGEN_MAX_ALIGN_BYTES, otherwise a default value is automatically computed based on architecture, compiler, and OS (can be smaller than the default value of EIGEN_MAX_ALIGN_BYTES on architectures that do not support stack alignment). + Let us emphasize that \c EIGEN_MAX_*_ALIGN_BYTES define only a diserable upper bound. In practice data is aligned to largest power-of-two common divisor of \c EIGEN_MAX_STATIC_ALIGN_BYTES and the size of the data, such that memory is not wasted. + - \b \c EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP. See \ref TopicMultiThreading for details. - \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN. - - \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently + - \b \c EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled). + If set to 0 (disabled), then expression for which the destination cannot be aligned are not vectorized (e.g., unaligned + small fixed size vectors or matrices) + - \b \c EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently enables the SSE vectorization of sin() and cos(), and speedups sqrt() for single precision. Defined to 1 by default. Define it to 0 to disable. - - \b EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable + - \b \c EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable unrolling. The size of a loop here is expressed in %Eigen's own notion of "number of FLOPS", it does not correspond to the number of iterations or the number of instructions. The default is value 100. - - \b EIGEN_STACK_ALLOCATION_LIMIT - defines the maximum bytes for a buffer to be allocated on the stack. For internal + - \b \c EIGEN_STACK_ALLOCATION_LIMIT - defines the maximum bytes for a buffer to be allocated on the stack. For internal temporary buffers, dynamic memory allocation is employed as a fall back. For fixed-size matrices or arrays, exceeding this threshold raises a compile time assertion. Use 0 to set no limit. Default is 128 KB. + - \c EIGEN_DONT_ALIGN - Deprecated, it is a synonym for \c EIGEN_MAX_ALIGN_BYTES=0. It disables alignment completely. %Eigen will not try to align its objects and does not expect that any objects passed to it are aligned. This will turn off vectorization if \b EIGEN_UNALIGNED_VECTORIZE=1. Not defined by default. + - \c EIGEN_DONT_ALIGN_STATICALLY - Deprecated, it is a synonym for \c EIGEN_MAX_STATIC_ALIGN_BYTES=0. It disables alignment of arrays on the stack. Not defined by default, unless \c EIGEN_DONT_ALIGN is defined. + + \section TopicPreprocessorDirectivesPlugins Plugins It is possible to add new methods to many fundamental classes in %Eigen by writing a plugin. As explained in -the section \ref ExtendingMatrixBase, the plugin is specified by defining a \c EIGEN_xxx_PLUGIN macro. The +the section \ref TopicCustomizing_Plugins, the plugin is specified by defining a \c EIGEN_xxx_PLUGIN macro. The following macros are supported; none of them are defined by default. - \b EIGEN_ARRAY_PLUGIN - filename of plugin for extending the Array class. @@ -92,6 +141,7 @@ following macros are supported; none of them are defined by default. - \b EIGEN_MATRIXBASE_PLUGIN - filename of plugin for extending the MatrixBase class. - \b EIGEN_PLAINOBJECTBASE_PLUGIN - filename of plugin for extending the PlainObjectBase class. - \b EIGEN_MAPBASE_PLUGIN - filename of plugin for extending the MapBase class. + - \b EIGEN_QUATERNION_PLUGIN - filename of plugin for extending the Quaternion class. - \b EIGEN_QUATERNIONBASE_PLUGIN - filename of plugin for extending the QuaternionBase class. - \b EIGEN_SPARSEMATRIX_PLUGIN - filename of plugin for extending the SparseMatrix class. - \b EIGEN_SPARSEMATRIXBASE_PLUGIN - filename of plugin for extending the SparseMatrixBase class. diff --git a/external/eigen3/doc/QuickReference.dox b/external/eigen3/doc/QuickReference.dox index a4be0f6..44f5410 100644 --- a/external/eigen3/doc/QuickReference.dox +++ b/external/eigen3/doc/QuickReference.dox @@ -13,17 +13,17 @@ The Eigen library is divided in a Core module and several additional modules. Ea - + - - - - - - - - - + + + + + + + + +
ModuleHeader fileContents
\link Core_Module Core \endlink\code#include \endcodeMatrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation
\link Core_Module Core \endlink\code#include \endcodeMatrix and Array classes, basic linear algebra (including triangular and selfadjoint products), array manipulation
\link Geometry_Module Geometry \endlink\code#include \endcodeTransform, Translation, Scaling, Rotation2D and 3D rotations (Quaternion, AngleAxis)
\link LU_Module LU \endlink\code#include \endcodeInverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)
\link Cholesky_Module Cholesky \endlink\code#include \endcodeLLT and LDLT Cholesky factorization with solver
\link Householder_Module Householder \endlink\code#include \endcodeHouseholder transformations; this module is used by several linear algebra modules
\link SVD_Module SVD \endlink\code#include \endcodeSVD decomposition with least-squares solver (JacobiSVD)
\link QR_Module QR \endlink\code#include \endcodeQR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)
\link Eigenvalues_Module Eigenvalues \endlink\code#include \endcodeEigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)
\link Sparse_modules Sparse \endlink\code#include \endcode%Sparse matrix storage and related basic linear algebra (SparseMatrix, DynamicSparseMatrix, SparseVector)
\code#include \endcodeIncludes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files
\code#include \endcodeIncludes %Dense and %Sparse header files (the whole Eigen library)
\link LU_Module LU \endlink\code#include \endcodeInverse, determinant, LU decompositions with solver (FullPivLU, PartialPivLU)
\link Cholesky_Module Cholesky \endlink\code#include \endcodeLLT and LDLT Cholesky factorization with solver
\link Householder_Module Householder \endlink\code#include \endcodeHouseholder transformations; this module is used by several linear algebra modules
\link SVD_Module SVD \endlink\code#include \endcodeSVD decompositions with least-squares solver (JacobiSVD, BDCSVD)
\link QR_Module QR \endlink\code#include \endcodeQR decomposition with solver (HouseholderQR, ColPivHouseholderQR, FullPivHouseholderQR)
\link Eigenvalues_Module Eigenvalues \endlink\code#include \endcodeEigenvalue, eigenvector decompositions (EigenSolver, SelfAdjointEigenSolver, ComplexEigenSolver)
\link Sparse_Module Sparse \endlink\code#include \endcode%Sparse matrix storage and related basic linear algebra (SparseMatrix, SparseVector) \n (see \ref SparseQuickRefPage for details on sparse modules)
\code#include \endcodeIncludes Core, Geometry, LU, Cholesky, SVD, QR, and Eigenvalues header files
\code#include \endcodeIncludes %Dense and %Sparse header files (the whole Eigen library)
top @@ -340,7 +340,7 @@ mat1 = mat2.adjoint(); mat1.adjointInPlace(); \endcode -\link MatrixBase::dot() dot \endlink product \n inner product \matrixworld\code +\link MatrixBase::dot dot \endlink product \n inner product \matrixworld\code scalar = vec1.dot(vec2); scalar = col1.adjoint() * col2; scalar = (col1.adjoint() * col2).value();\endcode @@ -364,32 +364,10 @@ vec3 = vec1.cross(vec2);\endcode top \section QuickRef_Coeffwise Coefficient-wise \& Array operators -Coefficient-wise operators for matrices and vectors: - - - -
Matrix API \matrixworldVia Array conversions
\code -mat1.cwiseMin(mat2) -mat1.cwiseMax(mat2) -mat1.cwiseAbs2() -mat1.cwiseAbs() -mat1.cwiseSqrt() -mat1.cwiseProduct(mat2) -mat1.cwiseQuotient(mat2)\endcode -\code -mat1.array().min(mat2.array()) -mat1.array().max(mat2.array()) -mat1.array().abs2() -mat1.array().abs() -mat1.array().sqrt() -mat1.array() * mat2.array() -mat1.array() / mat2.array() -\endcode
- -It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with std::ptr_fun: -\code mat1.unaryExpr(std::ptr_fun(foo))\endcode -Array operators:\arrayworld +In addition to the aforementioned operators, Eigen supports numerous coefficient-wise operator and functions. +Most of them unambiguously makes sense in array-world\arrayworld. The following operators are readily available for arrays, +or available through .array() for vectors and matrices: - +
Arithmetic operators\code @@ -400,28 +378,108 @@ array1 + scalar array1 - scalar array1 += scalar array1 -= scalar array1 < array2 array1 > array2 array1 < scalar array1 > scalar array1 <= array2 array1 >= array2 array1 <= scalar array1 >= scalar array1 == array2 array1 != array2 array1 == scalar array1 != scalar +array1.min(array2) array1.max(array2) array1.min(scalar) array1.max(scalar) \endcode
Trigo, power, and \n misc functions \n and the STL variants\code -array1.min(array2) -array1.max(array2) +
Trigo, power, and \n misc functions \n and the STL-like variants\code array1.abs2() array1.abs() abs(array1) array1.sqrt() sqrt(array1) array1.log() log(array1) +array1.log10() log10(array1) array1.exp() exp(array1) -array1.pow(exponent) pow(array1,exponent) +array1.pow(array2) pow(array1,array2) +array1.pow(scalar) pow(array1,scalar) + pow(scalar,array2) array1.square() array1.cube() array1.inverse() + array1.sin() sin(array1) array1.cos() cos(array1) array1.tan() tan(array1) array1.asin() asin(array1) array1.acos() acos(array1) +array1.atan() atan(array1) +array1.sinh() sinh(array1) +array1.cosh() cosh(array1) +array1.tanh() tanh(array1) +array1.arg() arg(array1) + +array1.floor() floor(array1) +array1.ceil() ceil(array1) +array1.round() round(aray1) + +array1.isFinite() isfinite(array1) +array1.isInf() isinf(array1) +array1.isNaN() isnan(array1) +\endcode +
+ + +The following coefficient-wise operators are available for all kind of expressions (matrices, vectors, and arrays), and for both real or complex scalar types: + + + +
Eigen's APISTL-like APIs\arrayworld Comments
\code +mat1.real() +mat1.imag() +mat1.conjugate() +\endcode +\code +real(array1) +imag(array1) +conj(array1) +\endcode + +\code + // read-write, no-op for real expressions + // read-only for real, read-write for complexes + // no-op for real expressions \endcode
+Some coefficient-wise operators are readily available for for matrices and vectors through the following cwise* methods: + + + +
Matrix API \matrixworldVia Array conversions
\code +mat1.cwiseMin(mat2) mat1.cwiseMin(scalar) +mat1.cwiseMax(mat2) mat1.cwiseMax(scalar) +mat1.cwiseAbs2() +mat1.cwiseAbs() +mat1.cwiseSqrt() +mat1.cwiseInverse() +mat1.cwiseProduct(mat2) +mat1.cwiseQuotient(mat2) +mat1.cwiseEqual(mat2) mat1.cwiseEqual(scalar) +mat1.cwiseNotEqual(mat2) +\endcode +\code +mat1.array().min(mat2.array()) mat1.array().min(scalar) +mat1.array().max(mat2.array()) mat1.array().max(scalar) +mat1.array().abs2() +mat1.array().abs() +mat1.array().sqrt() +mat1.array().inverse() +mat1.array() * mat2.array() +mat1.array() / mat2.array() +mat1.array() == mat2.array() mat1.array() == scalar +mat1.array() != mat2.array() +\endcode
+The main difference between the two API is that the one based on cwise* methods returns an expression in the matrix world, +while the second one (based on .array()) returns an array expression. +Recall that .array() has no cost, it only changes the available API and interpretation of the data. + +It is also very simple to apply any user defined function \c foo using DenseBase::unaryExpr together with std::ptr_fun (c++03), std::ref (c++11), or lambdas (c++11): +\code +mat1.unaryExpr(std::ptr_fun(foo)); +mat1.unaryExpr(std::ref(foo)); +mat1.unaryExpr([](double x) { return foo(x); }); +\endcode + + top \section QuickRef_Reductions Reductions diff --git a/external/eigen3/doc/SparseLinearSystems.dox b/external/eigen3/doc/SparseLinearSystems.dox index b66e2ba..fc33b93 100644 --- a/external/eigen3/doc/SparseLinearSystems.dox +++ b/external/eigen3/doc/SparseLinearSystems.dox @@ -4,33 +4,63 @@ In Eigen, there are several methods available to solve linear systems when the c \eigenAutoToc -\section TutorialSparseDirectSolvers Sparse solvers +\section TutorialSparseSolverList List of sparse solvers -%Eigen currently provides a limited set of built-in solvers, as well as wrappers to external solver libraries. -They are summarized in the following table: +%Eigen currently provides a wide set of built-in solvers, as well as wrappers to external solver libraries. +They are summarized in the following tables: + +\subsection TutorialSparseSolverList_Direct Built-in direct solvers - - - - + + + + + - - + + + - - - - - - - + + - - + + + + - - + + +
ClassModuleSolver kindMatrix kindFeatures related to performanceDependencies,License

Notes

SimplicialLLT \link SparseCholesky_Module SparseCholesky \endlinkDirect LLt factorizationSPDFill-in reducingbuilt-in, LGPL
ClassSolver kindMatrix kindFeatures related to performanceLicense

Notes

SimplicialLLT \n \#includeDirect LLt factorizationSPDFill-in reducingLGPL SimplicialLDLT is often preferable
SimplicialLDLT \link SparseCholesky_Module SparseCholesky \endlinkDirect LDLt factorizationSPDFill-in reducingbuilt-in, LGPL
SimplicialLDLT \n \#includeDirect LDLt factorizationSPDFill-in reducingLGPL Recommended for very sparse and not too large problems (e.g., 2D Poisson eq.)
ConjugateGradient\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkClassic iterative CGSPDPreconditionningbuilt-in, MPL2Recommended for large symmetric problems (e.g., 3D Poisson eq.)
BiCGSTAB\link IterativeLinearSolvers_Module IterativeLinearSolvers \endlinkIterative stabilized bi-conjugate gradientSquarePreconditionningbuilt-in, MPL2To speedup the convergence, try it with the \ref IncompleteLUT preconditioner.
SparseLU \link SparseLU_Module SparseLU \endlink LU factorization
SparseLU \n \#include LU factorization Square Fill-in reducing, Leverage fast dense algebra built-in, MPL2 optimized for small and large problems with irregular patterns
SparseQR \link SparseQR_Module SparseQR \endlink QR factorizationMPL2optimized for small and large problems with irregular patterns
SparseQR \n \#include QR factorization Any, rectangular Fill-in reducingbuilt-in, MPL2recommended for least-square problems, has a basic rank-revealing feature
Wrappers to external solvers
MPL2recommended for least-square problems, has a basic rank-revealing feature
+ +\subsection TutorialSparseSolverList_Iterative Built-in iterative solvers + + + + + + + + + + + + + + + + + + + +
ClassSolver kindMatrix kindSupported preconditioners, [default]License

Notes

ConjugateGradient \n \#include Classic iterative CGSPDIdentityPreconditioner, [DiagonalPreconditioner], IncompleteCholeskyMPL2Recommended for large symmetric problems (e.g., 3D Poisson eq.)
LeastSquaresConjugateGradient \n \#includeCG for rectangular least-square problemRectangularIdentityPreconditioner, [LeastSquareDiagonalPreconditioner]MPL2Solve for min |A'Ax-b|^2 without forming A'A
BiCGSTAB \n \#includeIterative stabilized bi-conjugate gradientSquareIdentityPreconditioner, [DiagonalPreconditioner], IncompleteLUTMPL2To speedup the convergence, try it with the \ref IncompleteLUT preconditioner.
+ +\subsection TutorialSparseSolverList_Wrapper Wrappers to external solvers + + + + @@ -53,6 +83,8 @@ They are summarized in the following table: Here \c SPD means symmetric positive definite. +\section TutorialSparseSolverConcept Sparse solver concept + All these solvers follow the same general concept. Here is a typical and general example: \code @@ -104,8 +136,10 @@ x2 = solver.solve(b2); \endcode The compute() method is equivalent to calling both analyzePattern() and factorize(). -Finally, each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on. -More details are availble in the documentations of the respective classes. +Each solver provides some specific features, such as determinant, access to the factors, controls of the iterations, and so on. +More details are available in the documentations of the respective classes. + +Finally, most of the iterative solvers, can also be used in a \b matrix-free context, see the following \link MatrixfreeSolverExample example \endlink. \section TheSparseCompute The Compute Step In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices, LU for non hermitian matrices and QR for rectangular matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize(). @@ -143,7 +177,16 @@ x2 = solver.solve(b2); For direct methods, the solution are computed at the machine precision. Sometimes, the solution need not be too accurate. In this case, the iterative methods are more suitable and the desired accuracy can be set before the solve step using \b setTolerance(). For all the available functions, please, refer to the documentation of the \link IterativeLinearSolvers_Module Iterative solvers module \endlink. \section BenchmarkRoutine -Most of the time, all you need is to know how much time it will take to qolve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. In the build directory, navigate to bench/spbench and compile the routine by typing \b make \e spbenchsolver. Run it with --help option to get the list of all available options. Basically, the matrices to test should be in MatrixMarket Coordinate format, and the routine returns the statistics from all available solvers in Eigen. +Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. In the build directory, navigate to bench/spbench and compile the routine by typing \b make \e spbenchsolver. Run it with --help option to get the list of all available options. Basically, the matrices to test should be in MatrixMarket Coordinate format, and the routine returns the statistics from all available solvers in Eigen. + +To export your matrices and right-hand-side vectors in the matrix-market format, you can the the unsupported SparseExtra module: +\code +#include +... +Eigen::saveMarket(A, "filename.mtx"); +Eigen::saveMarket(A, "filename_SPD.mtx", Eigen::Symmetric); // if A is symmetric-positive-definite +Eigen::saveMarketVector(B, "filename_b.mtx"); +\endcode The following table gives an example of XML statistics from several Eigen built-in and external solvers.
ClassModuleSolver kindMatrix kindFeatures related to performanceDependencies,License

Notes

PastixLLT \n PastixLDLT \n PastixLU\link PaStiXSupport_Module PaStiXSupport \endlinkDirect LLt, LDLt, LU factorizationsSPD \n SPD \n SquareFill-in reducing, Leverage fast dense algebra, Multithreading Requires the PaStiX package, \b CeCILL-C optimized for tough problems and symmetric patterns
diff --git a/external/eigen3/doc/SparseQuickReference.dox b/external/eigen3/doc/SparseQuickReference.dox index e0a30ed..a25622e 100644 --- a/external/eigen3/doc/SparseQuickReference.dox +++ b/external/eigen3/doc/SparseQuickReference.dox @@ -206,7 +206,7 @@ See \ref TutorialSparse_SubMatrices and below for read-write sub-matrices. sm1.innerVectors(start, size); // RW sm1.leftCols(size); // RW sm2.rightCols(size); // RO because sm2 is row-major - sm1.middleRows(start, numRows); // RO becasue sm1 is column-major + sm1.middleRows(start, numRows); // RO because sm1 is column-major sm1.middleCols(start, numCols); // RW sm1.col(j); // RW \endcode @@ -253,6 +253,20 @@ If the matrix is not in compressed form, makeCompressed() should be called befor Note that these functions are mostly provided for interoperability purposes with external libraries.\n A better access to the values of the matrix is done by using the InnerIterator class as described in \link TutorialSparse the Tutorial Sparse \endlink section + + + + +
Mapping external buffers
+\code +int outerIndexPtr[cols+1]; +int innerIndices[nnz]; +double values[nnz]; +Map > sm1(rows,cols,nnz,outerIndexPtr, // read-write + innerIndices,values); +Map > sm2(...); // read-only +\endcode +As for dense matrices, class Map can be used to see external buffers as an %Eigen's SparseMatrix object.
*/ } diff --git a/external/eigen3/doc/StlContainers.dox b/external/eigen3/doc/StlContainers.dox index d8d0d52..e0f8714 100644 --- a/external/eigen3/doc/StlContainers.dox +++ b/external/eigen3/doc/StlContainers.dox @@ -4,7 +4,7 @@ namespace Eigen { \eigenAutoToc -\section summary Executive summary +\section StlContainers_summary Executive summary Using STL containers on \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types", or classes having members of such types, requires taking the following two steps: @@ -28,7 +28,7 @@ std::map, \endcode Note that the third parameter "std::less" is just the default value, but we have to include it because we want to specify the fourth parameter, which is the allocator type. -\section vector The case of std::vector +\section StlContainers_vector The case of std::vector The situation with std::vector was even worse (explanation below) so we had to specialize it for the Eigen::aligned_allocator type. In practice you \b must use the Eigen::aligned_allocator (not another aligned allocator), \b and \#include . diff --git a/external/eigen3/doc/StructHavingEigenMembers.dox b/external/eigen3/doc/StructHavingEigenMembers.dox index 74a8d52..7fbed0e 100644 --- a/external/eigen3/doc/StructHavingEigenMembers.dox +++ b/external/eigen3/doc/StructHavingEigenMembers.dox @@ -4,11 +4,11 @@ namespace Eigen { \eigenAutoToc -\section summary Executive Summary +\section StructHavingEigenMembers_summary Executive Summary -If you define a structure having members of \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types", you must overload its "operator new" so that it generates 16-bytes-aligned pointers. Fortunately, Eigen provides you with a macro EIGEN_MAKE_ALIGNED_OPERATOR_NEW that does that for you. +If you define a structure having members of \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen types", you must overload its "operator new" so that it generates 16-bytes-aligned pointers. Fortunately, %Eigen provides you with a macro EIGEN_MAKE_ALIGNED_OPERATOR_NEW that does that for you. -\section what What kind of code needs to be changed? +\section StructHavingEigenMembers_what What kind of code needs to be changed? The kind of code that needs to be changed is this: @@ -27,7 +27,7 @@ Foo *foo = new Foo; In other words: you have a class that has as a member a \ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen object", and then you dynamically create an object of that class. -\section how How should such code be modified? +\section StructHavingEigenMembers_how How should such code be modified? Very easy, you just need to put a EIGEN_MAKE_ALIGNED_OPERATOR_NEW macro in a public part of your class, like this: @@ -48,9 +48,9 @@ Foo *foo = new Foo; This macro makes "new Foo" always return an aligned pointer. -If this approach is too intrusive, see also the \ref othersolutions. +If this approach is too intrusive, see also the \ref StructHavingEigenMembers_othersolutions "other solutions". -\section why Why is this needed? +\section StructHavingEigenMembers_why Why is this needed? OK let's say that your code looks like this: @@ -67,7 +67,7 @@ class Foo Foo *foo = new Foo; \endcode -A Eigen::Vector2d consists of 2 doubles, which is 128 bits. Which is exactly the size of a SSE packet, which makes it possible to use SSE for all sorts of operations on this vector. But SSE instructions (at least the ones that Eigen uses, which are the fast ones) require 128-bit alignment. Otherwise you get a segmentation fault. +A Eigen::Vector2d consists of 2 doubles, which is 128 bits. Which is exactly the size of a SSE packet, which makes it possible to use SSE for all sorts of operations on this vector. But SSE instructions (at least the ones that %Eigen uses, which are the fast ones) require 128-bit alignment. Otherwise you get a segmentation fault. For this reason, Eigen takes care by itself to require 128-bit alignment for Eigen::Vector2d, by doing two things: \li Eigen requires 128-bit alignment for the Eigen::Vector2d's array (of 2 doubles). With GCC, this is done with a __attribute__ ((aligned(16))). @@ -81,7 +81,7 @@ The alignment attribute of the member v is then relative to the start of the cla The solution is to let class Foo have an aligned "operator new", as we showed in the previous section. -\section movetotop Should I then put all the members of Eigen types at the beginning of my class? +\section StructHavingEigenMembers_movetotop Should I then put all the members of Eigen types at the beginning of my class? That's not required. Since Eigen takes care of declaring 128-bit alignment, all members that need it are automatically 128-bit aligned relatively to the class. So code like this works fine: @@ -95,15 +95,15 @@ public: }; \endcode -\section dynamicsize What about dynamic-size matrices and vectors? +\section StructHavingEigenMembers_dynamicsize What about dynamic-size matrices and vectors? Dynamic-size matrices and vectors, such as Eigen::VectorXd, allocate dynamically their own array of coefficients, so they take care of requiring absolute alignment automatically. So they don't cause this issue. The issue discussed here is only with \ref TopicFixedSizeVectorizable "fixed-size vectorizable matrices and vectors". -\section bugineigen So is this a bug in Eigen? +\section StructHavingEigenMembers_bugineigen So is this a bug in Eigen? No, it's not our bug. It's more like an inherent problem of the C++98 language specification, and seems to be taken care of in the upcoming language revision: see this document. -\section conditional What if I want to do this conditionnally (depending on template parameters) ? +\section StructHavingEigenMembers_conditional What if I want to do this conditionnally (depending on template parameters) ? For this situation, we offer the macro EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign). It will generate aligned operators like EIGEN_MAKE_ALIGNED_OPERATOR_NEW if NeedsToAlign is true. It will generate operators with the default alignment if NeedsToAlign is false. @@ -128,7 +128,7 @@ Foo<3> *foo3 = new Foo<3>; // foo3 has only the system default alignment guarant \endcode -\section othersolutions Other solutions +\section StructHavingEigenMembers_othersolutions Other solutions In case putting the EIGEN_MAKE_ALIGNED_OPERATOR_NEW macro everywhere is too intrusive, there exists at least two other solutions. diff --git a/external/eigen3/doc/TemplateKeyword.dox b/external/eigen3/doc/TemplateKeyword.dox index f4e4f23..b84cfda 100644 --- a/external/eigen3/doc/TemplateKeyword.dox +++ b/external/eigen3/doc/TemplateKeyword.dox @@ -5,7 +5,8 @@ namespace Eigen { There are two uses for the \c template and \c typename keywords in C++. One of them is fairly well known amongst programmers: to define templates. The other use is more obscure: to specify that an expression refers to a template function or a type. This regularly trips up programmers that use the %Eigen library, often -leading to error messages from the compiler that are difficult to understand. +leading to error messages from the compiler that are difficult to understand, such as "expected expression" or +"no match for operator<". \eigenAutoToc @@ -72,23 +73,23 @@ for operator<". The reason that the \c template keyword is necessary in the last example has to do with the rules for how templates are supposed to be compiled in C++. The compiler has to check the code for correct syntax at the point where the template is defined, without knowing the actual value of the template arguments (\c Derived1 -and \c Derived2 in the example). That means that the compiler cannot know that dst.triangularPart is +and \c Derived2 in the example). That means that the compiler cannot know that dst.triangularView is a member template and that the following < symbol is part of the delimiter for the template -parameter. Another possibility would be that dst.triangularPart is a member variable with the < +parameter. Another possibility would be that dst.triangularView is a member variable with the < symbol refering to the operator<() function. In fact, the compiler should choose the second -possibility, according to the standard. If dst.triangularPart is a member template (as in our case), +possibility, according to the standard. If dst.triangularView is a member template (as in our case), the programmer should specify this explicitly with the \c template keyword and write dst.template -triangularPart. +triangularView. The precise rules are rather complicated, but ignoring some subtleties we can summarize them as follows: - A dependent name is name that depends (directly or indirectly) on a template parameter. In the example, \c dst is a dependent name because it is of type MatrixBase<Derived1> which depends on the template parameter \c Derived1. -- If the code contains either one of the contructions xxx.yyy or xxx->yyy and \c xxx is a +- If the code contains either one of the constructs xxx.yyy or xxx->yyy and \c xxx is a dependent name and \c yyy refers to a member template, then the \c template keyword must be used before \c yyy, leading to xxx.template yyy or xxx->template yyy. -- If the code contains the contruction xxx::yyy and \c xxx is a dependent name and \c yyy refers to a - member typedef, then the \c typename keyword must be used before the whole construction, leading to +- If the code contains the construct xxx::yyy and \c xxx is a dependent name and \c yyy refers to a + member typedef, then the \c typename keyword must be used before the whole construct, leading to typename xxx::yyy. As an example where the \c typename keyword is required, consider the following code in \ref TutorialSparse diff --git a/external/eigen3/doc/TopicAliasing.dox b/external/eigen3/doc/TopicAliasing.dox index c2654ae..a8f1644 100644 --- a/external/eigen3/doc/TopicAliasing.dox +++ b/external/eigen3/doc/TopicAliasing.dox @@ -153,10 +153,11 @@ not necessary to evaluate the right-hand side explicitly. \section TopicAliasingMatrixMult Aliasing and matrix multiplication -Matrix multiplication is the only operation in %Eigen that assumes aliasing by default. Thus, if \c matA is a -matrix, then the statement matA = matA * matA; is safe. All other operations in %Eigen assume that -there are no aliasing problems, either because the result is assigned to a different matrix or because it is a -component-wise operation. +Matrix multiplication is the only operation in %Eigen that assumes aliasing by default, under the +condition that the destination matrix is not resized. +Thus, if \c matA is a \b squared matrix, then the statement matA = matA * matA; is safe. +All other operations in %Eigen assume that there are no aliasing problems, +either because the result is assigned to a different matrix or because it is a component-wise operation. @@ -198,6 +199,27 @@ may get wrong results: \verbinclude TopicAliasing_mult3.out
ExampleOutput
+Moreover, starting in Eigen 3.3, aliasing is \b not assumed if the destination matrix is resized and the product is not directly assigned to the destination. +Therefore, the following example is also wrong: + + + + +
ExampleOutput
+\include TopicAliasing_mult4.cpp + +\verbinclude TopicAliasing_mult4.out +
+ +As for any aliasing issue, you can resolve it by explicitly evaluating the expression prior to assignment: + + + +
ExampleOutput
+\include TopicAliasing_mult5.cpp + +\verbinclude TopicAliasing_mult5.out +
\section TopicAliasingSummary Summary diff --git a/external/eigen3/doc/TopicCMakeGuide.dox b/external/eigen3/doc/TopicCMakeGuide.dox new file mode 100644 index 0000000..896cfa8 --- /dev/null +++ b/external/eigen3/doc/TopicCMakeGuide.dox @@ -0,0 +1,52 @@ +namespace Eigen { + +/** + +\page TopicCMakeGuide Using %Eigen in CMake Projects + +%Eigen provides native CMake support which allows the library to be easily +used in CMake projects. + +\note %CMake 3.0 (or later) is required to enable this functionality. + +%Eigen exports a CMake target called `Eigen3::Eigen` which can be imported +using the `find_package` CMake command and used by calling +`target_link_libraries` as in the following example: +\code{.cmake} +cmake_minimum_required (VERSION 3.0) +project (myproject) + +find_package (Eigen3 3.3 REQUIRED NO_MODULE) + +add_executable (example example.cpp) +target_link_libraries (example Eigen3::Eigen) +\endcode + +The above code snippet must be placed in a file called `CMakeLists.txt` alongside +`example.cpp`. After running +\code{.sh} +$ cmake path-to-example-directory +\endcode +CMake will produce project files that generate an executable called `example` +which requires at least version 3.3 of %Eigen. Here, `path-to-example-directory` +is the path to the directory that contains both `CMakeLists.txt` and +`example.cpp`. + +If you have multiple installed version of %Eigen, you can pick your favorite one by setting the \c Eigen3_DIR cmake's variable to the respective path containing the \c Eigen3*.cmake files. For instance: +\code +cmake path-to-example-directory -DEigen3_DIR=$HOME/mypackages/share/eigen3/cmake/ +\endcode + +If the `REQUIRED` option is omitted when locating %Eigen using +`find_package`, one can check whether the package was found as follows: +\code{.cmake} +find_package (Eigen3 3.3 NO_MODULE) + +if (TARGET Eigen3::Eigen) + # Use the imported target +endif (TARGET Eigen3::Eigen) +\endcode + +*/ + +} diff --git a/external/eigen3/doc/TopicLazyEvaluation.dox b/external/eigen3/doc/TopicLazyEvaluation.dox index 393bc41..101ef8c 100644 --- a/external/eigen3/doc/TopicLazyEvaluation.dox +++ b/external/eigen3/doc/TopicLazyEvaluation.dox @@ -36,7 +36,7 @@ Here is now a more involved example: Eigen chooses lazy evaluation at every stage in that example, which is clearly the correct choice. In fact, lazy evaluation is the "default choice" and Eigen will choose it except in a few circumstances. -The first circumstance in which Eigen chooses immediate evaluation, is when it sees an assignment a = b; and the expression \c b has the evaluate-before-assigning \link flags flag\endlink. The most important example of such an expression is the \link GeneralProduct matrix product expression\endlink. For example, when you do +The first circumstance in which Eigen chooses immediate evaluation, is when it sees an assignment a = b; and the expression \c b has the evaluate-before-assigning \link flags flag\endlink. The most important example of such an expression is the \link Product matrix product expression\endlink. For example, when you do \code matrix = matrix * matrix; \endcode @@ -48,7 +48,7 @@ What if you know that the result does no alias the operand of the product and wa Here, since we know that matrix2 is not the same matrix as matrix1, we know that lazy evaluation is not dangerous, so we may force lazy evaluation. Concretely, the effect of noalias() here is to bypass the evaluate-before-assigning \link flags flag\endlink. -The second circumstance in which Eigen chooses immediate evaluation, is when it sees a nested expression such as a + b where \c b is already an expression having the evaluate-before-nesting \link flags flag\endlink. Again, the most important example of such an expression is the \link GeneralProduct matrix product expression\endlink. For example, when you do +The second circumstance in which Eigen chooses immediate evaluation, is when it sees a nested expression such as a + b where \c b is already an expression having the evaluate-before-nesting \link flags flag\endlink. Again, the most important example of such an expression is the \link Product matrix product expression\endlink. For example, when you do \code matrix1 = matrix2 + matrix3 * matrix4; \endcode diff --git a/external/eigen3/doc/TopicLinearAlgebraDecompositions.dox b/external/eigen3/doc/TopicLinearAlgebraDecompositions.dox index 8649cc2..4914706 100644 --- a/external/eigen3/doc/TopicLinearAlgebraDecompositions.dox +++ b/external/eigen3/doc/TopicLinearAlgebraDecompositions.dox @@ -4,6 +4,7 @@ namespace Eigen { This page presents a catalogue of the dense matrix decompositions offered by Eigen. For an introduction on linear solvers and decompositions, check this \link TutorialLinearAlgebra page \endlink. +To get an overview of the true relative speed of the different decomposition, check this \link DenseDecompositionBenchmark benchmark \endlink. \section TopicLinAlgBigTable Catalogue of decompositions offered by Eigen @@ -116,7 +117,7 @@ For an introduction on linear solvers and decompositions, check this \link Tutor JacobiSVD (two-sided) - Slow (but fast for small matrices) - Excellent-Proven3 + Proven3 Yes Singular values/vectors, least squares Yes (and does least squares) @@ -132,7 +133,7 @@ For an introduction on linear solvers and decompositions, check this \link Tutor Yes Eigenvalues/vectors - - Good + Excellent Closed forms for 2x2 and 3x3 @@ -249,13 +250,14 @@ For an introduction on linear solvers and decompositions, check this \link Tutor
Implicit Multi Threading (MT)
Means the algorithm can take advantage of multicore processors via OpenMP. "Implicit" means the algortihm itself is not parallelized, but that it relies on parallelized matrix-matrix product rountines.
Explicit Multi Threading (MT)
-
Means the algorithm is explicitely parallelized to take advantage of multicore processors via OpenMP.
+
Means the algorithm is explicitly parallelized to take advantage of multicore processors via OpenMP.
Meta-unroller
Means the algorithm is automatically and explicitly unrolled for very small fixed size matrices.
+ */ } diff --git a/external/eigen3/doc/TopicMultithreading.dox b/external/eigen3/doc/TopicMultithreading.dox index 7db2b0e..47c9b26 100644 --- a/external/eigen3/doc/TopicMultithreading.dox +++ b/external/eigen3/doc/TopicMultithreading.dox @@ -8,13 +8,13 @@ Some Eigen's algorithms can exploit the multiple cores present in your hardware. * GCC: \c -fopenmp * ICC: \c -openmp * MSVC: check the respective option in the build properties. -You can control the number of thread that will be used using either the OpenMP API or Eiegn's API using the following priority: +You can control the number of thread that will be used using either the OpenMP API or Eigen's API using the following priority: \code OMP_NUM_THREADS=n ./my_program omp_set_num_threads(n); Eigen::setNbThreads(n); \endcode -Unless setNbThreads has been called, Eigen uses the number of threads specified by OpenMP. You can restore this bahavior by calling \code setNbThreads(0); \endcode +Unless setNbThreads has been called, Eigen uses the number of threads specified by OpenMP. You can restore this behavior by calling \code setNbThreads(0); \endcode You can query the number of threads that will be used with: \code n = Eigen::nbThreads( ); @@ -22,8 +22,12 @@ n = Eigen::nbThreads( ); You can disable Eigen's multi threading at compile time by defining the EIGEN_DONT_PARALLELIZE preprocessor token. Currently, the following algorithms can make use of multi-threading: - * general matrix - matrix products - * PartialPivLU + - general dense matrix - matrix products + - PartialPivLU + - row-major-sparse * dense vector/matrix products + - ConjugateGradient with \c Lower|Upper as the \c UpLo template parameter. + - BiCGSTAB with a row-major sparse matrix format. + - LeastSquaresConjugateGradient \section TopicMultiThreading_UsingEigenWithMT Using Eigen in a multi-threaded application @@ -39,6 +43,10 @@ int main(int argc, char** argv) } \endcode +\note With Eigen 3.3, and a fully C++11 compliant compiler (i.e., thread-safe static local variable initialization), then calling \c initParallel() is optional. + +\warning note that all functions generating random matrices are \b not re-entrant nor thread-safe. Those include DenseBase::Random(), and DenseBase::setRandom() despite a call to Eigen::initParallel(). This is because these functions are based on std::rand which is not re-entrant. For thread-safe random generator, we recommend the use of boost::random or c++11 random feature. + In the case your application is parallelized with OpenMP, you might want to disable Eigen's own parallization as detailed in the previous section. */ diff --git a/external/eigen3/doc/TutorialArrayClass.dox b/external/eigen3/doc/TutorialArrayClass.dox index 6432684..f6f3510 100644 --- a/external/eigen3/doc/TutorialArrayClass.dox +++ b/external/eigen3/doc/TutorialArrayClass.dox @@ -157,7 +157,7 @@ The following example shows how to use array operations on a Matrix object by em * to multiply them coefficient-wise and assigns the result to the matrix variable \c result (this is legal because Eigen allows assigning array expressions to matrix variables). -As a matter of fact, this usage case is so common that Eigen provides a \link MatrixBase::cwiseProduct() const +As a matter of fact, this usage case is so common that Eigen provides a \link MatrixBase::cwiseProduct const .cwiseProduct(.) \endlink method for matrices to compute the coefficient-wise product. This is also shown in the example program. diff --git a/external/eigen3/doc/TutorialGeometry.dox b/external/eigen3/doc/TutorialGeometry.dox index 372a275..2e1420f 100644 --- a/external/eigen3/doc/TutorialGeometry.dox +++ b/external/eigen3/doc/TutorialGeometry.dox @@ -126,11 +126,12 @@ Apply the transformation to a \b vector \code VectorNf vec1, vec2; vec2 = t.linear() * vec1;\endcode -Apply a \em general transformation \n to a \b normal \b vector -(explanations)\code +Apply a \em general transformation \n to a \b normal \b vector \n +\code VectorNf n1, n2; MatrixNf normalMatrix = t.linear().inverse().transpose(); n2 = (normalMatrix * n1).normalized();\endcode +(See subject 5.27 of this faq for the explanations) Apply a transformation with \em pure \em rotation \n to a \b normal \b vector (no scaling, no shear)\code @@ -231,8 +232,8 @@ On the other hand, since there exist 24 different conventions, they are pretty c to create a rotation matrix according to the 2-1-2 convention.\code Matrix3f m; m = AngleAxisf(angle1, Vector3f::UnitZ()) -* * AngleAxisf(angle2, Vector3f::UnitY()) -* * AngleAxisf(angle3, Vector3f::UnitZ()); + * AngleAxisf(angle2, Vector3f::UnitY()) + * AngleAxisf(angle3, Vector3f::UnitZ()); \endcode diff --git a/external/eigen3/doc/TutorialLinearAlgebra.dox b/external/eigen3/doc/TutorialLinearAlgebra.dox index b09f354..cb92cee 100644 --- a/external/eigen3/doc/TutorialLinearAlgebra.dox +++ b/external/eigen3/doc/TutorialLinearAlgebra.dox @@ -40,8 +40,9 @@ depending on your matrix and the trade-off you want to make: Decomposition Method - Requirements on the matrix - Speed + Requirements
on the matrix + Speed
(small-to-medium) + Speed
(large) Accuracy @@ -49,6 +50,7 @@ depending on your matrix and the trade-off you want to make: partialPivLu() Invertible ++ + ++ + @@ -56,6 +58,7 @@ depending on your matrix and the trade-off you want to make: fullPivLu() None - + - - +++ @@ -63,20 +66,23 @@ depending on your matrix and the trade-off you want to make: householderQr() None ++ + ++ + ColPivHouseholderQR colPivHouseholderQr() None - + ++ + - + +++ FullPivHouseholderQR fullPivHouseholderQr() None - + - - +++ @@ -84,21 +90,31 @@ depending on your matrix and the trade-off you want to make: llt() Positive definite +++ + +++ + LDLT ldlt() - Positive or negative semidefinite + Positive or negative
semidefinite +++ + + ++ + + JacobiSVD + jacobiSvd() + None + - - + - - - + +++ + All of these decompositions offer a solve() method that works as in the above example. For example, if your matrix is positive definite, the above table says that a very good -choice is then the LDLT decomposition. Here's an example, also demonstrating that using a general +choice is then the LLT or LDLT decomposition. Here's an example, also demonstrating that using a general matrix (not a vector) as right hand side is possible. @@ -167,8 +183,8 @@ Here is an example: \section TutorialLinAlgLeastsquares Least squares solving -The best way to do least squares solving is with a SVD decomposition. Eigen provides one as the JacobiSVD class, and its solve() -is doing least-squares solving. +The most accurate method to do least squares solving is with a SVD decomposition. Eigen provides one +as the JacobiSVD class, and its solve() is doing least-squares solving. Here is an example:
@@ -179,9 +195,10 @@ Here is an example:
-Another way, potentially faster but less reliable, is to use a LDLT decomposition -of the normal matrix. In any case, just read any reference text on least squares, and it will be very easy for you -to implement any linear least squares computation on top of Eigen. +Another methods, potentially faster but less reliable, are to use a Cholesky decomposition of the +normal matrix or a QR decomposition. Our page on \link LeastSquares least squares solving \endlink +has more details. + \section TutorialLinAlgSeparateComputation Separating the computation from the construction diff --git a/external/eigen3/doc/TutorialReductionsVisitorsBroadcasting.dox b/external/eigen3/doc/TutorialReductionsVisitorsBroadcasting.dox index 992cf6f..f5322b4 100644 --- a/external/eigen3/doc/TutorialReductionsVisitorsBroadcasting.dox +++ b/external/eigen3/doc/TutorialReductionsVisitorsBroadcasting.dox @@ -32,7 +32,7 @@ Eigen also provides the \link MatrixBase::norm() norm() \endlink method, which r These operations can also operate on matrices; in that case, a n-by-p matrix is seen as a vector of size (n*p), so for example the \link MatrixBase::norm() norm() \endlink method returns the "Frobenius" or "Hilbert-Schmidt" norm. We refrain from speaking of the \f$\ell^2\f$ norm of a matrix because that can mean different things. -If you want other \f$\ell^p\f$ norms, use the \link MatrixBase::lpNorm() lpNnorm

() \endlink method. The template parameter \a p can take the special value \a Infinity if you want the \f$\ell^\infty\f$ norm, which is the maximum of the absolute values of the coefficients. +If you want other coefficient-wise \f$\ell^p\f$ norms, use the \link MatrixBase::lpNorm lpNorm

() \endlink method. The template parameter \a p can take the special value \a Infinity if you want the \f$\ell^\infty\f$ norm, which is the maximum of the absolute values of the coefficients. The following example demonstrates these methods. @@ -45,6 +45,17 @@ The following example demonstrates these methods. \verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_norm.out +\b Operator \b norm: The 1-norm and \f$\infty\f$-norm matrix operator norms can easily be computed as follows: + + + +
Example:Output:
+\include Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp + +\verbinclude Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.out +
+See below for more explanations on the syntax of these expressions. + \subsection TutorialReductionsVisitorsBroadcastingReductionsBool Boolean reductions The following reductions operate on boolean values: @@ -79,7 +90,7 @@ Array. The arguments passed to a visitor are pointers to the variables where the row and column position are to be stored. These variables should be of type -\link DenseBase::Index Index \endlink, as shown below: +\link Eigen::Index Index \endlink, as shown below: @@ -90,17 +101,16 @@ row and column position are to be stored. These variables should be of type \verbinclude Tutorial_ReductionsVisitorsBroadcasting_visitors.out
Example:Output:
-Note that both functions also return the value of the minimum or maximum coefficient if needed, -as if it was a typical reduction operation. +Both functions also return the value of the minimum or maximum coefficient. \section TutorialReductionsVisitorsBroadcastingPartialReductions Partial reductions Partial reductions are reductions that can operate column- or row-wise on a Matrix or Array, applying the reduction operation on each column or row and -returning a column or row-vector with the corresponding values. Partial reductions are applied +returning a column or row vector with the corresponding values. Partial reductions are applied with \link DenseBase::colwise() colwise() \endlink or \link DenseBase::rowwise() rowwise() \endlink. A simple example is obtaining the maximum of the elements -in each column in a given matrix, storing the result in a row-vector: +in each column in a given matrix, storing the result in a row vector: @@ -122,8 +132,7 @@ The same operation can be performed row-wise: \verbinclude Tutorial_ReductionsVisitorsBroadcasting_rowwise.out
Example:Output:
-Note that column-wise operations return a 'row-vector' while row-wise operations -return a 'column-vector' +Note that column-wise operations return a row vector, while row-wise operations return a column vector. \subsection TutorialReductionsVisitorsBroadcastingPartialReductionsCombined Combining partial reductions with other operations It is also possible to use the result of a partial reduction to do further processing. @@ -165,7 +174,7 @@ The concept behind broadcasting is similar to partial reductions, with the diffe constructs an expression where a vector (column or row) is interpreted as a matrix by replicating it in one direction. -A simple example is to add a certain column-vector to each column in a matrix. +A simple example is to add a certain column vector to each column in a matrix. This can be accomplished with: @@ -242,7 +251,7 @@ is a new matrix whose size is the same as matrix m: \f[ \f] - (m.colwise() - v).colwise().squaredNorm() is a partial reduction, computing the squared norm column-wise. The result of -this operation is a row-vector where each coefficient is the squared Euclidean distance between each column in m and v: \f[ +this operation is a row vector where each coefficient is the squared Euclidean distance between each column in m and v: \f[ \mbox{(m.colwise() - v).colwise().squaredNorm()} = \begin{bmatrix} 1 & 505 & 32 & 50 diff --git a/external/eigen3/doc/TutorialReshapeSlicing.dox b/external/eigen3/doc/TutorialReshapeSlicing.dox new file mode 100644 index 0000000..3730a5d --- /dev/null +++ b/external/eigen3/doc/TutorialReshapeSlicing.dox @@ -0,0 +1,65 @@ +namespace Eigen { + +/** \eigenManualPage TutorialReshapeSlicing Reshape and Slicing + +%Eigen does not expose convenient methods to take slices or to reshape a matrix yet. +Nonetheless, such features can easily be emulated using the Map class. + +\eigenAutoToc + +\section TutorialReshape Reshape + +A reshape operation consists in modifying the sizes of a matrix while keeping the same coefficients. +Instead of modifying the input matrix itself, which is not possible for compile-time sizes, the approach consist in creating a different \em view on the storage using class Map. +Here is a typical example creating a 1D linear view of a matrix: + +
+ + +
Example:Output:
+\include Tutorial_ReshapeMat2Vec.cpp + +\verbinclude Tutorial_ReshapeMat2Vec.out +
+ +Remark how the storage order of the input matrix modifies the order of the coefficients in the linear view. +Here is another example reshaping a 2x6 matrix to a 6x2 one: + + + +
Example:Output:
+\include Tutorial_ReshapeMat2Mat.cpp + +\verbinclude Tutorial_ReshapeMat2Mat.out +
+ + + +\section TutorialSlicing Slicing + +Slicing consists in taking a set of rows, columns, or elements, uniformly spaced within a matrix. +Again, the class Map allows to easily mimic this feature. + +For instance, one can skip every P elements in a vector: + + + +
Example:Output:
+\include Tutorial_SlicingVec.cpp + +\verbinclude Tutorial_SlicingVec.out +
+ +One can olso take one column over three using an adequate outer-stride or inner-stride depending on the actual storage order: + + + +
Example:Output:
+\include Tutorial_SlicingCol.cpp + +\verbinclude Tutorial_SlicingCol.out +
+ +*/ + +} diff --git a/external/eigen3/doc/TutorialSparse.dox b/external/eigen3/doc/TutorialSparse.dox index ee206cc..3529074 100644 --- a/external/eigen3/doc/TutorialSparse.dox +++ b/external/eigen3/doc/TutorialSparse.dox @@ -83,7 +83,7 @@ There is no notion of compressed/uncompressed mode for a SparseVector. \section TutorialSparseExample First example -Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \nabla u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions. +Before describing each individual class, let's start with the following typical example: solving the Laplace equation \f$ \Delta u = 0 \f$ on a regular 2D grid using a finite difference scheme and Dirichlet boundary conditions. Such problem can be mathematically expressed as a linear problem of the form \f$ Ax=b \f$ where \f$ x \f$ is the vector of \c m unknowns (in our case, the values of the pixels), \f$ b \f$ is the right hand side vector resulting from the boundary conditions, and \f$ A \f$ is an \f$ m \times m \f$ matrix containing only a few non-zero elements resulting from the discretization of the Laplacian operator. @@ -253,15 +253,19 @@ SparseMatrix A, B; B = SparseMatrix(A.transpose()) + A; \endcode -Some binary coefficient-wise operators can also mix sparse and dense expressions: +Binary coefficient wise operators can also mix sparse and dense expressions: \code sm2 = sm1.cwiseProduct(dm1); -dm1 += sm1; +dm2 = sm1 + dm1; +dm2 = dm1 - sm1; \endcode +Performance-wise, the adding/subtracting sparse and dense matrices is better performed in two steps. For instance, instead of doing dm2 = sm1 + dm1, better write: +\code +dm2 = dm1; +dm2 += sm1; +\endcode +This version has the advantage to fully exploit the higher performance of dense storage (no indirection, SIMD, etc.), and to pay the cost of slow sparse evaluation on the few non-zeros of the sparse matrix only. -However, it is not yet possible to add a sparse and a dense matrix as in dm2 = sm1 + dm1. -Please write this as the equivalent dm2 = dm1; dm2 += sm1 (we plan to lift this restriction -in the next release of %Eigen). %Sparse expressions also support transposition: \code diff --git a/external/eigen3/doc/UnalignedArrayAssert.dox b/external/eigen3/doc/UnalignedArrayAssert.dox index b0d6e18..95d95a2 100644 --- a/external/eigen3/doc/UnalignedArrayAssert.dox +++ b/external/eigen3/doc/UnalignedArrayAssert.dox @@ -8,7 +8,7 @@ my_program: path/to/eigen/Eigen/src/Core/DenseStorage.h:44: Eigen::internal::matrix_array::internal::matrix_array() [with T = double, int Size = 2, int MatrixOptions = 2, bool Align = true]: Assertion `(reinterpret_cast(array) & (sizemask)) == 0 && "this assertion -is explained here: http://eigen.tuxfamily.org/dox/group__TopicUnalignedArrayAssert.html +is explained here: http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html **** READ THIS WEB PAGE !!! ****"' failed. @@ -92,27 +92,28 @@ Note that here, Eigen::Quaternionf is only used as an example, more generally th \section explanation General explanation of this assertion -\ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen objects" must absolutely be created at 16-byte-aligned locations, otherwise SIMD instructions adressing them will crash. +\ref TopicFixedSizeVectorizable "fixed-size vectorizable Eigen objects" must absolutely be created at 16-byte-aligned locations, otherwise SIMD instructions addressing them will crash. Eigen normally takes care of these alignment issues for you, by setting an alignment attribute on them and by overloading their "operator new". However there are a few corner cases where these alignment settings get overridden: they are the possible causes for this assertion. -\section getrid I don't care about vectorization, how do I get rid of that stuff? +\section getrid I don't care about optimal vectorization, how do I get rid of that stuff? -Two possibilities: +Three possibilities:
    -
  • Define EIGEN_DONT_ALIGN_STATICALLY. That disables all 128-bit static alignment code, while keeping 128-bit heap alignment. This has the effect of - disabling vectorization for fixed-size objects (like Matrix4d) while keeping vectorization of dynamic-size objects - (like MatrixXd). But do note that this breaks ABI compatibility with the default behavior of 128-bit static alignment.
  • -
  • Or define both EIGEN_DONT_VECTORIZE and EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT. This keeps the - 128-bit alignment code and thus preserves ABI compatibility, but completely disables vectorization.
  • +
  • Use the \c DontAlign option to Matrix, Array, Quaternion, etc. objects that gives you trouble. This way Eigen won't try to align them, and thus won"t assume any special alignment. On the down side, you will pay the cost of unaligned loads/stores for them, but on modern CPUs, the overhead is either null or marginal. See \link StructHavingEigenMembers_othersolutions here \endlink for an example.
  • +
  • Define \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_ALIGN_STATICALLY \endlink. That disables all 16-byte (and above) static alignment code, while keeping 16-byte (or above) heap alignment. This has the effect of + vectorizing fixed-size objects (like Matrix4d) through unaligned stores (as controlled by \link TopicPreprocessorDirectivesPerformance EIGEN_UNALIGNED_VECTORIZE \endlink), while keeping unchanged the vectorization of dynamic-size objects + (like MatrixXd). But do note that this breaks ABI compatibility with the default behavior of static alignment.
  • +
  • Or define both \link TopicPreprocessorDirectivesPerformance EIGEN_DONT_VECTORIZE \endlink and EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT. This keeps the + 16-byte alignment code and thus preserves ABI compatibility, but completely disables vectorization.
-If you want to know why defining EIGEN_DONT_VECTORIZE does not by itself disable 128-bit alignment and the assertion, here's the explanation: +If you want to know why defining EIGEN_DONT_VECTORIZE does not by itself disable 16-byte alignment and the assertion, here's the explanation: It doesn't disable the assertion, because otherwise code that runs fine without vectorization would suddenly crash when enabling vectorization. -It doesn't disable 128bit alignment, because that would mean that vectorized and non-vectorized code are not mutually ABI-compatible. This ABI compatibility is very important, even for people who develop only an in-house application, as for instance one may want to have in the same application a vectorized path and a non-vectorized path. +It doesn't disable 16-byte alignment, because that would mean that vectorized and non-vectorized code are not mutually ABI-compatible. This ABI compatibility is very important, even for people who develop only an in-house application, as for instance one may want to have in the same application a vectorized path and a non-vectorized path. */ diff --git a/external/eigen3/doc/UsingBlasLapackBackends.dox b/external/eigen3/doc/UsingBlasLapackBackends.dox new file mode 100644 index 0000000..caa5971 --- /dev/null +++ b/external/eigen3/doc/UsingBlasLapackBackends.dox @@ -0,0 +1,133 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + Copyright (C) 2011-2016 Gael Guennebaud + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Documentation on the use of BLAS/LAPACK libraries through Eigen + ******************************************************************************** +*/ + +namespace Eigen { + +/** \page TopicUsingBlasLapack Using BLAS/LAPACK from %Eigen + + +Since %Eigen version 3.3 and later, any F77 compatible BLAS or LAPACK libraries can be used as backends for dense matrix products and dense matrix decompositions. +For instance, one can use Intel® MKL, Apple's Accelerate framework on OSX, OpenBLAS, Netlib LAPACK, etc. + +Do not miss this \link TopicUsingIntelMKL page \endlink for further discussions on the specific use of Intel® MKL (also includes VML, PARDISO, etc.) + +In order to use an external BLAS and/or LAPACK library, you must link you own application to the respective libraries and their dependencies. +For LAPACK, you must also link to the standard Lapacke library, which is used as a convenient think layer between %Eigen's C++ code and LAPACK F77 interface. Then you must activate their usage by defining one or multiple of the following macros (\b before including any %Eigen's header): + +\note For Mac users, in order to use the lapack version shipped with the Accelerate framework, you also need the lapacke library. +Using MacPorts, this is as easy as: +\code +sudo port install lapack +\endcode +and then use the following link flags: \c -framework \c Accelerate \c /opt/local/lib/lapack/liblapacke.dylib + +
+ + + +
\c EIGEN_USE_BLAS Enables the use of external BLAS level 2 and 3 routines (compatible with any F77 BLAS interface)
\c EIGEN_USE_LAPACKE Enables the use of external Lapack routines via the Lapacke C interface to Lapack (compatible with any F77 LAPACK interface)
\c EIGEN_USE_LAPACKE_STRICT Same as \c EIGEN_USE_LAPACKE but algorithms of lower numerical robustness are disabled. \n This currently concerns only JacobiSVD which otherwise would be replaced by \c gesvd that is less robust than Jacobi rotations.
+ +When doing so, a number of %Eigen's algorithms are silently substituted with calls to BLAS or LAPACK routines. +These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex, and \c complex. +Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms. + +The breadth of %Eigen functionality that can be substituted is listed in the table below. + + + + + + + + + + +
Functional domainCode exampleBLAS/LAPACK routines
Matrix-matrix operations \n \c EIGEN_USE_BLAS \code +m1*m2.transpose(); +m1.selfadjointView()*m2; +m1*m2.triangularView(); +m1.selfadjointView().rankUpdate(m2,1.0); +\endcode\code +?gemm +?symm/?hemm +?trmm +dsyrk/ssyrk +\endcode
Matrix-vector operations \n \c EIGEN_USE_BLAS \code +m1.adjoint()*b; +m1.selfadjointView()*b; +m1.triangularView()*b; +\endcode\code +?gemv +?symv/?hemv +?trmv +\endcode
LU decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +v1 = m1.lu().solve(v2); +\endcode\code +?getrf +\endcode
Cholesky decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +v1 = m2.selfadjointView().llt().solve(v2); +\endcode\code +?potrf +\endcode
QR decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +m1.householderQr(); +m1.colPivHouseholderQr(); +\endcode\code +?geqrf +?geqp3 +\endcode
Singular value decomposition \n \c EIGEN_USE_LAPACKE \code +JacobiSVD svd; +svd.compute(m1, ComputeThinV); +\endcode\code +?gesvd +\endcode
Eigen-value decompositions \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +EigenSolver es(m1); +ComplexEigenSolver ces(m1); +SelfAdjointEigenSolver saes(m1+m1.transpose()); +GeneralizedSelfAdjointEigenSolver + gsaes(m1+m1.transpose(),m2+m2.transpose()); +\endcode\code +?gees +?gees +?syev/?heev +?syev/?heev, +?potrf +\endcode
Schur decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code +RealSchur schurR(m1); +ComplexSchur schurC(m1); +\endcode\code +?gees +\endcode
+In the examples, m1 and m2 are dense matrices and v1 and v2 are dense vectors. + +*/ + +} diff --git a/external/eigen3/doc/UsingIntelMKL.dox b/external/eigen3/doc/UsingIntelMKL.dox index 84db992..a1a3a18 100644 --- a/external/eigen3/doc/UsingIntelMKL.dox +++ b/external/eigen3/doc/UsingIntelMKL.dox @@ -32,107 +32,45 @@ namespace Eigen { -/** \page TopicUsingIntelMKL Using Intel® Math Kernel Library from Eigen +/** \page TopicUsingIntelMKL Using Intel® MKL from %Eigen -\section TopicUsingIntelMKL_Intro Eigen and Intel® Math Kernel Library (Intel® MKL) + + +Since %Eigen version 3.1 and later, users can benefit from built-in Intel® Math Kernel Library (MKL) optimizations with an installed copy of Intel MKL 10.3 (or later). -Since Eigen version 3.1 and later, users can benefit from built-in Intel MKL optimizations with an installed copy of Intel MKL 10.3 (or later). Intel MKL provides highly optimized multi-threaded mathematical routines for x86-compatible architectures. Intel MKL is available on Linux, Mac and Windows for both Intel64 and IA32 architectures. \note Intel® MKL is a proprietary software and it is the responsibility of users to buy or register for community (free) Intel MKL licenses for their products. Moreover, the license of the user product has to allow linking to proprietary software that excludes any unmodified versions of the GPL. -Using Intel MKL through Eigen is easy: --# define the \c EIGEN_USE_MKL_ALL macro before including any Eigen's header +Using Intel MKL through %Eigen is easy: +-# define the \c EIGEN_USE_MKL_ALL macro before including any %Eigen's header -# link your program to MKL libraries (see the MKL linking advisor) -# on a 64bits system, you must use the LP64 interface (not the ILP64 one) -When doing so, a number of Eigen's algorithms are silently substituted with calls to Intel MKL routines. +When doing so, a number of %Eigen's algorithms are silently substituted with calls to Intel MKL routines. These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex, and \c complex. Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms. -In addition you can coarsely select choose which parts will be substituted by defining one or multiple of the following macros: +In addition you can choose which parts will be substituted by defining one or multiple of the following macros: - - - + + +
\c EIGEN_USE_BLAS Enables the use of external BLAS level 2 and 3 routines (currently works with Intel MKL only)
\c EIGEN_USE_LAPACKE Enables the use of external Lapack routines via the Intel Lapacke C interface to Lapack (currently works with Intel MKL only)
\c EIGEN_USE_LAPACKE_STRICT Same as \c EIGEN_USE_LAPACKE but algorithm of lower robustness are disabled. This currently concerns only JacobiSVD which otherwise would be replaced by \c gesvd that is less robust than Jacobi rotations.
\c EIGEN_USE_BLAS Enables the use of external BLAS level 2 and 3 routines
\c EIGEN_USE_LAPACKE Enables the use of external Lapack routines via the Lapacke C interface to Lapack
\c EIGEN_USE_LAPACKE_STRICT Same as \c EIGEN_USE_LAPACKE but algorithm of lower robustness are disabled. \n This currently concerns only JacobiSVD which otherwise would be replaced by \c gesvd that is less robust than Jacobi rotations.
\c EIGEN_USE_MKL_VML Enables the use of Intel VML (vector operations)
\c EIGEN_USE_MKL_ALL Defines \c EIGEN_USE_BLAS, \c EIGEN_USE_LAPACKE, and \c EIGEN_USE_MKL_VML
-Finally, the PARDISO sparse solver shipped with Intel MKL can be used through the \ref PardisoLU, \ref PardisoLLT and \ref PardisoLDLT classes of the \ref PardisoSupport_Module. - +Note that the BLAS and LAPACKE backends can be enabled for any F77 compatible BLAS and LAPACK libraries. See this \link TopicUsingBlasLapack page \endlink for the details. -\section TopicUsingIntelMKL_SupportedFeatures List of supported features +Finally, the PARDISO sparse solver shipped with Intel MKL can be used through the \ref PardisoLU, \ref PardisoLLT and \ref PardisoLDLT classes of the \ref PardisoSupport_Module. -The breadth of Eigen functionality covered by Intel MKL is listed in the table below. +The following table summarizes the list of functions covered by \c EIGEN_USE_MKL_VML: - - - - - - - - - - +
Functional domainCode exampleMKL routines
Matrix-matrix operations \n \c EIGEN_USE_BLAS \code -m1*m2.transpose(); -m1.selfadjointView()*m2; -m1*m2.triangularView(); -m1.selfadjointView().rankUpdate(m2,1.0); -\endcode\code -?gemm -?symm/?hemm -?trmm -dsyrk/ssyrk -\endcode
Matrix-vector operations \n \c EIGEN_USE_BLAS \code -m1.adjoint()*b; -m1.selfadjointView()*b; -m1.triangularView()*b; -\endcode\code -?gemv -?symv/?hemv -?trmv -\endcode
LU decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -v1 = m1.lu().solve(v2); -\endcode\code -?getrf -\endcode
Cholesky decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -v1 = m2.selfadjointView().llt().solve(v2); -\endcode\code -?potrf -\endcode
QR decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -m1.householderQr(); -m1.colPivHouseholderQr(); -\endcode\code -?geqrf -?geqp3 -\endcode
Singular value decomposition \n \c EIGEN_USE_LAPACKE \code -JacobiSVD svd; -svd.compute(m1, ComputeThinV); -\endcode\code -?gesvd -\endcode
Eigen-value decompositions \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -EigenSolver es(m1); -ComplexEigenSolver ces(m1); -SelfAdjointEigenSolver saes(m1+m1.transpose()); -GeneralizedSelfAdjointEigenSolver - gsaes(m1+m1.transpose(),m2+m2.transpose()); -\endcode\code -?gees -?gees -?syev/?heev -?syev/?heev, -?potrf -\endcode
Schur decomposition \n \c EIGEN_USE_LAPACKE \n \c EIGEN_USE_LAPACKE_STRICT \code -RealSchur schurR(m1); -ComplexSchur schurC(m1); -\endcode\code -?gees -\endcode
Vector Math \n \c EIGEN_USE_MKL_VML \code +
Code exampleMKL routines
\code v2=v1.array().sin(); v2=v1.array().asin(); v2=v1.array().cos(); @@ -156,7 +94,7 @@ v?Sqr v?Powx \endcode
-In the examples, m1 and m2 are dense matrices and v1 and v2 are dense vectors. +In the examples, v1 and v2 are dense vectors. \section TopicUsingIntelMKL_Links Links diff --git a/external/eigen3/doc/UsingNVCC.dox b/external/eigen3/doc/UsingNVCC.dox new file mode 100644 index 0000000..f8e755b --- /dev/null +++ b/external/eigen3/doc/UsingNVCC.dox @@ -0,0 +1,32 @@ + +namespace Eigen { + +/** \page TopicCUDA Using Eigen in CUDA kernels + +\b Disclaimer: this page is about an \b experimental feature in %Eigen. + +Staring from CUDA 5.0, the CUDA compiler, \c nvcc, is able to properly parse %Eigen's code (almost). +A few adaptations of the %Eigen's code already allows to use some parts of %Eigen in your own CUDA kernels. +To this end you need the devel branch of %Eigen, CUDA 5.0 or greater with GCC. + +Known issues: + + - \c nvcc with MS Visual Studio does not work (patch welcome) + + - \c nvcc with \c clang does not work (patch welcome) + + - \c nvcc 5.5 with gcc-4.7 (or greater) has issues with the standard \c \ header file. To workaround this, you can add the following before including any other files: + \code + // workaround issue between gcc >= 4.7 and cuda 5.5 + #if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7) + #undef _GLIBCXX_ATOMIC_BUILTINS + #undef _GLIBCXX_USE_INT128 + #endif + \endcode + + - On 64bits system Eigen uses \c long \c int as the default type for indexes and sizes. On CUDA device, it would make sense to default to 32 bits \c int. + However, to keep host and CUDA code compatible, this cannot be done automatically by %Eigen, and the user is thus required to define \c EIGEN_DEFAULT_DENSE_INDEX_TYPE to \c int throughout his code (or only for CUDA code if there is no interaction between host and CUDA code through %Eigen's object). + +*/ + +} diff --git a/external/eigen3/doc/eigendoxy.css b/external/eigen3/doc/eigendoxy.css index efaeb46..6274e6c 100644 --- a/external/eigen3/doc/eigendoxy.css +++ b/external/eigen3/doc/eigendoxy.css @@ -45,7 +45,7 @@ pre.fragment { /* Common style for all Eigen's tables */ -table.example, table.manual, table.manual-vl { +table.example, table.manual, table.manual-vl, table.manual-hl { max-width:100%; border-collapse: collapse; border-style: solid; @@ -58,7 +58,7 @@ table.example, table.manual, table.manual-vl { -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); } -table.example th, table.manual th, table.manual-vl th { +table.example th, table.manual th, table.manual-vl th, table.manual-hl th { padding: 0.5em 0.5em 0.5em 0.5em; text-align: left; padding-right: 1em; @@ -70,7 +70,7 @@ table.example th, table.manual th, table.manual-vl th { filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#FFFFFF', endColorstr='#F4F4E5'); } -table.example td, table.manual td, table.manual-vl td { +table.example td, table.manual td, table.manual-vl td, table.manual-hl td { vertical-align:top; border-width: 1px; border-color: #cccccc; @@ -108,15 +108,15 @@ table.example td { /* standard class for the manual */ -table.manual, table.manual-vl { +table.manual, table.manual-vl, table.manual-hl { padding: 0.2em 0em 0.5em 0em; } -table.manual th, table.manual-vl th { +table.manual th, table.manual-vl th, table.manual-hl th { margin: 0em 0em 0.3em 0em; } -table.manual td, table.manual-vl td { +table.manual td, table.manual-vl td, table.manual-hl td { padding: 0.3em 0.5em 0.3em 0.5em; vertical-align:top; border-width: 1px; @@ -136,6 +136,16 @@ table.manual-vl th.inter { border-style: solid solid solid solid; } +table.manual-hl td { + border-color: #cccccc; + border-width: 1px; + border-style: solid none solid none; +} + +table td.code { + font-family: monospace; +} + h2 { margin-top:2em; border-style: none none solid none; @@ -166,6 +176,11 @@ div.toc ul { margin: 0.2em 0 0.4em 0.5em; } +span.cpp11,span.cpp14,span.cpp17 { + color: #119911; + font-weight: bold; +} + /**** old Eigen's styles ****/ @@ -177,8 +192,8 @@ table.tutorial_code td { /* Whenever doxygen meets a '\n' or a '
', it will put - * the text containing the characted into a

. - * This little hack togehter with table.tutorial_code td.note + * the text containing the character into a

. + * This little hack together with table.tutorial_code td.note * aims at fixing this issue. */ table.tutorial_code td.note p.starttd { margin: 0px; @@ -199,13 +214,3 @@ h3.version { td.width20em p.endtd { width: 20em; } - -.bigwarning { - font-size:2em; - font-weight:bold; - margin:1em; - padding:1em; - color:red; - border:solid; -} - diff --git a/external/eigen3/doc/examples/CMakeLists.txt b/external/eigen3/doc/examples/CMakeLists.txt index 08cf8ef..f7a1905 100644 --- a/external/eigen3/doc/examples/CMakeLists.txt +++ b/external/eigen3/doc/examples/CMakeLists.txt @@ -14,3 +14,8 @@ foreach(example_src ${examples_SRCS}) ) add_dependencies(all_examples ${example}) endforeach(example_src) + +check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) +if(EIGEN_COMPILER_SUPPORT_CPP11) +ei_add_target_property(nullary_indexing COMPILE_FLAGS "-std=c++11") +endif() \ No newline at end of file diff --git a/external/eigen3/doc/examples/CustomizingEigen_Inheritance.cpp b/external/eigen3/doc/examples/CustomizingEigen_Inheritance.cpp new file mode 100644 index 0000000..48df64e --- /dev/null +++ b/external/eigen3/doc/examples/CustomizingEigen_Inheritance.cpp @@ -0,0 +1,30 @@ +#include +#include + +class MyVectorType : public Eigen::VectorXd +{ +public: + MyVectorType(void):Eigen::VectorXd() {} + + // This constructor allows you to construct MyVectorType from Eigen expressions + template + MyVectorType(const Eigen::MatrixBase& other) + : Eigen::VectorXd(other) + { } + + // This method allows you to assign Eigen expressions to MyVectorType + template + MyVectorType& operator=(const Eigen::MatrixBase & other) + { + this->Eigen::VectorXd::operator=(other); + return *this; + } +}; + +int main() +{ + MyVectorType v = MyVectorType::Ones(4); + v(2) += 10; + v = 2 * v; + std::cout << v.transpose() << std::endl; +} diff --git a/external/eigen3/doc/examples/Cwise_erf.cpp b/external/eigen3/doc/examples/Cwise_erf.cpp new file mode 100644 index 0000000..e7cd2c1 --- /dev/null +++ b/external/eigen3/doc/examples/Cwise_erf.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(-0.5,2,0,-7); + std::cout << v.erf() << std::endl; +} diff --git a/external/eigen3/doc/examples/Cwise_erfc.cpp b/external/eigen3/doc/examples/Cwise_erfc.cpp new file mode 100644 index 0000000..d8bb04c --- /dev/null +++ b/external/eigen3/doc/examples/Cwise_erfc.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(-0.5,2,0,-7); + std::cout << v.erfc() << std::endl; +} diff --git a/external/eigen3/doc/examples/Cwise_lgamma.cpp b/external/eigen3/doc/examples/Cwise_lgamma.cpp new file mode 100644 index 0000000..f1c4f50 --- /dev/null +++ b/external/eigen3/doc/examples/Cwise_lgamma.cpp @@ -0,0 +1,9 @@ +#include +#include +#include +using namespace Eigen; +int main() +{ + Array4d v(0.5,10,0,-1); + std::cout << v.lgamma() << std::endl; +} \ No newline at end of file diff --git a/external/eigen3/doc/examples/MatrixBase_cwise_const.cpp b/external/eigen3/doc/examples/MatrixBase_cwise_const.cpp deleted file mode 100644 index 23700e0..0000000 --- a/external/eigen3/doc/examples/MatrixBase_cwise_const.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#define EIGEN2_SUPPORT -#include -#include - -using namespace Eigen; -using namespace std; - -int main() -{ - Matrix3i m = Matrix3i::Random(); - cout << "Here is the matrix m:" << endl << m << endl; - Matrix3i n = Matrix3i::Random(); - cout << "And here is the matrix n:" << endl << n << endl; - cout << "The coefficient-wise product of m and n is:" << endl; - cout << m.cwise() * n << endl; - cout << "Taking the cube of the coefficients of m yields:" << endl; - cout << m.cwise().pow(3) << endl; -} diff --git a/external/eigen3/doc/examples/TutorialInplaceLU.cpp b/external/eigen3/doc/examples/TutorialInplaceLU.cpp new file mode 100644 index 0000000..cb9c59b --- /dev/null +++ b/external/eigen3/doc/examples/TutorialInplaceLU.cpp @@ -0,0 +1,61 @@ +#include +struct init { + init() { std::cout << "[" << "init" << "]" << std::endl; } +}; +init init_obj; +// [init] +#include +#include + +using namespace std; +using namespace Eigen; + +int main() +{ + MatrixXd A(2,2); + A << 2, -1, 1, 3; + cout << "Here is the input matrix A before decomposition:\n" << A << endl; +cout << "[init]" << endl; + +cout << "[declaration]" << endl; + PartialPivLU > lu(A); + cout << "Here is the input matrix A after decomposition:\n" << A << endl; +cout << "[declaration]" << endl; + +cout << "[matrixLU]" << endl; + cout << "Here is the matrix storing the L and U factors:\n" << lu.matrixLU() << endl; +cout << "[matrixLU]" << endl; + +cout << "[solve]" << endl; + MatrixXd A0(2,2); A0 << 2, -1, 1, 3; + VectorXd b(2); b << 1, 2; + VectorXd x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[solve]" << endl; + +cout << "[modifyA]" << endl; + A << 3, 4, -2, 1; + x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[modifyA]" << endl; + +cout << "[recompute]" << endl; + A0 = A; // save A + lu.compute(A); + x = lu.solve(b); + cout << "Residual: " << (A0 * x - b).norm() << endl; +cout << "[recompute]" << endl; + +cout << "[recompute_bis0]" << endl; + MatrixXd A1(2,2); + A1 << 5,-2,3,4; + lu.compute(A1); + cout << "Here is the input matrix A1 after decomposition:\n" << A1 << endl; +cout << "[recompute_bis0]" << endl; + +cout << "[recompute_bis1]" << endl; + x = lu.solve(b); + cout << "Residual: " << (A1 * x - b).norm() << endl; +cout << "[recompute_bis1]" << endl; + +} diff --git a/external/eigen3/doc/examples/TutorialLinAlgInverseDeterminant.cpp b/external/eigen3/doc/examples/TutorialLinAlgInverseDeterminant.cpp index 43970ff..14dde5b 100644 --- a/external/eigen3/doc/examples/TutorialLinAlgInverseDeterminant.cpp +++ b/external/eigen3/doc/examples/TutorialLinAlgInverseDeterminant.cpp @@ -13,4 +13,4 @@ int main() cout << "Here is the matrix A:\n" << A << endl; cout << "The determinant of A is " << A.determinant() << endl; cout << "The inverse of A is:\n" << A.inverse() << endl; -} \ No newline at end of file +} diff --git a/external/eigen3/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp b/external/eigen3/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp new file mode 100644 index 0000000..62e28fc --- /dev/null +++ b/external/eigen3/doc/examples/Tutorial_ReductionsVisitorsBroadcasting_reductions_operatornorm.cpp @@ -0,0 +1,18 @@ +#include +#include + +using namespace Eigen; +using namespace std; + +int main() +{ + MatrixXf m(2,2); + m << 1,-2, + -3,4; + + cout << "1-norm(m) = " << m.cwiseAbs().colwise().sum().maxCoeff() + << " == " << m.colwise().lpNorm<1>().maxCoeff() << endl; + + cout << "infty-norm(m) = " << m.cwiseAbs().rowwise().sum().maxCoeff() + << " == " << m.rowwise().lpNorm<1>().maxCoeff() << endl; +} diff --git a/external/eigen3/doc/examples/make_circulant.cpp b/external/eigen3/doc/examples/make_circulant.cpp new file mode 100644 index 0000000..92e6aaa --- /dev/null +++ b/external/eigen3/doc/examples/make_circulant.cpp @@ -0,0 +1,11 @@ +/* +This program is presented in several fragments in the doc page. +Every fragment is in its own file; this file simply combines them. +*/ + +#include "make_circulant.cpp.preamble" +#include "make_circulant.cpp.traits" +#include "make_circulant.cpp.expression" +#include "make_circulant.cpp.evaluator" +#include "make_circulant.cpp.entry" +#include "make_circulant.cpp.main" diff --git a/external/eigen3/doc/examples/make_circulant.cpp.entry b/external/eigen3/doc/examples/make_circulant.cpp.entry new file mode 100644 index 0000000..f9d2eb8 --- /dev/null +++ b/external/eigen3/doc/examples/make_circulant.cpp.entry @@ -0,0 +1,5 @@ +template +Circulant makeCirculant(const Eigen::MatrixBase& arg) +{ + return Circulant(arg.derived()); +} diff --git a/external/eigen3/doc/examples/make_circulant.cpp.evaluator b/external/eigen3/doc/examples/make_circulant.cpp.evaluator new file mode 100644 index 0000000..2ba79e7 --- /dev/null +++ b/external/eigen3/doc/examples/make_circulant.cpp.evaluator @@ -0,0 +1,32 @@ +namespace Eigen { + namespace internal { + template + struct evaluator > + : evaluator_base > + { + typedef Circulant XprType; + typedef typename nested_eval::type ArgTypeNested; + typedef typename remove_all::type ArgTypeNestedCleaned; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = Eigen::ColMajor + }; + + evaluator(const XprType& xpr) + : m_argImpl(xpr.m_arg), m_rows(xpr.rows()) + { } + + CoeffReturnType coeff(Index row, Index col) const + { + Index index = row - col; + if (index < 0) index += m_rows; + return m_argImpl.coeff(index); + } + + evaluator m_argImpl; + const Index m_rows; + }; + } +} diff --git a/external/eigen3/doc/examples/make_circulant.cpp.expression b/external/eigen3/doc/examples/make_circulant.cpp.expression new file mode 100644 index 0000000..380cd44 --- /dev/null +++ b/external/eigen3/doc/examples/make_circulant.cpp.expression @@ -0,0 +1,20 @@ +template +class Circulant : public Eigen::MatrixBase > +{ +public: + Circulant(const ArgType& arg) + : m_arg(arg) + { + EIGEN_STATIC_ASSERT(ArgType::ColsAtCompileTime == 1, + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX); + } + + typedef typename Eigen::internal::ref_selector::type Nested; + + typedef Eigen::Index Index; + Index rows() const { return m_arg.rows(); } + Index cols() const { return m_arg.rows(); } + + typedef typename Eigen::internal::ref_selector::type ArgTypeNested; + ArgTypeNested m_arg; +}; diff --git a/external/eigen3/doc/examples/make_circulant.cpp.main b/external/eigen3/doc/examples/make_circulant.cpp.main new file mode 100644 index 0000000..877f97f --- /dev/null +++ b/external/eigen3/doc/examples/make_circulant.cpp.main @@ -0,0 +1,8 @@ +int main() +{ + Eigen::VectorXd vec(4); + vec << 1, 2, 4, 8; + Eigen::MatrixXd mat; + mat = makeCirculant(vec); + std::cout << mat << std::endl; +} diff --git a/external/eigen3/doc/examples/make_circulant.cpp.preamble b/external/eigen3/doc/examples/make_circulant.cpp.preamble new file mode 100644 index 0000000..e575cce --- /dev/null +++ b/external/eigen3/doc/examples/make_circulant.cpp.preamble @@ -0,0 +1,4 @@ +#include +#include + +template class Circulant; diff --git a/external/eigen3/doc/examples/make_circulant.cpp.traits b/external/eigen3/doc/examples/make_circulant.cpp.traits new file mode 100644 index 0000000..4e04535 --- /dev/null +++ b/external/eigen3/doc/examples/make_circulant.cpp.traits @@ -0,0 +1,19 @@ +namespace Eigen { + namespace internal { + template + struct traits > + { + typedef Eigen::Dense StorageKind; + typedef Eigen::MatrixXpr XprKind; + typedef typename ArgType::StorageIndex StorageIndex; + typedef typename ArgType::Scalar Scalar; + enum { + Flags = Eigen::ColMajor, + RowsAtCompileTime = ArgType::RowsAtCompileTime, + ColsAtCompileTime = ArgType::RowsAtCompileTime, + MaxRowsAtCompileTime = ArgType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = ArgType::MaxRowsAtCompileTime + }; + }; + } +} diff --git a/external/eigen3/doc/examples/make_circulant2.cpp b/external/eigen3/doc/examples/make_circulant2.cpp new file mode 100644 index 0000000..95d3dd3 --- /dev/null +++ b/external/eigen3/doc/examples/make_circulant2.cpp @@ -0,0 +1,52 @@ +#include +#include + +using namespace Eigen; + +// [circulant_func] +template +class circulant_functor { + const ArgType &m_vec; +public: + circulant_functor(const ArgType& arg) : m_vec(arg) {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + Index index = row - col; + if (index < 0) index += m_vec.size(); + return m_vec(index); + } +}; +// [circulant_func] + +// [square] +template +struct circulant_helper { + typedef Matrix MatrixType; +}; +// [square] + +// [makeCirculant] +template +CwiseNullaryOp, typename circulant_helper::MatrixType> +makeCirculant(const Eigen::MatrixBase& arg) +{ + typedef typename circulant_helper::MatrixType MatrixType; + return MatrixType::NullaryExpr(arg.size(), arg.size(), circulant_functor(arg.derived())); +} +// [makeCirculant] + +// [main] +int main() +{ + Eigen::VectorXd vec(4); + vec << 1, 2, 4, 8; + Eigen::MatrixXd mat; + mat = makeCirculant(vec); + std::cout << mat << std::endl; +} +// [main] diff --git a/external/eigen3/doc/examples/matrixfree_cg.cpp b/external/eigen3/doc/examples/matrixfree_cg.cpp index f0631c3..6a205ae 100644 --- a/external/eigen3/doc/examples/matrixfree_cg.cpp +++ b/external/eigen3/doc/examples/matrixfree_cg.cpp @@ -2,179 +2,127 @@ #include #include #include +#include class MatrixReplacement; -template class MatrixReplacement_ProductReturnType; +using Eigen::SparseMatrix; namespace Eigen { namespace internal { + // MatrixReplacement looks-like a SparseMatrix, so let's inherits its traits: template<> - struct traits : Eigen::internal::traits > + struct traits : public Eigen::internal::traits > {}; - - template - struct traits > { - // The equivalent plain objet type of the product. This type is used if the product needs to be evaluated into a temporary. - typedef Eigen::Matrix ReturnType; - }; } } -// Inheriting EigenBase should not be needed in the future. +// Example of a matrix-free wrapper from a user type to Eigen's compatible type +// For the sake of simplicity, this example simply wrap a Eigen::SparseMatrix. class MatrixReplacement : public Eigen::EigenBase { public: - // Expose some compile-time information to Eigen: + // Required typedefs, constants, and method: typedef double Scalar; typedef double RealScalar; + typedef int StorageIndex; enum { ColsAtCompileTime = Eigen::Dynamic, - RowsAtCompileTime = Eigen::Dynamic, MaxColsAtCompileTime = Eigen::Dynamic, - MaxRowsAtCompileTime = Eigen::Dynamic + IsRowMajor = false }; - Index rows() const { return 4; } - Index cols() const { return 4; } + Index rows() const { return mp_mat->rows(); } + Index cols() const { return mp_mat->cols(); } - void resize(Index a_rows, Index a_cols) - { - // This method should not be needed in the future. - assert(a_rows==0 && a_cols==0 || a_rows==rows() && a_cols==cols()); - } - - // In the future, the return type should be Eigen::Product template - MatrixReplacement_ProductReturnType operator*(const Eigen::MatrixBase& x) const { - return MatrixReplacement_ProductReturnType(*this, x.derived()); + Eigen::Product operator*(const Eigen::MatrixBase& x) const { + return Eigen::Product(*this, x.derived()); } -}; + // Custom API: + MatrixReplacement() : mp_mat(0) {} -// The proxy class representing the product of a MatrixReplacement with a MatrixBase<> -template -class MatrixReplacement_ProductReturnType : public Eigen::ReturnByValue > { -public: - typedef MatrixReplacement::Index Index; - - // The ctor store references to the matrix and right-hand-side object (usually a vector). - MatrixReplacement_ProductReturnType(const MatrixReplacement& matrix, const Rhs& rhs) - : m_matrix(matrix), m_rhs(rhs) - {} - - Index rows() const { return m_matrix.rows(); } - Index cols() const { return m_rhs.cols(); } - - // This function is automatically called by Eigen. It must evaluate the product of matrix * rhs into y. - template - void evalTo(Dest& y) const - { - y.setZero(4); - - y(0) += 2 * m_rhs(0); y(1) += 1 * m_rhs(0); - y(0) += 1 * m_rhs(1); y(1) += 2 * m_rhs(1); y(2) += 1 * m_rhs(1); - y(1) += 1 * m_rhs(2); y(2) += 2 * m_rhs(2); y(3) += 1 * m_rhs(2); - y(2) += 1 * m_rhs(3); y(3) += 2 * m_rhs(3); + void attachMyMatrix(const SparseMatrix &mat) { + mp_mat = &mat; } + const SparseMatrix my_matrix() const { return *mp_mat; } -protected: - const MatrixReplacement& m_matrix; - typename Rhs::Nested m_rhs; +private: + const SparseMatrix *mp_mat; }; -/*****/ - -// This class simply warp a diagonal matrix as a Jacobi preconditioner. -// In the future such simple and generic wrapper should be shipped within Eigen itsel. -template -class MyJacobiPreconditioner -{ - typedef _Scalar Scalar; - typedef Eigen::Matrix Vector; - typedef typename Vector::Index Index; - - public: - // this typedef is only to export the scalar type and compile-time dimensions to solve_retval - typedef Eigen::Matrix MatrixType; - - MyJacobiPreconditioner() : m_isInitialized(false) {} - - void setInvDiag(const Eigen::VectorXd &invdiag) { - m_invdiag=invdiag; - m_isInitialized=true; - } - - Index rows() const { return m_invdiag.size(); } - Index cols() const { return m_invdiag.size(); } - - template - MyJacobiPreconditioner& analyzePattern(const MatType& ) { return *this; } - - template - MyJacobiPreconditioner& factorize(const MatType& mat) { return *this; } - - template - MyJacobiPreconditioner& compute(const MatType& mat) { return *this; } - - template - void _solve(const Rhs& b, Dest& x) const - { - x = m_invdiag.array() * b.array() ; - } - - template inline const Eigen::internal::solve_retval - solve(const Eigen::MatrixBase& b) const - { - eigen_assert(m_isInitialized && "MyJacobiPreconditioner is not initialized."); - eigen_assert(m_invdiag.size()==b.rows() - && "MyJacobiPreconditioner::solve(): invalid number of rows of the right hand side matrix b"); - return Eigen::internal::solve_retval(*this, b.derived()); - } - - protected: - Vector m_invdiag; - bool m_isInitialized; -}; - +// Implementation of MatrixReplacement * Eigen::DenseVector though a specialization of internal::generic_product_impl: namespace Eigen { namespace internal { -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> -{ - typedef MyJacobiPreconditioner<_MatrixType> Dec; - EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) - - template void evalTo(Dest& dst) const + template + struct generic_product_impl // GEMV stands for matrix-vector + : generic_product_impl_base > { - dec()._solve(rhs(),dst); - } -}; + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const MatrixReplacement& lhs, const Rhs& rhs, const Scalar& alpha) + { + // This method should implement "dst += alpha * lhs * rhs" inplace, + // however, for iterative solvers, alpha is always equal to 1, so let's not bother about it. + assert(alpha==Scalar(1) && "scaling is not implemented"); + + // Here we could simply call dst.noalias() += lhs.my_matrix() * rhs, + // but let's do something fancier (and less efficient): + for(Index i=0; i S = Eigen::MatrixXd::Random(n,n).sparseView(0.5,1); + S = S.transpose()*S; + MatrixReplacement A; - Eigen::VectorXd b(4), x; - b << 1, 1, 1, 1; + A.attachMyMatrix(S); - // solve Ax = b using CG with matrix-free version: - Eigen::ConjugateGradient < MatrixReplacement, Eigen::Lower|Eigen::Upper, MyJacobiPreconditioner > cg; + Eigen::VectorXd b(n), x; + b.setRandom(); - Eigen::VectorXd invdiag(4); - invdiag << 1./3., 1./4., 1./4., 1./3.; + // Solve Ax = b using various iterative solver with matrix-free version: + { + Eigen::ConjugateGradient cg; + cg.compute(A); + x = cg.solve(b); + std::cout << "CG: #iterations: " << cg.iterations() << ", estimated error: " << cg.error() << std::endl; + } + + { + Eigen::BiCGSTAB bicg; + bicg.compute(A); + x = bicg.solve(b); + std::cout << "BiCGSTAB: #iterations: " << bicg.iterations() << ", estimated error: " << bicg.error() << std::endl; + } + + { + Eigen::GMRES gmres; + gmres.compute(A); + x = gmres.solve(b); + std::cout << "GMRES: #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl; + } - cg.preconditioner().setInvDiag(invdiag); - cg.compute(A); - x = cg.solve(b); + { + Eigen::DGMRES gmres; + gmres.compute(A); + x = gmres.solve(b); + std::cout << "DGMRES: #iterations: " << gmres.iterations() << ", estimated error: " << gmres.error() << std::endl; + } - std::cout << "#iterations: " << cg.iterations() << std::endl; - std::cout << "estimated error: " << cg.error() << std::endl; + { + Eigen::MINRES minres; + minres.compute(A); + x = minres.solve(b); + std::cout << "MINRES: #iterations: " << minres.iterations() << ", estimated error: " << minres.error() << std::endl; + } } diff --git a/external/eigen3/doc/examples/nullary_indexing.cpp b/external/eigen3/doc/examples/nullary_indexing.cpp new file mode 100644 index 0000000..e27c358 --- /dev/null +++ b/external/eigen3/doc/examples/nullary_indexing.cpp @@ -0,0 +1,66 @@ +#include +#include + +using namespace Eigen; + +// [functor] +template +class indexing_functor { + const ArgType &m_arg; + const RowIndexType &m_rowIndices; + const ColIndexType &m_colIndices; +public: + typedef Matrix MatrixType; + + indexing_functor(const ArgType& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) + : m_arg(arg), m_rowIndices(row_indices), m_colIndices(col_indices) + {} + + const typename ArgType::Scalar& operator() (Index row, Index col) const { + return m_arg(m_rowIndices[row], m_colIndices[col]); + } +}; +// [functor] + +// [function] +template +CwiseNullaryOp, typename indexing_functor::MatrixType> +indexing(const Eigen::MatrixBase& arg, const RowIndexType& row_indices, const ColIndexType& col_indices) +{ + typedef indexing_functor Func; + typedef typename Func::MatrixType MatrixType; + return MatrixType::NullaryExpr(row_indices.size(), col_indices.size(), Func(arg.derived(), row_indices, col_indices)); +} +// [function] + + +int main() +{ + std::cout << "[main1]\n"; + Eigen::MatrixXi A = Eigen::MatrixXi::Random(4,4); + Array3i ri(1,2,1); + ArrayXi ci(6); ci << 3,2,1,0,0,2; + Eigen::MatrixXi B = indexing(A, ri, ci); + std::cout << "A =" << std::endl; + std::cout << A << std::endl << std::endl; + std::cout << "A([" << ri.transpose() << "], [" << ci.transpose() << "]) =" << std::endl; + std::cout << B << std::endl; + std::cout << "[main1]\n"; + + std::cout << "[main2]\n"; + B = indexing(A, ri+1, ci); + std::cout << "A(ri+1,ci) =" << std::endl; + std::cout << B << std::endl << std::endl; +#if __cplusplus >= 201103L + B = indexing(A, ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)); + std::cout << "A(ArrayXi::LinSpaced(13,0,12).unaryExpr([](int x){return x%4;}), ArrayXi::LinSpaced(4,0,3)) =" << std::endl; + std::cout << B << std::endl << std::endl; +#endif + std::cout << "[main2]\n"; +} + diff --git a/external/eigen3/doc/ftv2node.png b/external/eigen3/doc/ftv2node.png new file mode 100644 index 0000000000000000000000000000000000000000..63c605bb4c3d941c921a4b6cfa74951e946bcb48 GIT binary patch literal 86 zcmeAS@N?(olHy`uVBq!ia0vp^0zfRr!3HExu9B$%QnH>djv*C{Z|`mdau^P8_z}#X h?B8GEpdi4(BFDx$je&7RrDQEg&ePS;Wt~$(69Dh@6T1Ka literal 0 HcmV?d00001 diff --git a/external/eigen3/doc/ftv2pnode.png b/external/eigen3/doc/ftv2pnode.png new file mode 100644 index 0000000000000000000000000000000000000000..c6ee22f937a07d1dbfc27c669d11f8ed13e2f152 GIT binary patch literal 229 zcmV^P)R?RzRoKvklcaQ%HF6%rK2&ZgO(-ihJ_C zzrKgp4jgO( fd_(yg|3PpEQb#9`a?Pz_00000NkvXXu0mjftR`5K literal 0 HcmV?d00001 diff --git a/external/eigen3/doc/snippets/BiCGSTAB_simple.cpp b/external/eigen3/doc/snippets/BiCGSTAB_simple.cpp new file mode 100644 index 0000000..5520f4f --- /dev/null +++ b/external/eigen3/doc/snippets/BiCGSTAB_simple.cpp @@ -0,0 +1,11 @@ + int n = 10000; + VectorXd x(n), b(n); + SparseMatrix A(n,n); + /* ... fill A and b ... */ + BiCGSTAB > solver; + solver.compute(A); + x = solver.solve(b); + std::cout << "#iterations: " << solver.iterations() << std::endl; + std::cout << "estimated error: " << solver.error() << std::endl; + /* ... update b ... */ + x = solver.solve(b); // solve again \ No newline at end of file diff --git a/external/eigen3/doc/snippets/BiCGSTAB_step_by_step.cpp b/external/eigen3/doc/snippets/BiCGSTAB_step_by_step.cpp new file mode 100644 index 0000000..06147bb --- /dev/null +++ b/external/eigen3/doc/snippets/BiCGSTAB_step_by_step.cpp @@ -0,0 +1,14 @@ + int n = 10000; + VectorXd x(n), b(n); + SparseMatrix A(n,n); + /* ... fill A and b ... */ + BiCGSTAB > solver(A); + // start from a random solution + x = VectorXd::Random(n); + solver.setMaxIterations(1); + int i = 0; + do { + x = solver.solveWithGuess(b,x); + std::cout << i << " : " << solver.error() << std::endl; + ++i; + } while (solver.info()!=Success && i<100); \ No newline at end of file diff --git a/external/eigen3/doc/snippets/CMakeLists.txt b/external/eigen3/doc/snippets/CMakeLists.txt index 1135900..1baf32f 100644 --- a/external/eigen3/doc/snippets/CMakeLists.txt +++ b/external/eigen3/doc/snippets/CMakeLists.txt @@ -24,5 +24,3 @@ foreach(snippet_src ${snippets_SRCS}) set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${compile_snippet_src} PROPERTIES OBJECT_DEPENDS ${snippet_src}) endforeach(snippet_src) - -ei_add_target_property(compile_tut_arithmetic_transpose_aliasing COMPILE_FLAGS -DEIGEN_NO_DEBUG) diff --git a/external/eigen3/doc/snippets/Cwise_arg.cpp b/external/eigen3/doc/snippets/Cwise_arg.cpp new file mode 100644 index 0000000..3f45133 --- /dev/null +++ b/external/eigen3/doc/snippets/Cwise_arg.cpp @@ -0,0 +1,3 @@ +ArrayXcf v = ArrayXcf::Random(3); +cout << v << endl << endl; +cout << arg(v) << endl; diff --git a/external/eigen3/doc/snippets/Cwise_array_power_array.cpp b/external/eigen3/doc/snippets/Cwise_array_power_array.cpp new file mode 100644 index 0000000..432a76e --- /dev/null +++ b/external/eigen3/doc/snippets/Cwise_array_power_array.cpp @@ -0,0 +1,4 @@ +Array x(8,25,3), + e(1./3.,0.5,2.); +cout << "[" << x << "]^[" << e << "] = " << x.pow(e) << endl; // using ArrayBase::pow +cout << "[" << x << "]^[" << e << "] = " << pow(x,e) << endl; // using Eigen::pow diff --git a/external/eigen3/doc/snippets/Cwise_atan.cpp b/external/eigen3/doc/snippets/Cwise_atan.cpp new file mode 100644 index 0000000..4468447 --- /dev/null +++ b/external/eigen3/doc/snippets/Cwise_atan.cpp @@ -0,0 +1,2 @@ +ArrayXd v = ArrayXd::LinSpaced(5,0,1); +cout << v.atan() << endl; diff --git a/external/eigen3/doc/snippets/Cwise_boolean_not.cpp b/external/eigen3/doc/snippets/Cwise_boolean_not.cpp new file mode 100644 index 0000000..40009f1 --- /dev/null +++ b/external/eigen3/doc/snippets/Cwise_boolean_not.cpp @@ -0,0 +1,5 @@ +Array3d v(1,2,3); +v(1) *= 0.0/0.0; +v(2) /= 0.0; +cout << v << endl << endl; +cout << !isfinite(v) << endl; diff --git a/external/eigen3/doc/snippets/Cwise_boolean_xor.cpp b/external/eigen3/doc/snippets/Cwise_boolean_xor.cpp new file mode 100644 index 0000000..fafbec8 --- /dev/null +++ b/external/eigen3/doc/snippets/Cwise_boolean_xor.cpp @@ -0,0 +1,2 @@ +Array3d v(-1,2,1), w(-3,2,3); +cout << ((v e(2,-3,1./3.); +cout << "10^[" << e << "] = " << pow(10,e) << endl; diff --git a/external/eigen3/doc/snippets/Cwise_sign.cpp b/external/eigen3/doc/snippets/Cwise_sign.cpp new file mode 100644 index 0000000..49920e4 --- /dev/null +++ b/external/eigen3/doc/snippets/Cwise_sign.cpp @@ -0,0 +1,2 @@ +Array3d v(-3,5,0); +cout << v.sign() << endl; diff --git a/external/eigen3/doc/snippets/Cwise_sinh.cpp b/external/eigen3/doc/snippets/Cwise_sinh.cpp new file mode 100644 index 0000000..fac9b19 --- /dev/null +++ b/external/eigen3/doc/snippets/Cwise_sinh.cpp @@ -0,0 +1,2 @@ +ArrayXd v = ArrayXd::LinSpaced(5,0,1); +cout << sinh(v) << endl; diff --git a/external/eigen3/doc/snippets/Cwise_tanh.cpp b/external/eigen3/doc/snippets/Cwise_tanh.cpp new file mode 100644 index 0000000..30cd045 --- /dev/null +++ b/external/eigen3/doc/snippets/Cwise_tanh.cpp @@ -0,0 +1,2 @@ +ArrayXd v = ArrayXd::LinSpaced(5,0,1); +cout << tanh(v) << endl; diff --git a/external/eigen3/doc/snippets/DenseBase_LinSpacedInt.cpp b/external/eigen3/doc/snippets/DenseBase_LinSpacedInt.cpp new file mode 100644 index 0000000..0d7ae06 --- /dev/null +++ b/external/eigen3/doc/snippets/DenseBase_LinSpacedInt.cpp @@ -0,0 +1,8 @@ +cout << "Even spacing inputs:" << endl; +cout << VectorXi::LinSpaced(8,1,4).transpose() << endl; +cout << VectorXi::LinSpaced(8,1,8).transpose() << endl; +cout << VectorXi::LinSpaced(8,1,15).transpose() << endl; +cout << "Uneven spacing inputs:" << endl; +cout << VectorXi::LinSpaced(8,1,7).transpose() << endl; +cout << VectorXi::LinSpaced(8,1,9).transpose() << endl; +cout << VectorXi::LinSpaced(8,1,16).transpose() << endl; diff --git a/external/eigen3/doc/snippets/DirectionWise_hnormalized.cpp b/external/eigen3/doc/snippets/DirectionWise_hnormalized.cpp new file mode 100644 index 0000000..3410790 --- /dev/null +++ b/external/eigen3/doc/snippets/DirectionWise_hnormalized.cpp @@ -0,0 +1,7 @@ +typedef Matrix Matrix4Xd; +Matrix4Xd M = Matrix4Xd::Random(4,5); +Projective3d P(Matrix4d::Random()); +cout << "The matrix M is:" << endl << M << endl << endl; +cout << "M.colwise().hnormalized():" << endl << M.colwise().hnormalized() << endl << endl; +cout << "P*M:" << endl << P*M << endl << endl; +cout << "(P*M).colwise().hnormalized():" << endl << (P*M).colwise().hnormalized() << endl << endl; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/EigenSolver_eigenvectors.cpp b/external/eigen3/doc/snippets/EigenSolver_eigenvectors.cpp index 0fad4da..8355f76 100644 --- a/external/eigen3/doc/snippets/EigenSolver_eigenvectors.cpp +++ b/external/eigen3/doc/snippets/EigenSolver_eigenvectors.cpp @@ -1,4 +1,4 @@ MatrixXd ones = MatrixXd::Ones(3,3); EigenSolver es(ones); -cout << "The first eigenvector of the 3x3 matrix of ones is:" - << endl << es.eigenvectors().col(1) << endl; +cout << "The first eigenvector of the 3x3 matrix of ones is:" + << endl << es.eigenvectors().col(0) << endl; diff --git a/external/eigen3/doc/snippets/LeastSquaresNormalEquations.cpp b/external/eigen3/doc/snippets/LeastSquaresNormalEquations.cpp new file mode 100644 index 0000000..997cf17 --- /dev/null +++ b/external/eigen3/doc/snippets/LeastSquaresNormalEquations.cpp @@ -0,0 +1,4 @@ +MatrixXf A = MatrixXf::Random(3, 2); +VectorXf b = VectorXf::Random(3); +cout << "The solution using normal equations is:\n" + << (A.transpose() * A).ldlt().solve(A.transpose() * b) << endl; diff --git a/external/eigen3/doc/snippets/LeastSquaresQR.cpp b/external/eigen3/doc/snippets/LeastSquaresQR.cpp new file mode 100644 index 0000000..6c97045 --- /dev/null +++ b/external/eigen3/doc/snippets/LeastSquaresQR.cpp @@ -0,0 +1,4 @@ +MatrixXf A = MatrixXf::Random(3, 2); +VectorXf b = VectorXf::Random(3); +cout << "The solution using the QR decomposition is:\n" + << A.colPivHouseholderQr().solve(b) << endl; diff --git a/external/eigen3/doc/snippets/MatrixBase_cwiseSign.cpp b/external/eigen3/doc/snippets/MatrixBase_cwiseSign.cpp new file mode 100644 index 0000000..efd7179 --- /dev/null +++ b/external/eigen3/doc/snippets/MatrixBase_cwiseSign.cpp @@ -0,0 +1,4 @@ +MatrixXd m(2,3); +m << 2, -4, 6, + -5, 1, 0; +cout << m.cwiseSign() << endl; diff --git a/external/eigen3/doc/snippets/MatrixBase_hnormalized.cpp b/external/eigen3/doc/snippets/MatrixBase_hnormalized.cpp new file mode 100644 index 0000000..652cd77 --- /dev/null +++ b/external/eigen3/doc/snippets/MatrixBase_hnormalized.cpp @@ -0,0 +1,6 @@ +Vector4d v = Vector4d::Random(); +Projective3d P(Matrix4d::Random()); +cout << "v = " << v.transpose() << "]^T" << endl; +cout << "v.hnormalized() = " << v.hnormalized().transpose() << "]^T" << endl; +cout << "P*v = " << (P*v).transpose() << "]^T" << endl; +cout << "(P*v).hnormalized() = " << (P*v).hnormalized().transpose() << "]^T" << endl; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/MatrixBase_homogeneous.cpp b/external/eigen3/doc/snippets/MatrixBase_homogeneous.cpp new file mode 100644 index 0000000..457c28f --- /dev/null +++ b/external/eigen3/doc/snippets/MatrixBase_homogeneous.cpp @@ -0,0 +1,6 @@ +Vector3d v = Vector3d::Random(), w; +Projective3d P(Matrix4d::Random()); +cout << "v = [" << v.transpose() << "]^T" << endl; +cout << "h.homogeneous() = [" << v.homogeneous().transpose() << "]^T" << endl; +cout << "(P * v.homogeneous()) = [" << (P * v.homogeneous()).transpose() << "]^T" << endl; +cout << "(P * v.homogeneous()).hnormalized() = [" << (P * v.homogeneous()).eval().hnormalized().transpose() << "]^T" << endl; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/MatrixBase_marked.cpp b/external/eigen3/doc/snippets/MatrixBase_marked.cpp deleted file mode 100644 index f607121..0000000 --- a/external/eigen3/doc/snippets/MatrixBase_marked.cpp +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _MSC_VER - #warning deprecated -#endif -/* -Matrix3d m = Matrix3d::Zero(); -m.part().setOnes(); -cout << "Here is the matrix m:" << endl << m << endl; -Matrix3d n = Matrix3d::Ones(); -n.part() *= 2; -cout << "Here is the matrix n:" << endl << n << endl; -cout << "And now here is m.inverse()*n, taking advantage of the fact that" - " m is upper-triangular:" << endl - << m.marked().solveTriangular(n); -*/ \ No newline at end of file diff --git a/external/eigen3/doc/snippets/MatrixBase_part.cpp b/external/eigen3/doc/snippets/MatrixBase_part.cpp deleted file mode 100644 index d3e7f48..0000000 --- a/external/eigen3/doc/snippets/MatrixBase_part.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _MSC_VER - #warning deprecated -#endif -/* -Matrix3d m = Matrix3d::Zero(); -m.part().setOnes(); -cout << "Here is the matrix m:" << endl << m << endl; -cout << "And let us now compute m*m.adjoint() in a very optimized way" << endl - << "taking advantage of the symmetry." << endl; -Matrix3d n; -n.part() = (m*m.adjoint()).lazy(); -cout << "The result is:" << endl << n << endl; -*/ \ No newline at end of file diff --git a/external/eigen3/doc/snippets/MatrixBase_selfadjointView.cpp b/external/eigen3/doc/snippets/MatrixBase_selfadjointView.cpp new file mode 100644 index 0000000..4bd3c7e --- /dev/null +++ b/external/eigen3/doc/snippets/MatrixBase_selfadjointView.cpp @@ -0,0 +1,6 @@ +Matrix3i m = Matrix3i::Random(); +cout << "Here is the matrix m:" << endl << m << endl; +cout << "Here is the symmetric matrix extracted from the upper part of m:" << endl + << Matrix3i(m.selfadjointView()) << endl; +cout << "Here is the symmetric matrix extracted from the lower part of m:" << endl + << Matrix3i(m.selfadjointView()) << endl; diff --git a/external/eigen3/doc/snippets/MatrixBase_extract.cpp b/external/eigen3/doc/snippets/MatrixBase_triangularView.cpp similarity index 55% rename from external/eigen3/doc/snippets/MatrixBase_extract.cpp rename to external/eigen3/doc/snippets/MatrixBase_triangularView.cpp index c96220f..03aa303 100644 --- a/external/eigen3/doc/snippets/MatrixBase_extract.cpp +++ b/external/eigen3/doc/snippets/MatrixBase_triangularView.cpp @@ -1,13 +1,9 @@ -#ifndef _MSC_VER - #warning deprecated -#endif -/* deprecated Matrix3i m = Matrix3i::Random(); cout << "Here is the matrix m:" << endl << m << endl; cout << "Here is the upper-triangular matrix extracted from m:" << endl - << m.part() << endl; + << Matrix3i(m.triangularView()) << endl; cout << "Here is the strictly-upper-triangular matrix extracted from m:" << endl - << m.part() << endl; + << Matrix3i(m.triangularView()) << endl; cout << "Here is the unit-lower-triangular matrix extracted from m:" << endl - << m.part() << endl; -*/ \ No newline at end of file + << Matrix3i(m.triangularView()) << endl; +// FIXME need to implement output for triangularViews (Bug 885) diff --git a/external/eigen3/doc/snippets/PartialRedux_count.cpp b/external/eigen3/doc/snippets/PartialRedux_count.cpp index c7b3097..1c3b3a2 100644 --- a/external/eigen3/doc/snippets/PartialRedux_count.cpp +++ b/external/eigen3/doc/snippets/PartialRedux_count.cpp @@ -1,3 +1,5 @@ Matrix3d m = Matrix3d::Random(); cout << "Here is the matrix m:" << endl << m << endl; -cout << "Here is the count of elements larger or equal than 0.5 of each row:" << endl << (m.array() >= 0.5).rowwise().count() << endl; +Matrix res = (m.array() >= 0.5).rowwise().count(); +cout << "Here is the count of elements larger or equal than 0.5 of each row:" << endl; +cout << res << endl; diff --git a/external/eigen3/doc/snippets/SparseMatrix_coeffs.cpp b/external/eigen3/doc/snippets/SparseMatrix_coeffs.cpp new file mode 100644 index 0000000..f71a69b --- /dev/null +++ b/external/eigen3/doc/snippets/SparseMatrix_coeffs.cpp @@ -0,0 +1,9 @@ +SparseMatrix A(3,3); +A.insert(1,2) = 0; +A.insert(0,1) = 1; +A.insert(2,0) = 2; +A.makeCompressed(); +cout << "The matrix A is:" << endl << MatrixXd(A) << endl; +cout << "it has " << A.nonZeros() << " stored non zero coefficients that are: " << A.coeffs().transpose() << endl; +A.coeffs() += 10; +cout << "After adding 10 to every stored non zero coefficient, the matrix A is:" << endl << MatrixXd(A) << endl; diff --git a/external/eigen3/doc/snippets/TopicAliasing_mult4.cpp b/external/eigen3/doc/snippets/TopicAliasing_mult4.cpp new file mode 100644 index 0000000..8a8992f --- /dev/null +++ b/external/eigen3/doc/snippets/TopicAliasing_mult4.cpp @@ -0,0 +1,5 @@ +MatrixXf A(2,2), B(3,2); +B << 2, 0, 0, 3, 1, 1; +A << 2, 0, 0, -2; +A = (B * A).cwiseAbs(); +cout << A; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/TopicAliasing_mult5.cpp b/external/eigen3/doc/snippets/TopicAliasing_mult5.cpp new file mode 100644 index 0000000..1a36def --- /dev/null +++ b/external/eigen3/doc/snippets/TopicAliasing_mult5.cpp @@ -0,0 +1,5 @@ +MatrixXf A(2,2), B(3,2); +B << 2, 0, 0, 3, 1, 1; +A << 2, 0, 0, -2; +A = (B * A).eval().cwiseAbs(); +cout << A; diff --git a/external/eigen3/doc/snippets/Triangular_solve.cpp b/external/eigen3/doc/snippets/Triangular_solve.cpp new file mode 100644 index 0000000..5484424 --- /dev/null +++ b/external/eigen3/doc/snippets/Triangular_solve.cpp @@ -0,0 +1,11 @@ +Matrix3d m = Matrix3d::Zero(); +m.triangularView().setOnes(); +cout << "Here is the matrix m:\n" << m << endl; +Matrix3d n = Matrix3d::Ones(); +n.triangularView() *= 2; +cout << "Here is the matrix n:\n" << n << endl; +cout << "And now here is m.inverse()*n, taking advantage of the fact that" + " m is upper-triangular:\n" + << m.triangularView().solve(n) << endl; +cout << "And this is n*m.inverse():\n" + << m.triangularView().solve(n); diff --git a/external/eigen3/doc/snippets/Tutorial_AdvancedInitialization_Join.cpp b/external/eigen3/doc/snippets/Tutorial_AdvancedInitialization_Join.cpp index 84e8715..55a2153 100644 --- a/external/eigen3/doc/snippets/Tutorial_AdvancedInitialization_Join.cpp +++ b/external/eigen3/doc/snippets/Tutorial_AdvancedInitialization_Join.cpp @@ -3,7 +3,7 @@ vec1 << 1, 2, 3; std::cout << "vec1 = " << vec1 << std::endl; RowVectorXd vec2(4); -vec2 << 1, 4, 9, 16;; +vec2 << 1, 4, 9, 16; std::cout << "vec2 = " << vec2 << std::endl; RowVectorXd joined(7); diff --git a/external/eigen3/doc/snippets/Tutorial_ReshapeMat2Mat.cpp b/external/eigen3/doc/snippets/Tutorial_ReshapeMat2Mat.cpp new file mode 100644 index 0000000..f84d6e7 --- /dev/null +++ b/external/eigen3/doc/snippets/Tutorial_ReshapeMat2Mat.cpp @@ -0,0 +1,6 @@ +MatrixXf M1(2,6); // Column-major storage +M1 << 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12; + +Map M2(M1.data(), 6,2); +cout << "M2:" << endl << M2 << endl; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/Tutorial_ReshapeMat2Vec.cpp b/external/eigen3/doc/snippets/Tutorial_ReshapeMat2Vec.cpp new file mode 100644 index 0000000..95bd4e0 --- /dev/null +++ b/external/eigen3/doc/snippets/Tutorial_ReshapeMat2Vec.cpp @@ -0,0 +1,11 @@ +MatrixXf M1(3,3); // Column-major storage +M1 << 1, 2, 3, + 4, 5, 6, + 7, 8, 9; + +Map v1(M1.data(), M1.size()); +cout << "v1:" << endl << v1 << endl; + +Matrix M2(M1); +Map v2(M2.data(), M2.size()); +cout << "v2:" << endl << v2 << endl; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/Tutorial_SlicingCol.cpp b/external/eigen3/doc/snippets/Tutorial_SlicingCol.cpp new file mode 100644 index 0000000..f667ff6 --- /dev/null +++ b/external/eigen3/doc/snippets/Tutorial_SlicingCol.cpp @@ -0,0 +1,11 @@ +MatrixXf M1 = MatrixXf::Random(3,8); +cout << "Column major input:" << endl << M1 << "\n"; +Map > M2(M1.data(), M1.rows(), (M1.cols()+2)/3, OuterStride<>(M1.outerStride()*3)); +cout << "1 column over 3:" << endl << M2 << "\n"; + +typedef Matrix RowMajorMatrixXf; +RowMajorMatrixXf M3(M1); +cout << "Row major input:" << endl << M3 << "\n"; +Map > M4(M3.data(), M3.rows(), (M3.cols()+2)/3, + Stride(M3.outerStride(),3)); +cout << "1 column over 3:" << endl << M4 << "\n"; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/Tutorial_SlicingVec.cpp b/external/eigen3/doc/snippets/Tutorial_SlicingVec.cpp new file mode 100644 index 0000000..07e10bf --- /dev/null +++ b/external/eigen3/doc/snippets/Tutorial_SlicingVec.cpp @@ -0,0 +1,4 @@ +RowVectorXf v = RowVectorXf::LinSpaced(20,0,19); +cout << "Input:" << endl << v << endl; +Map > v2(v.data(), v.size()/2); +cout << "Even:" << v2 << endl; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/VectorwiseOp_homogeneous.cpp b/external/eigen3/doc/snippets/VectorwiseOp_homogeneous.cpp new file mode 100644 index 0000000..aba4fed --- /dev/null +++ b/external/eigen3/doc/snippets/VectorwiseOp_homogeneous.cpp @@ -0,0 +1,7 @@ +typedef Matrix Matrix3Xd; +Matrix3Xd M = Matrix3Xd::Random(3,5); +Projective3d P(Matrix4d::Random()); +cout << "The matrix M is:" << endl << M << endl << endl; +cout << "M.colwise().homogeneous():" << endl << M.colwise().homogeneous() << endl << endl; +cout << "P * M.colwise().homogeneous():" << endl << P * M.colwise().homogeneous() << endl << endl; +cout << "P * M.colwise().homogeneous().hnormalized(): " << endl << (P * M.colwise().homogeneous()).colwise().hnormalized() << endl << endl; \ No newline at end of file diff --git a/external/eigen3/doc/snippets/compile_snippet.cpp.in b/external/eigen3/doc/snippets/compile_snippet.cpp.in index 894cd52..d63f371 100644 --- a/external/eigen3/doc/snippets/compile_snippet.cpp.in +++ b/external/eigen3/doc/snippets/compile_snippet.cpp.in @@ -1,5 +1,13 @@ -#include +static bool eigen_did_assert = false; +#define eigen_assert(X) if(!eigen_did_assert && !(X)){ std::cout << "### Assertion raised in " << __FILE__ << ":" << __LINE__ << ":\n" #X << "\n### The following would happen without assertions:\n"; eigen_did_assert = true;} + #include +#include + +#ifndef M_PI +#define M_PI 3.1415926535897932384626433832795 +#endif + using namespace Eigen; using namespace std; diff --git a/external/eigen3/doc/special_examples/CMakeLists.txt b/external/eigen3/doc/special_examples/CMakeLists.txt index 3ab75db..101fbc5 100644 --- a/external/eigen3/doc/special_examples/CMakeLists.txt +++ b/external/eigen3/doc/special_examples/CMakeLists.txt @@ -19,3 +19,17 @@ if(QT4_FOUND) add_dependencies(all_examples Tutorial_sparse_example) endif(QT4_FOUND) +check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11) +if(EIGEN_COMPILER_SUPPORT_CPP11) + add_executable(random_cpp11 random_cpp11.cpp) + target_link_libraries(random_cpp11 ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + add_dependencies(all_examples random_cpp11) + ei_add_target_property(random_cpp11 COMPILE_FLAGS "-std=c++11") + + add_custom_command( + TARGET random_cpp11 + POST_BUILD + COMMAND random_cpp11 + ARGS >${CMAKE_CURRENT_BINARY_DIR}/random_cpp11.out + ) +endif() diff --git a/external/eigen3/doc/special_examples/Tutorial_sparse_example.cpp b/external/eigen3/doc/special_examples/Tutorial_sparse_example.cpp index 002f19f..830e196 100644 --- a/external/eigen3/doc/special_examples/Tutorial_sparse_example.cpp +++ b/external/eigen3/doc/special_examples/Tutorial_sparse_example.cpp @@ -9,6 +9,8 @@ void saveAsBitmap(const Eigen::VectorXd& x, int n, const char* filename); int main(int argc, char** argv) { + assert(argc==2); + int n = 300; // size of the image int m = n*n; // number of unknows (=number of pixels) diff --git a/external/eigen3/doc/special_examples/Tutorial_sparse_example_details.cpp b/external/eigen3/doc/special_examples/Tutorial_sparse_example_details.cpp index 7d820b4..bc18b01 100644 --- a/external/eigen3/doc/special_examples/Tutorial_sparse_example_details.cpp +++ b/external/eigen3/doc/special_examples/Tutorial_sparse_example_details.cpp @@ -8,7 +8,7 @@ typedef Eigen::Triplet T; void insertCoefficient(int id, int i, int j, double w, std::vector& coeffs, Eigen::VectorXd& b, const Eigen::VectorXd& boundary) { - int n = boundary.size(); + int n = int(boundary.size()); int id1 = i+j*n; if(i==-1 || i==n) b(id) -= w * boundary(j); // constrained coefficient diff --git a/external/eigen3/doc/special_examples/random_cpp11.cpp b/external/eigen3/doc/special_examples/random_cpp11.cpp new file mode 100644 index 0000000..33744c0 --- /dev/null +++ b/external/eigen3/doc/special_examples/random_cpp11.cpp @@ -0,0 +1,14 @@ +#include +#include +#include + +using namespace Eigen; + +int main() { + std::default_random_engine generator; + std::poisson_distribution distribution(4.1); + auto poisson = [&] () {return distribution(generator);}; + + RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson ); + std::cout << v << "\n"; +} diff --git a/external/eigen3/failtest/CMakeLists.txt b/external/eigen3/failtest/CMakeLists.txt index cadc6a2..1a73f05 100644 --- a/external/eigen3/failtest/CMakeLists.txt +++ b/external/eigen3/failtest/CMakeLists.txt @@ -7,6 +7,9 @@ ei_add_failtest("block_nonconst_ctor_on_const_xpr_1") ei_add_failtest("block_nonconst_ctor_on_const_xpr_2") ei_add_failtest("transpose_nonconst_ctor_on_const_xpr") ei_add_failtest("diagonal_nonconst_ctor_on_const_xpr") +ei_add_failtest("cwiseunaryview_nonconst_ctor_on_const_xpr") +ei_add_failtest("triangularview_nonconst_ctor_on_const_xpr") +ei_add_failtest("selfadjointview_nonconst_ctor_on_const_xpr") ei_add_failtest("const_qualified_block_method_retval_0") ei_add_failtest("const_qualified_block_method_retval_1") @@ -25,6 +28,9 @@ ei_add_failtest("block_on_const_type_actually_const_0") ei_add_failtest("block_on_const_type_actually_const_1") ei_add_failtest("transpose_on_const_type_actually_const") ei_add_failtest("diagonal_on_const_type_actually_const") +ei_add_failtest("cwiseunaryview_on_const_type_actually_const") +ei_add_failtest("triangularview_on_const_type_actually_const") +ei_add_failtest("selfadjointview_on_const_type_actually_const") ei_add_failtest("ref_1") ei_add_failtest("ref_2") @@ -32,6 +38,20 @@ ei_add_failtest("ref_3") ei_add_failtest("ref_4") ei_add_failtest("ref_5") +ei_add_failtest("swap_1") +ei_add_failtest("swap_2") + +ei_add_failtest("ternary_1") +ei_add_failtest("ternary_2") + +ei_add_failtest("sparse_ref_1") +ei_add_failtest("sparse_ref_2") +ei_add_failtest("sparse_ref_3") +ei_add_failtest("sparse_ref_4") +ei_add_failtest("sparse_ref_5") + +ei_add_failtest("sparse_storage_mismatch") + ei_add_failtest("partialpivlu_int") ei_add_failtest("fullpivlu_int") ei_add_failtest("llt_int") @@ -40,6 +60,7 @@ ei_add_failtest("qr_int") ei_add_failtest("colpivqr_int") ei_add_failtest("fullpivqr_int") ei_add_failtest("jacobisvd_int") +ei_add_failtest("bdcsvd_int") ei_add_failtest("eigensolver_int") ei_add_failtest("eigensolver_cplx") diff --git a/external/eigen3/failtest/bdcsvd_int.cpp b/external/eigen3/failtest/bdcsvd_int.cpp new file mode 100644 index 0000000..670752c --- /dev/null +++ b/external/eigen3/failtest/bdcsvd_int.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/SVD" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define SCALAR int +#else +#define SCALAR float +#endif + +using namespace Eigen; + +int main() +{ + BDCSVD > qr(Matrix::Random(10,10)); +} diff --git a/external/eigen3/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp b/external/eigen3/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp new file mode 100644 index 0000000..e23cf8f --- /dev/null +++ b/external/eigen3/failtest/cwiseunaryview_nonconst_ctor_on_const_xpr.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + CwiseUnaryView,Matrix3d> t(m); +} + +int main() {} diff --git a/external/eigen3/failtest/cwiseunaryview_on_const_type_actually_const.cpp b/external/eigen3/failtest/cwiseunaryview_on_const_type_actually_const.cpp new file mode 100644 index 0000000..fcd41df --- /dev/null +++ b/external/eigen3/failtest/cwiseunaryview_on_const_type_actually_const.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + CwiseUnaryView,CV_QUALIFIER MatrixXf>(m).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git a/external/eigen3/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp b/external/eigen3/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp new file mode 100644 index 0000000..a240f81 --- /dev/null +++ b/external/eigen3/failtest/selfadjointview_nonconst_ctor_on_const_xpr.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + SelfAdjointView t(m); +} + +int main() {} diff --git a/external/eigen3/failtest/selfadjointview_on_const_type_actually_const.cpp b/external/eigen3/failtest/selfadjointview_on_const_type_actually_const.cpp new file mode 100644 index 0000000..19aaad6 --- /dev/null +++ b/external/eigen3/failtest/selfadjointview_on_const_type_actually_const.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + SelfAdjointView(m).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git a/external/eigen3/failtest/sparse_ref_1.cpp b/external/eigen3/failtest/sparse_ref_1.cpp new file mode 100644 index 0000000..d78d1f9 --- /dev/null +++ b/external/eigen3/failtest/sparse_ref_1.cpp @@ -0,0 +1,18 @@ +#include "../Eigen/Sparse" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void call_ref(Ref > a) { } + +int main() +{ + SparseMatrix a(10,10); + CV_QUALIFIER SparseMatrix& ac(a); + call_ref(ac); +} diff --git a/external/eigen3/failtest/sparse_ref_2.cpp b/external/eigen3/failtest/sparse_ref_2.cpp new file mode 100644 index 0000000..46c9440 --- /dev/null +++ b/external/eigen3/failtest/sparse_ref_2.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Sparse" + +using namespace Eigen; + +void call_ref(Ref > a) { } + +int main() +{ + SparseMatrix A(10,10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(A.row(3)); +#else + call_ref(A.col(3)); +#endif +} diff --git a/external/eigen3/failtest/sparse_ref_3.cpp b/external/eigen3/failtest/sparse_ref_3.cpp new file mode 100644 index 0000000..a9949b5 --- /dev/null +++ b/external/eigen3/failtest/sparse_ref_3.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Sparse" + +using namespace Eigen; + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +void call_ref(Ref > a) { } +#else +void call_ref(const Ref > &a) { } +#endif + +int main() +{ + SparseMatrix a(10,10); + call_ref(a+a); +} diff --git a/external/eigen3/failtest/sparse_ref_4.cpp b/external/eigen3/failtest/sparse_ref_4.cpp new file mode 100644 index 0000000..57bb6a1 --- /dev/null +++ b/external/eigen3/failtest/sparse_ref_4.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Sparse" + +using namespace Eigen; + +void call_ref(Ref > a) {} + +int main() +{ + SparseMatrix A(10,10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(A.transpose()); +#else + call_ref(A); +#endif +} diff --git a/external/eigen3/failtest/sparse_ref_5.cpp b/external/eigen3/failtest/sparse_ref_5.cpp new file mode 100644 index 0000000..4478f6f --- /dev/null +++ b/external/eigen3/failtest/sparse_ref_5.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Sparse" + +using namespace Eigen; + +void call_ref(Ref > a) { } + +int main() +{ + SparseMatrix a(10,10); + SparseMatrixBase > &ac(a); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + call_ref(ac); +#else + call_ref(ac.derived()); +#endif +} diff --git a/external/eigen3/failtest/sparse_storage_mismatch.cpp b/external/eigen3/failtest/sparse_storage_mismatch.cpp new file mode 100644 index 0000000..51840d4 --- /dev/null +++ b/external/eigen3/failtest/sparse_storage_mismatch.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Sparse" +using namespace Eigen; + +typedef SparseMatrix Mat1; +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +typedef SparseMatrix Mat2; +#else +typedef SparseMatrix Mat2; +#endif + +int main() +{ + Mat1 a(10,10); + Mat2 b(10,10); + a += b; +} diff --git a/external/eigen3/failtest/swap_1.cpp b/external/eigen3/failtest/swap_1.cpp new file mode 100644 index 0000000..1063797 --- /dev/null +++ b/external/eigen3/failtest/swap_1.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +int main() +{ + VectorXf a(10), b(10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + const DenseBase &ac(a); +#else + DenseBase &ac(a); +#endif + b.swap(ac); +} diff --git a/external/eigen3/failtest/swap_2.cpp b/external/eigen3/failtest/swap_2.cpp new file mode 100644 index 0000000..c130ba6 --- /dev/null +++ b/external/eigen3/failtest/swap_2.cpp @@ -0,0 +1,14 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +int main() +{ + VectorXf a(10), b(10); + VectorXf const &ac(a); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + b.swap(ac); +#else + b.swap(ac.const_cast_derived()); +#endif +} \ No newline at end of file diff --git a/external/eigen3/failtest/ternary_1.cpp b/external/eigen3/failtest/ternary_1.cpp new file mode 100644 index 0000000..b40bcb0 --- /dev/null +++ b/external/eigen3/failtest/ternary_1.cpp @@ -0,0 +1,13 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +int main(int argc,char **) +{ + VectorXf a(10), b(10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + b = argc>1 ? 2*a : -a; +#else + b = argc>1 ? 2*a : VectorXf(-a); +#endif +} diff --git a/external/eigen3/failtest/ternary_2.cpp b/external/eigen3/failtest/ternary_2.cpp new file mode 100644 index 0000000..a46b12b --- /dev/null +++ b/external/eigen3/failtest/ternary_2.cpp @@ -0,0 +1,13 @@ +#include "../Eigen/Core" + +using namespace Eigen; + +int main(int argc,char **) +{ + VectorXf a(10), b(10); +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD + b = argc>1 ? 2*a : a+a; +#else + b = argc>1 ? VectorXf(2*a) : VectorXf(a+a); +#endif +} diff --git a/external/eigen3/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp b/external/eigen3/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp new file mode 100644 index 0000000..807447e --- /dev/null +++ b/external/eigen3/failtest/triangularview_nonconst_ctor_on_const_xpr.cpp @@ -0,0 +1,15 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(CV_QUALIFIER Matrix3d &m){ + TriangularView t(m); +} + +int main() {} diff --git a/external/eigen3/failtest/triangularview_on_const_type_actually_const.cpp b/external/eigen3/failtest/triangularview_on_const_type_actually_const.cpp new file mode 100644 index 0000000..0a381a6 --- /dev/null +++ b/external/eigen3/failtest/triangularview_on_const_type_actually_const.cpp @@ -0,0 +1,16 @@ +#include "../Eigen/Core" + +#ifdef EIGEN_SHOULD_FAIL_TO_BUILD +#define CV_QUALIFIER const +#else +#define CV_QUALIFIER +#endif + +using namespace Eigen; + +void foo(){ + MatrixXf m; + TriangularView(m).coeffRef(0, 0) = 1.0f; +} + +int main() {} diff --git a/external/eigen3/lapack/complex_double.cpp b/external/eigen3/lapack/complex_double.cpp index 424d2b8..c9c5752 100644 --- a/external/eigen3/lapack/complex_double.cpp +++ b/external/eigen3/lapack/complex_double.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2011 Gael Guennebaud +// Copyright (C) 2009-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,3 +15,4 @@ #include "cholesky.cpp" #include "lu.cpp" +#include "svd.cpp" diff --git a/external/eigen3/lapack/complex_single.cpp b/external/eigen3/lapack/complex_single.cpp index c0b2d29..6d11b26 100644 --- a/external/eigen3/lapack/complex_single.cpp +++ b/external/eigen3/lapack/complex_single.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2011 Gael Guennebaud +// Copyright (C) 2009-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,3 +15,4 @@ #include "cholesky.cpp" #include "lu.cpp" +#include "svd.cpp" diff --git a/external/eigen3/lapack/double.cpp b/external/eigen3/lapack/double.cpp index d86549e..ea78bb6 100644 --- a/external/eigen3/lapack/double.cpp +++ b/external/eigen3/lapack/double.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2011 Gael Guennebaud +// Copyright (C) 2009-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,3 +15,4 @@ #include "cholesky.cpp" #include "lu.cpp" #include "eigenvalues.cpp" +#include "svd.cpp" diff --git a/external/eigen3/lapack/eigenvalues.cpp b/external/eigen3/lapack/eigenvalues.cpp index a1526eb..921c515 100644 --- a/external/eigen3/lapack/eigenvalues.cpp +++ b/external/eigen3/lapack/eigenvalues.cpp @@ -7,10 +7,10 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#include "common.h" +#include "lapack_common.h" #include -// computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges +// computes eigen values and vectors of a general N-by-N matrix A EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Scalar* w, Scalar* /*work*/, int* lwork, int *info)) { // TODO exploit the work buffer @@ -22,24 +22,7 @@ EIGEN_LAPACK_FUNC(syev,(char *jobz, char *uplo, int* n, Scalar* a, int *lda, Sca else if(*n<0) *info = -3; else if(*lda +// Copyright (C) 2010-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -11,6 +11,7 @@ #define EIGEN_LAPACK_COMMON_H #include "../blas/common.h" +#include "../Eigen/src/misc/lapack.h" #define EIGEN_LAPACK_FUNC(FUNC,ARGLIST) \ extern "C" { int EIGEN_BLAS_FUNC(FUNC) ARGLIST; } \ @@ -18,6 +19,11 @@ typedef Eigen::Map > PivotsType; +#if ISCOMPLEX +#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) X, +#else +#define EIGEN_LAPACK_ARG_IF_COMPLEX(X) +#endif #endif // EIGEN_LAPACK_COMMON_H diff --git a/external/eigen3/lapack/single.cpp b/external/eigen3/lapack/single.cpp index a64ed44..c7da3ef 100644 --- a/external/eigen3/lapack/single.cpp +++ b/external/eigen3/lapack/single.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009-2011 Gael Guennebaud +// Copyright (C) 2009-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,3 +15,4 @@ #include "cholesky.cpp" #include "lu.cpp" #include "eigenvalues.cpp" +#include "svd.cpp" diff --git a/external/eigen3/lapack/svd.cpp b/external/eigen3/lapack/svd.cpp new file mode 100644 index 0000000..77b302b --- /dev/null +++ b/external/eigen3/lapack/svd.cpp @@ -0,0 +1,138 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "lapack_common.h" +#include + +// computes the singular values/vectors a general M-by-N matrix A using divide-and-conquer +EIGEN_LAPACK_FUNC(gesdd,(char *jobz, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork, + EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int * /*iwork*/, int *info)) +{ + // TODO exploit the work buffer + bool query_size = *lwork==-1; + int diag_size = (std::min)(*m,*n); + + *info = 0; + if(*jobz!='A' && *jobz!='S' && *jobz!='O' && *jobz!='N') *info = -1; + else if(*m<0) *info = -2; + else if(*n<0) *info = -3; + else if(*lda=*n && *ldvt<*n)) *info = -10; + + if(*info!=0) + { + int e = -*info; + return xerbla_(SCALAR_SUFFIX_UP"GESDD ", &e, 6); + } + + if(query_size) + { + *lwork = 0; + return 0; + } + + if(*n==0 || *m==0) + return 0; + + PlainMatrixType mat(*m,*n); + mat = matrix(a,*m,*n,*lda); + + int option = *jobz=='A' ? ComputeFullU|ComputeFullV + : *jobz=='S' ? ComputeThinU|ComputeThinV + : *jobz=='O' ? ComputeThinU|ComputeThinV + : 0; + + BDCSVD svd(mat,option); + + make_vector(s,diag_size) = svd.singularValues().head(diag_size); + + if(*jobz=='A') + { + matrix(u,*m,*m,*ldu) = svd.matrixU(); + matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); + } + else if(*jobz=='S') + { + matrix(u,*m,diag_size,*ldu) = svd.matrixU(); + matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint(); + } + else if(*jobz=='O' && *m>=*n) + { + matrix(a,*m,*n,*lda) = svd.matrixU(); + matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); + } + else if(*jobz=='O') + { + matrix(u,*m,*m,*ldu) = svd.matrixU(); + matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint(); + } + + return 0; +} + +// computes the singular values/vectors a general M-by-N matrix A using two sided jacobi algorithm +EIGEN_LAPACK_FUNC(gesvd,(char *jobu, char *jobv, int *m, int* n, Scalar* a, int *lda, RealScalar *s, Scalar *u, int *ldu, Scalar *vt, int *ldvt, Scalar* /*work*/, int* lwork, + EIGEN_LAPACK_ARG_IF_COMPLEX(RealScalar */*rwork*/) int *info)) +{ + // TODO exploit the work buffer + bool query_size = *lwork==-1; + int diag_size = (std::min)(*m,*n); + + *info = 0; + if( *jobu!='A' && *jobu!='S' && *jobu!='O' && *jobu!='N') *info = -1; + else if((*jobv!='A' && *jobv!='S' && *jobv!='O' && *jobv!='N') + || (*jobu=='O' && *jobv=='O')) *info = -2; + else if(*m<0) *info = -3; + else if(*n<0) *info = -4; + else if(*lda svd(mat,option); + + make_vector(s,diag_size) = svd.singularValues().head(diag_size); + { + if(*jobu=='A') matrix(u,*m,*m,*ldu) = svd.matrixU(); + else if(*jobu=='S') matrix(u,*m,diag_size,*ldu) = svd.matrixU(); + else if(*jobu=='O') matrix(a,*m,diag_size,*lda) = svd.matrixU(); + } + { + if(*jobv=='A') matrix(vt,*n,*n,*ldvt) = svd.matrixV().adjoint(); + else if(*jobv=='S') matrix(vt,diag_size,*n,*ldvt) = svd.matrixV().adjoint(); + else if(*jobv=='O') matrix(a,diag_size,*n,*lda) = svd.matrixV().adjoint(); + } + return 0; +} diff --git a/external/eigen3/scripts/buildtests.in b/external/eigen3/scripts/buildtests.in index 7026373..526d5b7 100755 --- a/external/eigen3/scripts/buildtests.in +++ b/external/eigen3/scripts/buildtests.in @@ -2,7 +2,7 @@ if [[ $# != 1 || $1 == *help ]] then - echo "usage: ./check regexp" + echo "usage: $0 regexp" echo " Builds tests matching the regexp." echo " The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'." echo " For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'" @@ -14,9 +14,9 @@ targets_to_make=`echo "$TESTSLIST" | egrep "$1" | xargs echo` if [ -n "${EIGEN_MAKE_ARGS:+x}" ] then - make $targets_to_make ${EIGEN_MAKE_ARGS} + @CMAKE_MAKE_PROGRAM@ $targets_to_make ${EIGEN_MAKE_ARGS} else - make $targets_to_make + @CMAKE_MAKE_PROGRAM@ $targets_to_make @EIGEN_TEST_BUILD_FLAGS@ fi exit $? diff --git a/external/eigen3/scripts/check.in b/external/eigen3/scripts/check.in index a90061a..7717e2d 100755 --- a/external/eigen3/scripts/check.in +++ b/external/eigen3/scripts/check.in @@ -3,7 +3,7 @@ if [[ $# != 1 || $1 == *help ]] then - echo "usage: ./check regexp" + echo "usage: $0 regexp" echo " Builds and runs tests matching the regexp." echo " The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'." echo " For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'" diff --git a/external/eigen3/scripts/eigen_gen_docs b/external/eigen3/scripts/eigen_gen_docs index 0e6f9ad..787dcb3 100644 --- a/external/eigen3/scripts/eigen_gen_docs +++ b/external/eigen3/scripts/eigen_gen_docs @@ -4,7 +4,7 @@ # You should call this script with USER set as you want, else some default # will be used USER=${USER:-'orzel'} -UPLOAD_DIR=dox +UPLOAD_DIR=dox-devel #ulimit -v 1024000 diff --git a/external/eigen3/test/CMakeLists.txt b/external/eigen3/test/CMakeLists.txt index 40c8f66..0747aa6 100644 --- a/external/eigen3/test/CMakeLists.txt +++ b/external/eigen3/test/CMakeLists.txt @@ -1,22 +1,38 @@ - -# generate split test header file -message(STATUS ${CMAKE_CURRENT_BINARY_DIR}) -file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h "") -foreach(i RANGE 1 999) - file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h - "#ifdef EIGEN_TEST_PART_${i}\n" - "#define CALL_SUBTEST_${i}(FUNC) CALL_SUBTEST(FUNC)\n" - "#else\n" - "#define CALL_SUBTEST_${i}(FUNC)\n" - "#endif\n\n" +# generate split test header file only if it does not yet exist +# in order to prevent a rebuild everytime cmake is configured +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h "") + foreach(i RANGE 1 999) + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h + "#ifdef EIGEN_TEST_PART_${i}\n" + "#define CALL_SUBTEST_${i}(FUNC) CALL_SUBTEST(FUNC)\n" + "#else\n" + "#define CALL_SUBTEST_${i}(FUNC)\n" + "#endif\n\n" ) -endforeach() + endforeach() +endif() + +# check if we have a Fortran compiler +include("../cmake/language_support.cmake") + +workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS) + +if(EIGEN_Fortran_COMPILER_WORKS) + enable_language(Fortran OPTIONAL) + if(NOT CMAKE_Fortran_COMPILER) + set(EIGEN_Fortran_COMPILER_WORKS OFF) + endif() +endif() + +if(NOT EIGEN_Fortran_COMPILER_WORKS) + # search for a default Lapack library to complete Eigen's one + find_package(LAPACK QUIET) +endif() # configure blas/lapack (use Eigen's ones) -set(BLAS_FOUND TRUE) -set(LAPACK_FOUND TRUE) -set(BLAS_LIBRARIES eigen_blas) -set(LAPACK_LIBRARIES eigen_lapack) +set(EIGEN_BLAS_LIBRARIES eigen_blas) +set(EIGEN_LAPACK_LIBRARIES eigen_lapack) set(EIGEN_TEST_MATRIX_DIR "" CACHE STRING "Enable testing of realword sparse matrices contained in the specified path") if(EIGEN_TEST_MATRIX_DIR) @@ -31,56 +47,63 @@ endif(EIGEN_TEST_MATRIX_DIR) set(SPARSE_LIBS " ") find_package(Cholmod) -if(CHOLMOD_FOUND AND BLAS_FOUND AND LAPACK_FOUND) +if(CHOLMOD_FOUND) add_definitions("-DEIGEN_CHOLMOD_SUPPORT") include_directories(${CHOLMOD_INCLUDES}) - set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) - set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) + set(CHOLMOD_ALL_LIBS ${CHOLMOD_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "Cholmod, ") else() ei_add_property(EIGEN_MISSING_BACKENDS "Cholmod, ") endif() find_package(Umfpack) -if(UMFPACK_FOUND AND BLAS_FOUND) +if(UMFPACK_FOUND) add_definitions("-DEIGEN_UMFPACK_SUPPORT") include_directories(${UMFPACK_INCLUDES}) - set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${BLAS_LIBRARIES}) - set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${BLAS_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + set(UMFPACK_ALL_LIBS ${UMFPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "UmfPack, ") else() ei_add_property(EIGEN_MISSING_BACKENDS "UmfPack, ") endif() -find_package(SuperLU) -if(SUPERLU_FOUND AND BLAS_FOUND) +find_package(SuperLU 4.0) +if(SUPERLU_FOUND) add_definitions("-DEIGEN_SUPERLU_SUPPORT") include_directories(${SUPERLU_INCLUDES}) - set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${BLAS_LIBRARIES}) - set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${BLAS_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) + set(SUPERLU_ALL_LIBS ${SUPERLU_LIBRARIES} ${EIGEN_BLAS_LIBRARIES}) ei_add_property(EIGEN_TESTED_BACKENDS "SuperLU, ") else() ei_add_property(EIGEN_MISSING_BACKENDS "SuperLU, ") endif() -find_package(Pastix) -find_package(Scotch) -find_package(Metis 5.0 REQUIRED) -if(PASTIX_FOUND AND BLAS_FOUND) +find_package(PASTIX QUIET COMPONENTS METIS SCOTCH) +# check that the PASTIX found is a version without MPI +find_path(PASTIX_pastix_nompi.h_INCLUDE_DIRS + NAMES pastix_nompi.h + HINTS ${PASTIX_INCLUDE_DIRS} +) +if (NOT PASTIX_pastix_nompi.h_INCLUDE_DIRS) + message(STATUS "A version of Pastix has been found but pastix_nompi.h does not exist in the include directory." + " Because Eigen tests require a version without MPI, we disable the Pastix backend.") +endif() +if(PASTIX_FOUND AND PASTIX_pastix_nompi.h_INCLUDE_DIRS) add_definitions("-DEIGEN_PASTIX_SUPPORT") - include_directories(${PASTIX_INCLUDES}) + include_directories(${PASTIX_INCLUDE_DIRS_DEP}) if(SCOTCH_FOUND) - include_directories(${SCOTCH_INCLUDES}) + include_directories(${SCOTCH_INCLUDE_DIRS}) set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${SCOTCH_LIBRARIES}) elseif(METIS_FOUND) - include_directories(${METIS_INCLUDES}) + include_directories(${METIS_INCLUDE_DIRS}) set(PASTIX_LIBRARIES ${PASTIX_LIBRARIES} ${METIS_LIBRARIES}) else(SCOTCH_FOUND) ei_add_property(EIGEN_MISSING_BACKENDS "PaStiX, ") endif(SCOTCH_FOUND) - set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES} ${ORDERING_LIBRARIES} ${BLAS_LIBRARIES}) - set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES} ${BLAS_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES_DEP} ${ORDERING_LIBRARIES}) + set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES_DEP}) ei_add_property(EIGEN_TESTED_BACKENDS "PaStiX, ") else() ei_add_property(EIGEN_MISSING_BACKENDS "PaStiX, ") @@ -88,23 +111,21 @@ endif() if(METIS_FOUND) add_definitions("-DEIGEN_METIS_SUPPORT") - include_directories(${METIS_INCLUDES}) + include_directories(${METIS_INCLUDE_DIRS}) ei_add_property(EIGEN_TESTED_BACKENDS "METIS, ") else() ei_add_property(EIGEN_MISSING_BACKENDS "METIS, ") endif() find_package(SPQR) -if(SPQR_FOUND AND BLAS_FOUND AND LAPACK_FOUND) - if(CHOLMOD_FOUND) - add_definitions("-DEIGEN_SPQR_SUPPORT") - include_directories(${SPQR_INCLUDES}) - set(SPQR_ALL_LIBS ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARIES} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES}) - set(SPARSE_LIBS ${SPARSE_LIBS} ${SPQR_ALL_LIBS}) - ei_add_property(EIGEN_TESTED_BACKENDS "SPQR, ") - else(CHOLMOD_FOUND) - ei_add_property(EIGEN_MISSING_BACKENDS "SPQR, ") - endif(CHOLMOD_FOUND) +if(SPQR_FOUND AND CHOLMOD_FOUND AND (EIGEN_Fortran_COMPILER_WORKS OR LAPACK_FOUND) ) + add_definitions("-DEIGEN_SPQR_SUPPORT") + include_directories(${SPQR_INCLUDES}) + set(SPQR_ALL_LIBS ${SPQR_LIBRARIES} ${CHOLMOD_LIBRARIES} ${EIGEN_LAPACK_LIBRARIES} ${EIGEN_BLAS_LIBRARIES} ${LAPACK_LIBRARIES}) + set(SPARSE_LIBS ${SPARSE_LIBS} ${SPQR_ALL_LIBS}) + ei_add_property(EIGEN_TESTED_BACKENDS "SPQR, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "SPQR, ") endif() option(EIGEN_TEST_NOQT "Disable Qt support in unit tests" OFF) @@ -127,24 +148,31 @@ add_custom_target(BuildOfficial) ei_add_test(rand) ei_add_test(meta) +ei_add_test(numext) ei_add_test(sizeof) ei_add_test(dynalloc) ei_add_test(nomalloc) ei_add_test(first_aligned) +ei_add_test(nullary) ei_add_test(mixingtypes) -ei_add_test(packetmath) +ei_add_test(packetmath "-DEIGEN_FAST_MATH=1") ei_add_test(unalignedassert) ei_add_test(vectorization_logic) ei_add_test(basicstuff) +ei_add_test(constructor) ei_add_test(linearstructure) ei_add_test(integer_types) -ei_add_test(cwiseop) ei_add_test(unalignedcount) -ei_add_test(exceptions) +if(NOT EIGEN_TEST_NO_EXCEPTIONS) + ei_add_test(exceptions) +endif() ei_add_test(redux) ei_add_test(visitor) ei_add_test(block) ei_add_test(corners) +ei_add_test(swap) +ei_add_test(resize) +ei_add_test(conservative_resize) ei_add_test(product_small) ei_add_test(product_large) ei_add_test(product_extra) @@ -162,6 +190,7 @@ ei_add_test(array_for_matrix) ei_add_test(array_replicate) ei_add_test(array_reverse) ei_add_test(ref) +ei_add_test(is_same_dense) ei_add_test(triangular) ei_add_test(selfadjoint) ei_add_test(product_selfadjoint) @@ -173,6 +202,7 @@ ei_add_test(product_trsolve) ei_add_test(product_mmtr) ei_add_test(product_notemporary) ei_add_test(stable_norm) +ei_add_test(permutationmatrices) ei_add_test(bandmatrix) ei_add_test(cholesky) ei_add_test(lu) @@ -192,56 +222,75 @@ ei_add_test(real_qz) ei_add_test(eigensolver_generalized_real) ei_add_test(jacobi) ei_add_test(jacobisvd) +ei_add_test(bdcsvd) +ei_add_test(householder) ei_add_test(geo_orthomethods) -ei_add_test(geo_homogeneous) ei_add_test(geo_quaternion) -ei_add_test(geo_transformations) ei_add_test(geo_eulerangles) -ei_add_test(geo_hyperplane) ei_add_test(geo_parametrizedline) ei_add_test(geo_alignedbox) +ei_add_test(geo_hyperplane) +ei_add_test(geo_transformations) +ei_add_test(geo_homogeneous) ei_add_test(stdvector) ei_add_test(stdvector_overload) ei_add_test(stdlist) ei_add_test(stdlist_overload) ei_add_test(stddeque) ei_add_test(stddeque_overload) -ei_add_test(resize) -ei_add_test(sparse_vector) ei_add_test(sparse_basic) +ei_add_test(sparse_block) +ei_add_test(sparse_vector) ei_add_test(sparse_product) +ei_add_test(sparse_ref) ei_add_test(sparse_solvers) -ei_add_test(umeyama) -ei_add_test(householder) -ei_add_test(swap) -ei_add_test(conservative_resize) -ei_add_test(permutationmatrices) ei_add_test(sparse_permutations) -ei_add_test(nullary) +ei_add_test(simplicial_cholesky) +ei_add_test(conjugate_gradient) +ei_add_test(incomplete_cholesky) +ei_add_test(bicgstab) +ei_add_test(lscg) +ei_add_test(sparselu) +ei_add_test(sparseqr) +ei_add_test(umeyama) ei_add_test(nesting_ops "${CMAKE_CXX_FLAGS_DEBUG}") ei_add_test(zerosized) ei_add_test(dontalign) -ei_add_test(sizeoverflow) +ei_add_test(evaluators) +if(NOT EIGEN_TEST_NO_EXCEPTIONS) + ei_add_test(sizeoverflow) +endif() ei_add_test(prec_inverse_4x4) ei_add_test(vectorwiseop) ei_add_test(special_numbers) ei_add_test(rvalue_types) +ei_add_test(dense_storage) +ei_add_test(ctorleak) ei_add_test(mpl2only) +ei_add_test(inplace_decomposition) +ei_add_test(half_float) +ei_add_test(array_of_string) -ei_add_test(simplicial_cholesky) -ei_add_test(conjugate_gradient) -ei_add_test(bicgstab) -ei_add_test(sparselu) -ei_add_test(sparseqr) +add_executable(bug1213 bug1213.cpp bug1213_main.cpp) + +check_cxx_compiler_flag("-ffast-math" COMPILER_SUPPORT_FASTMATH) +if(COMPILER_SUPPORT_FASTMATH) + set(EIGEN_FASTMATH_FLAGS "-ffast-math") +else() + check_cxx_compiler_flag("/fp:fast" COMPILER_SUPPORT_FPFAST) + if(COMPILER_SUPPORT_FPFAST) + set(EIGEN_FASTMATH_FLAGS "/fp:fast") + endif() +endif() + +ei_add_test(fastmath " ${EIGEN_FASTMATH_FLAGS} ") -# ei_add_test(denseLM) +# # ei_add_test(denseLM) if(QT4_FOUND) ei_add_test(qtvector "" "${QT_QTCORE_LIBRARY}") endif(QT4_FOUND) -ei_add_test(eigen2support) - if(UMFPACK_FOUND) ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}") endif() @@ -286,9 +335,54 @@ ei_add_property(EIGEN_TESTING_SUMMARY "Sparse lib flags: ${SPARSE_LIBS}\n") option(EIGEN_TEST_EIGEN2 "Run whole Eigen2 test suite against EIGEN2_SUPPORT" OFF) mark_as_advanced(EIGEN_TEST_EIGEN2) if(EIGEN_TEST_EIGEN2) - add_subdirectory(eigen2) + message(WARNING "The Eigen2 test suite has been removed") endif() +# boost MP unit test +find_package(Boost) +if(Boost_FOUND) + include_directories(${Boost_INCLUDE_DIRS}) + ei_add_test(boostmultiprec "" "${Boost_LIBRARIES}") + ei_add_property(EIGEN_TESTED_BACKENDS "Boost.Multiprecision, ") +else() + ei_add_property(EIGEN_MISSING_BACKENDS "Boost.Multiprecision, ") +endif() + + +# CUDA unit tests +option(EIGEN_TEST_CUDA "Enable CUDA support in unit tests" OFF) +option(EIGEN_TEST_CUDA_CLANG "Use clang instead of nvcc to compile the CUDA tests" OFF) + +if(EIGEN_TEST_CUDA_CLANG AND NOT CMAKE_CXX_COMPILER MATCHES "clang") + message(WARNING "EIGEN_TEST_CUDA_CLANG is set, but CMAKE_CXX_COMPILER does not appear to be clang.") +endif() + +if(EIGEN_TEST_CUDA) + +find_package(CUDA 5.0) +if(CUDA_FOUND) + + set(CUDA_PROPAGATE_HOST_FLAGS OFF) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) + endif() + if(EIGEN_TEST_CUDA_CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_30") + endif() + cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR}) + set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") + + ei_add_test(cuda_basic) + + unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) + +endif(CUDA_FOUND) + +endif(EIGEN_TEST_CUDA) + + +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests) +add_test(NAME failtests WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/failtests COMMAND ${CMAKE_COMMAND} ${Eigen_SOURCE_DIR} -G "${CMAKE_GENERATOR}" -DEIGEN_FAILTEST=ON) option(EIGEN_TEST_BUILD_DOCUMENTATION "Test building the doxygen documentation" OFF) IF(EIGEN_TEST_BUILD_DOCUMENTATION) diff --git a/external/eigen3/test/adjoint.cpp b/external/eigen3/test/adjoint.cpp index ea36f78..bdea51c 100644 --- a/external/eigen3/test/adjoint.cpp +++ b/external/eigen3/test/adjoint.cpp @@ -42,6 +42,17 @@ template<> struct adjoint_specific { VERIFY_IS_APPROX(v1, v1.norm() * v3); VERIFY_IS_APPROX(v3, v1.normalized()); VERIFY_IS_APPROX(v3.norm(), RealScalar(1)); + + // check null inputs + VERIFY_IS_APPROX((v1*0).normalized(), (v1*0)); +#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE) + RealScalar very_small = (std::numeric_limits::min)(); + VERIFY( (v1*very_small).norm() == 0 ); + VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small)); + v3 = v1*very_small; + v3.normalize(); + VERIFY_IS_APPROX(v3, (v1*very_small)); +#endif // check compatibility of dot and adjoint ref = NumTraits::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm())); @@ -64,6 +75,7 @@ template void adjoint(const MatrixType& m) typedef typename NumTraits::Real RealScalar; typedef Matrix VectorType; typedef Matrix SquareMatrixType; + const Index PacketSize = internal::packet_traits::size; Index rows = m.rows(); Index cols = m.cols(); @@ -108,6 +120,17 @@ template void adjoint(const MatrixType& m) VERIFY_IS_APPROX(m3,m1.transpose()); m3.transposeInPlace(); VERIFY_IS_APPROX(m3,m1); + + if(PacketSize(0,m3.rows()-PacketSize); + Index j = internal::random(0,m3.cols()-PacketSize); + m3.template block(i,j).transposeInPlace(); + VERIFY_IS_APPROX( (m3.template block(i,j)), (m1.template block(i,j).transpose()) ); + m3.template block(i,j).transposeInPlace(); + VERIFY_IS_APPROX(m3,m1); + } // check inplace adjoint m3 = m1; @@ -129,14 +152,24 @@ void test_adjoint() CALL_SUBTEST_1( adjoint(Matrix()) ); CALL_SUBTEST_2( adjoint(Matrix3d()) ); CALL_SUBTEST_3( adjoint(Matrix4f()) ); + CALL_SUBTEST_4( adjoint(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); CALL_SUBTEST_5( adjoint(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_6( adjoint(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + + // Complement for 128 bits vectorization: + CALL_SUBTEST_8( adjoint(Matrix2d()) ); + CALL_SUBTEST_9( adjoint(Matrix()) ); + + // 256 bits vectorization: + CALL_SUBTEST_10( adjoint(Matrix()) ); + CALL_SUBTEST_11( adjoint(Matrix()) ); + CALL_SUBTEST_12( adjoint(Matrix()) ); } // test a large static matrix only once CALL_SUBTEST_7( adjoint(Matrix()) ); -#ifdef EIGEN_TEST_PART_4 +#ifdef EIGEN_TEST_PART_13 { MatrixXcf a(10,10), b(10,10); VERIFY_RAISES_ASSERT(a = a.transpose()); @@ -154,6 +187,13 @@ void test_adjoint() a.transpose() = a.adjoint(); a.transpose() += a.adjoint(); a.transpose() += a.adjoint() + b; + + // regression tests for check_for_aliasing + MatrixXd c(10,10); + c = 1.0 * MatrixXd::Ones(10,10) + c; + c = MatrixXd::Ones(10,10) * 1.0 + c; + c = c + MatrixXd::Ones(10,10) .cwiseProduct( MatrixXd::Zero(10,10) ); + c = MatrixXd::Ones(10,10) * MatrixXd::Zero(10,10); } #endif } diff --git a/external/eigen3/test/array.cpp b/external/eigen3/test/array.cpp index 68f6b3d..15c3266 100644 --- a/external/eigen3/test/array.cpp +++ b/external/eigen3/test/array.cpp @@ -13,6 +13,7 @@ template void array(const ArrayType& m) { typedef typename ArrayType::Index Index; typedef typename ArrayType::Scalar Scalar; + typedef typename ArrayType::RealScalar RealScalar; typedef Array ColVectorType; typedef Array RowVectorType; @@ -22,6 +23,8 @@ template void array(const ArrayType& m) ArrayType m1 = ArrayType::Random(rows, cols), m2 = ArrayType::Random(rows, cols), m3(rows, cols); + ArrayType m4 = m1; // copy constructor + VERIFY_IS_APPROX(m1, m4); ColVectorType cv1 = ColVectorType::Random(rows); RowVectorType rv1 = RowVectorType::Random(cols); @@ -70,7 +73,7 @@ template void array(const ArrayType& m) VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.rowwise().sum().sum() - m1.sum()), m1.abs().sum()); if (!internal::isMuchSmallerThan(abs(m1.sum() - (m1+m2).sum()), m1.abs().sum(), test_precision())) VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; @@ -81,6 +84,47 @@ template void array(const ArrayType& m) VERIFY_IS_APPROX(m3.rowwise() += rv1, m1.rowwise() + rv1); m3 = m1; VERIFY_IS_APPROX(m3.rowwise() -= rv1, m1.rowwise() - rv1); + + // Conversion from scalar + VERIFY_IS_APPROX((m3 = s1), ArrayType::Constant(rows,cols,s1)); + VERIFY_IS_APPROX((m3 = 1), ArrayType::Constant(rows,cols,1)); + VERIFY_IS_APPROX((m3.topLeftCorner(rows,cols) = 1), ArrayType::Constant(rows,cols,1)); + typedef Array FixedArrayType; + FixedArrayType f1(s1); + VERIFY_IS_APPROX(f1, FixedArrayType::Constant(s1)); + FixedArrayType f2(numext::real(s1)); + VERIFY_IS_APPROX(f2, FixedArrayType::Constant(numext::real(s1))); + FixedArrayType f3((int)100*numext::real(s1)); + VERIFY_IS_APPROX(f3, FixedArrayType::Constant((int)100*numext::real(s1))); + f1.setRandom(); + FixedArrayType f4(f1.data()); + VERIFY_IS_APPROX(f4, f1); + + // pow + VERIFY_IS_APPROX(m1.pow(2), m1.square()); + VERIFY_IS_APPROX(pow(m1,2), m1.square()); + VERIFY_IS_APPROX(m1.pow(3), m1.cube()); + VERIFY_IS_APPROX(pow(m1,3), m1.cube()); + VERIFY_IS_APPROX((-m1).pow(3), -m1.cube()); + VERIFY_IS_APPROX(pow(2*m1,3), 8*m1.cube()); + ArrayType exponents = ArrayType::Constant(rows, cols, RealScalar(2)); + VERIFY_IS_APPROX(Eigen::pow(m1,exponents), m1.square()); + VERIFY_IS_APPROX(m1.pow(exponents), m1.square()); + VERIFY_IS_APPROX(Eigen::pow(2*m1,exponents), 4*m1.square()); + VERIFY_IS_APPROX((2*m1).pow(exponents), 4*m1.square()); + VERIFY_IS_APPROX(Eigen::pow(m1,2*exponents), m1.square().square()); + VERIFY_IS_APPROX(m1.pow(2*exponents), m1.square().square()); + VERIFY_IS_APPROX(Eigen::pow(m1(0,0), exponents), ArrayType::Constant(rows,cols,m1(0,0)*m1(0,0))); + + // Check possible conflicts with 1D ctor + typedef Array OneDArrayType; + OneDArrayType o1(rows); + VERIFY(o1.size()==rows); + OneDArrayType o4((int)rows); + VERIFY(o4.size()==rows); } template void comparisons(const ArrayType& m) @@ -97,8 +141,11 @@ template void comparisons(const ArrayType& m) c = internal::random(0, cols-1); ArrayType m1 = ArrayType::Random(rows, cols), - m2 = ArrayType::Random(rows, cols), - m3(rows, cols); + m2 = ArrayType::Random(rows, cols), + m3(rows, cols), + m4 = m1; + + m4 = (m4.abs()==Scalar(0)).select(1,m4); VERIFY(((m1 + Scalar(1)) > m1).all()); VERIFY(((m1 - Scalar(1)) < m1).all()); @@ -112,11 +159,17 @@ template void comparisons(const ArrayType& m) VERIFY(!(m1 > m2 && m1 < m2).any()); VERIFY((m1 <= m2 || m1 >= m2).all()); - // comparisons to scalar + // comparisons array to scalar VERIFY( (m1 != (m1(r,c)+1) ).any() ); - VERIFY( (m1 > (m1(r,c)-1) ).any() ); - VERIFY( (m1 < (m1(r,c)+1) ).any() ); - VERIFY( (m1 == m1(r,c) ).any() ); + VERIFY( (m1 > (m1(r,c)-1) ).any() ); + VERIFY( (m1 < (m1(r,c)+1) ).any() ); + VERIFY( (m1 == m1(r,c) ).any() ); + + // comparisons scalar to array + VERIFY( ( (m1(r,c)+1) != m1).any() ); + VERIFY( ( (m1(r,c)-1) < m1).any() ); + VERIFY( ( (m1(r,c)+1) > m1).any() ); + VERIFY( ( m1(r,c) == m1).any() ); // test Select VERIFY_IS_APPROX( (m1 void array_real(const ArrayType& m) ArrayType m1 = ArrayType::Random(rows, cols), m2 = ArrayType::Random(rows, cols), - m3(rows, cols); + m3(rows, cols), + m4 = m1; + + m4 = (m4.abs()==Scalar(0)).select(1,m4); Scalar s1 = internal::random(); - // these tests are mostly to check possible compilation issues. + // these tests are mostly to check possible compilation issues with free-functions. VERIFY_IS_APPROX(m1.sin(), sin(m1)); VERIFY_IS_APPROX(m1.cos(), cos(m1)); + VERIFY_IS_APPROX(m1.tan(), tan(m1)); VERIFY_IS_APPROX(m1.asin(), asin(m1)); VERIFY_IS_APPROX(m1.acos(), acos(m1)); - VERIFY_IS_APPROX(m1.tan(), tan(m1)); - + VERIFY_IS_APPROX(m1.atan(), atan(m1)); + VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); + VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); + VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); + + VERIFY_IS_APPROX(m1.arg(), arg(m1)); + VERIFY_IS_APPROX(m1.round(), round(m1)); + VERIFY_IS_APPROX(m1.floor(), floor(m1)); + VERIFY_IS_APPROX(m1.ceil(), ceil(m1)); + VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all()); + VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all()); + VERIFY((m1.isFinite() == (Eigen::isfinite)(m1)).all()); + VERIFY_IS_APPROX(m1.inverse(), inverse(m1)); + VERIFY_IS_APPROX(m1.abs(), abs(m1)); + VERIFY_IS_APPROX(m1.abs2(), abs2(m1)); + VERIFY_IS_APPROX(m1.square(), square(m1)); + VERIFY_IS_APPROX(m1.cube(), cube(m1)); VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); + VERIFY_IS_APPROX(m1.sign(), sign(m1)); + - VERIFY_IS_APPROX(m1.abs().sqrt(), sqrt(abs(m1))); - VERIFY_IS_APPROX(m1.abs(), sqrt(numext::abs2(m1))); + // avoid NaNs with abs() so verification doesn't fail + m3 = m1.abs(); + VERIFY_IS_APPROX(m3.sqrt(), sqrt(abs(m1))); + VERIFY_IS_APPROX(m3.rsqrt(), Scalar(1)/sqrt(abs(m1))); + VERIFY_IS_APPROX(rsqrt(m3), Scalar(1)/sqrt(abs(m1))); + VERIFY_IS_APPROX(m3.log(), log(m3)); + VERIFY_IS_APPROX(m3.log1p(), log1p(m3)); + VERIFY_IS_APPROX(m3.log10(), log10(m3)); + + + VERIFY((!(m1>m2) == (m1<=m2)).all()); + + VERIFY_IS_APPROX(sin(m1.asin()), m1); + VERIFY_IS_APPROX(cos(m1.acos()), m1); + VERIFY_IS_APPROX(tan(m1.atan()), m1); + VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); + VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); + VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); + VERIFY_IS_APPROX(arg(m1), ((m1<0).template cast())*std::acos(-1.0)); + VERIFY((round(m1) <= ceil(m1) && round(m1) >= floor(m1)).all()); + VERIFY((Eigen::isnan)((m1*0.0)/0.0).all()); + VERIFY((Eigen::isinf)(m4/0.0).all()); + VERIFY(((Eigen::isfinite)(m1) && (!(Eigen::isfinite)(m1*0.0/0.0)) && (!(Eigen::isfinite)(m4/0.0))).all()); + VERIFY_IS_APPROX(inverse(inverse(m1)),m1); + VERIFY((abs(m1) == m1 || abs(m1) == -m1).all()); + VERIFY_IS_APPROX(m3, sqrt(abs2(m1))); + VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); + VERIFY_IS_APPROX( m1*m1.sign(),m1.abs()); + VERIFY_IS_APPROX(m1.sign() * m1.abs(), m1); VERIFY_IS_APPROX(numext::abs2(numext::real(m1)) + numext::abs2(numext::imag(m1)), numext::abs2(m1)); VERIFY_IS_APPROX(numext::abs2(real(m1)) + numext::abs2(imag(m1)), numext::abs2(m1)); @@ -187,52 +288,138 @@ template void array_real(const ArrayType& m) // shift argument of logarithm so that it is not zero Scalar smallNumber = NumTraits::dummy_precision(); - VERIFY_IS_APPROX((m1.abs() + smallNumber).log() , log(abs(m1) + smallNumber)); + VERIFY_IS_APPROX((m3 + smallNumber).log() , log(abs(m1) + smallNumber)); + VERIFY_IS_APPROX((m3 + smallNumber + 1).log() , log1p(abs(m1) + smallNumber)); VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2)); VERIFY_IS_APPROX(m1.exp(), exp(m1)); VERIFY_IS_APPROX(m1.exp() / m2.exp(),(m1-m2).exp()); - VERIFY_IS_APPROX(m1.pow(2), m1.square()); - VERIFY_IS_APPROX(pow(m1,2), m1.square()); - - ArrayType exponents = ArrayType::Constant(rows, cols, RealScalar(2)); - VERIFY_IS_APPROX(Eigen::pow(m1,exponents), m1.square()); - - m3 = m1.abs(); VERIFY_IS_APPROX(m3.pow(RealScalar(0.5)), m3.sqrt()); VERIFY_IS_APPROX(pow(m3,RealScalar(0.5)), m3.sqrt()); + VERIFY_IS_APPROX(m3.pow(RealScalar(-0.5)), m3.rsqrt()); + VERIFY_IS_APPROX(pow(m3,RealScalar(-0.5)), m3.rsqrt()); + + VERIFY_IS_APPROX(log10(m3), log(m3)/log(10)); + // scalar by array division const RealScalar tiny = sqrt(std::numeric_limits::epsilon()); s1 += Scalar(tiny); m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); - + // check inplace transpose m3 = m1; m3.transposeInPlace(); - VERIFY_IS_APPROX(m3,m1.transpose()); + VERIFY_IS_APPROX(m3, m1.transpose()); m3.transposeInPlace(); - VERIFY_IS_APPROX(m3,m1); + VERIFY_IS_APPROX(m3, m1); } template void array_complex(const ArrayType& m) { typedef typename ArrayType::Index Index; + typedef typename ArrayType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; Index rows = m.rows(); Index cols = m.cols(); ArrayType m1 = ArrayType::Random(rows, cols), - m2(rows, cols); + m2(rows, cols), + m4 = m1; + + m4.real() = (m4.real().abs()==RealScalar(0)).select(RealScalar(1),m4.real()); + m4.imag() = (m4.imag().abs()==RealScalar(0)).select(RealScalar(1),m4.imag()); + + Array m3(rows, cols); for (Index i = 0; i < m.rows(); ++i) for (Index j = 0; j < m.cols(); ++j) m2(i,j) = sqrt(m1(i,j)); - VERIFY_IS_APPROX(m1.sqrt(), m2); - VERIFY_IS_APPROX(m1.sqrt(), Eigen::sqrt(m1)); + // these tests are mostly to check possible compilation issues with free-functions. + VERIFY_IS_APPROX(m1.sin(), sin(m1)); + VERIFY_IS_APPROX(m1.cos(), cos(m1)); + VERIFY_IS_APPROX(m1.tan(), tan(m1)); + VERIFY_IS_APPROX(m1.sinh(), sinh(m1)); + VERIFY_IS_APPROX(m1.cosh(), cosh(m1)); + VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); + VERIFY_IS_APPROX(m1.arg(), arg(m1)); + VERIFY((m1.isNaN() == (Eigen::isnan)(m1)).all()); + VERIFY((m1.isInf() == (Eigen::isinf)(m1)).all()); + VERIFY((m1.isFinite() == (Eigen::isfinite)(m1)).all()); + VERIFY_IS_APPROX(m1.inverse(), inverse(m1)); + VERIFY_IS_APPROX(m1.log(), log(m1)); + VERIFY_IS_APPROX(m1.log10(), log10(m1)); + VERIFY_IS_APPROX(m1.abs(), abs(m1)); + VERIFY_IS_APPROX(m1.abs2(), abs2(m1)); + VERIFY_IS_APPROX(m1.sqrt(), sqrt(m1)); + VERIFY_IS_APPROX(m1.square(), square(m1)); + VERIFY_IS_APPROX(m1.cube(), cube(m1)); + VERIFY_IS_APPROX(cos(m1+RealScalar(3)*m2), cos((m1+RealScalar(3)*m2).eval())); + VERIFY_IS_APPROX(m1.sign(), sign(m1)); + + + VERIFY_IS_APPROX(m1.exp() * m2.exp(), exp(m1+m2)); + VERIFY_IS_APPROX(m1.exp(), exp(m1)); + VERIFY_IS_APPROX(m1.exp() / m2.exp(),(m1-m2).exp()); + + VERIFY_IS_APPROX(sinh(m1), 0.5*(exp(m1)-exp(-m1))); + VERIFY_IS_APPROX(cosh(m1), 0.5*(exp(m1)+exp(-m1))); + VERIFY_IS_APPROX(tanh(m1), (0.5*(exp(m1)-exp(-m1)))/(0.5*(exp(m1)+exp(-m1)))); + + for (Index i = 0; i < m.rows(); ++i) + for (Index j = 0; j < m.cols(); ++j) + m3(i,j) = std::atan2(imag(m1(i,j)), real(m1(i,j))); + VERIFY_IS_APPROX(arg(m1), m3); + + std::complex zero(0.0,0.0); + VERIFY((Eigen::isnan)(m1*zero/zero).all()); +#if EIGEN_COMP_MSVC + // msvc complex division is not robust + VERIFY((Eigen::isinf)(m4/RealScalar(0)).all()); +#else +#if EIGEN_COMP_CLANG + // clang's complex division is notoriously broken too + if((numext::isinf)(m4(0,0)/RealScalar(0))) { +#endif + VERIFY((Eigen::isinf)(m4/zero).all()); +#if EIGEN_COMP_CLANG + } + else + { + VERIFY((Eigen::isinf)(m4.real()/zero.real()).all()); + } +#endif +#endif // MSVC + + VERIFY(((Eigen::isfinite)(m1) && (!(Eigen::isfinite)(m1*zero/zero)) && (!(Eigen::isfinite)(m1/zero))).all()); + + VERIFY_IS_APPROX(inverse(inverse(m1)),m1); + VERIFY_IS_APPROX(conj(m1.conjugate()), m1); + VERIFY_IS_APPROX(abs(m1), sqrt(square(real(m1))+square(imag(m1)))); + VERIFY_IS_APPROX(abs(m1), sqrt(abs2(m1))); + VERIFY_IS_APPROX(log10(m1), log(m1)/log(10)); + + VERIFY_IS_APPROX( m1.sign(), -(-m1).sign() ); + VERIFY_IS_APPROX( m1.sign() * m1.abs(), m1); + + // scalar by array division + Scalar s1 = internal::random(); + const RealScalar tiny = std::sqrt(std::numeric_limits::epsilon()); + s1 += Scalar(tiny); + m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); + VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); + + // check inplace transpose + m2 = m1; + m2.transposeInPlace(); + VERIFY_IS_APPROX(m2, m1.transpose()); + m2.transposeInPlace(); + VERIFY_IS_APPROX(m2, m1); + } template void min_max(const ArrayType& m) @@ -301,7 +488,7 @@ void test_array() VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, int >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, float >::value)); VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); - typedef CwiseUnaryOp, ArrayXd > Xpr; + typedef CwiseUnaryOp, ArrayXd > Xpr; VERIFY((internal::is_same< internal::global_math_functions_filtering_base::type, ArrayBase >::value)); diff --git a/external/eigen3/test/array_for_matrix.cpp b/external/eigen3/test/array_for_matrix.cpp index 9667e1f..b872139 100644 --- a/external/eigen3/test/array_for_matrix.cpp +++ b/external/eigen3/test/array_for_matrix.cpp @@ -45,7 +45,7 @@ template void array_for_matrix(const MatrixType& m) VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).squaredNorm()); VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).squaredNorm()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); + VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); // vector-wise ops m3 = m1; @@ -68,6 +68,16 @@ template void array_for_matrix(const MatrixType& m) const Scalar& ref_a2 = m.array().matrix().coeffRef(0,0); VERIFY(&ref_a1 == &ref_m1); VERIFY(&ref_a2 == &ref_m2); + + // Check write accessors: + m1.array().coeffRef(0,0) = 1; + VERIFY_IS_APPROX(m1(0,0),Scalar(1)); + m1.array()(0,0) = 2; + VERIFY_IS_APPROX(m1(0,0),Scalar(2)); + m1.array().matrix().coeffRef(0,0) = 3; + VERIFY_IS_APPROX(m1(0,0),Scalar(3)); + m1.array().matrix()(0,0) = 4; + VERIFY_IS_APPROX(m1(0,0),Scalar(4)); } template void comparisons(const MatrixType& m) @@ -124,6 +134,12 @@ template void comparisons(const MatrixType& m) // count VERIFY(((m1.array().abs()+1)>RealScalar(0.1)).count() == rows*cols); + // and/or + VERIFY( ((m1.array()RealScalar(0)).matrix()).count() == 0); + VERIFY( ((m1.array()=RealScalar(0)).matrix()).count() == rows*cols); + RealScalar a = m1.cwiseAbs().mean(); + VERIFY( ((m1.array()<-a).matrix() || (m1.array()>a).matrix()).count() == (m1.cwiseAbs().array()>a).count()); + typedef Matrix VectorOfIndices; // TODO allows colwise/rowwise for array @@ -134,9 +150,21 @@ template void comparisons(const MatrixType& m) template void lpNorm(const VectorType& v) { using std::sqrt; + typedef typename VectorType::RealScalar RealScalar; VectorType u = VectorType::Random(v.size()); - VERIFY_IS_APPROX(u.template lpNorm(), u.cwiseAbs().maxCoeff()); + if(v.size()==0) + { + VERIFY_IS_APPROX(u.template lpNorm(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<1>(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<2>(), RealScalar(0)); + VERIFY_IS_APPROX(u.template lpNorm<5>(), RealScalar(0)); + } + else + { + VERIFY_IS_APPROX(u.template lpNorm(), u.cwiseAbs().maxCoeff()); + } + VERIFY_IS_APPROX(u.template lpNorm<1>(), u.cwiseAbs().sum()); VERIFY_IS_APPROX(u.template lpNorm<2>(), sqrt(u.array().abs().square().sum())); VERIFY_IS_APPROX(numext::pow(u.template lpNorm<5>(), typename VectorType::RealScalar(5)), u.array().abs().pow(5).sum()); @@ -207,12 +235,31 @@ template void resize(const MatrixTraits& t) VERIFY(a1.size()==cols); } +template void regression_bug_654() { ArrayXf a = RowVectorXf(3); VectorXf v = Array(3); } +// Check propagation of LvalueBit through Array/Matrix-Wrapper +template +void regrrssion_bug_1410() +{ + const Matrix4i M; + const Array4i A; + ArrayWrapper MA = M.array(); + MA.row(0); + MatrixWrapper AM = A.matrix(); + AM.row(0); + + VERIFY((internal::traits >::Flags&LvalueBit)==0); + VERIFY((internal::traits >::Flags&LvalueBit)==0); + + VERIFY((internal::traits >::Flags&LvalueBit)==LvalueBit); + VERIFY((internal::traits >::Flags&LvalueBit)==LvalueBit); +} + void test_array_for_matrix() { for(int i = 0; i < g_repeat; i++) { @@ -245,10 +292,13 @@ void test_array_for_matrix() CALL_SUBTEST_5( lpNorm(VectorXf(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_4( lpNorm(VectorXcf(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } + CALL_SUBTEST_5( lpNorm(VectorXf(0)) ); + CALL_SUBTEST_4( lpNorm(VectorXcf(0)) ); for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_4( resize(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_5( resize(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_6( resize(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } - CALL_SUBTEST_6( regression_bug_654() ); + CALL_SUBTEST_6( regression_bug_654<0>() ); + CALL_SUBTEST_6( regrrssion_bug_1410<0>() ); } diff --git a/external/eigen3/test/array_of_string.cpp b/external/eigen3/test/array_of_string.cpp new file mode 100644 index 0000000..e23b7c5 --- /dev/null +++ b/external/eigen3/test/array_of_string.cpp @@ -0,0 +1,32 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +void test_array_of_string() +{ + typedef Array ArrayXs; + ArrayXs a1(3), a2(3), a3(3), a3ref(3); + a1 << "one", "two", "three"; + a2 << "1", "2", "3"; + a3ref << "one (1)", "two (2)", "three (3)"; + std::stringstream s1; + s1 << a1; + VERIFY_IS_EQUAL(s1.str(), std::string(" one two three")); + a3 = a1 + std::string(" (") + a2 + std::string(")"); + VERIFY((a3==a3ref).all()); + + a3 = a1; + a3 += std::string(" (") + a2 + std::string(")"); + VERIFY((a3==a3ref).all()); + + a1.swap(a3); + VERIFY((a1==a3ref).all()); + VERIFY((a3!=a3ref).all()); +} diff --git a/external/eigen3/test/array_replicate.cpp b/external/eigen3/test/array_replicate.cpp index f412d1a..779c8fc 100644 --- a/external/eigen3/test/array_replicate.cpp +++ b/external/eigen3/test/array_replicate.cpp @@ -44,6 +44,19 @@ template void replicate(const MatrixType& m) x2 << m2, m2, m2, m2, m2, m2; VERIFY_IS_APPROX(x2, (m2.template replicate<2,3>())); + + x2.resize(rows,3*cols); + x2 << m2, m2, m2; + VERIFY_IS_APPROX(x2, (m2.template replicate<1,3>())); + + vx1.resize(3*rows,cols); + vx1 << m2, m2, m2; + VERIFY_IS_APPROX(vx1+vx1, vx1+(m2.template replicate<3,1>())); + + vx1=m2+(m2.colwise().replicate(1)); + + if(m2.cols()==1) + VERIFY_IS_APPROX(m2.coeff(0), (m2.template replicate<3,1>().coeff(m2.rows()))); x2.resize(rows,f1); for (int j=0; j void reverse(const MatrixType& m) // this test relies a lot on Random.h, and there's not much more that we can do // to test it, hence I consider that we will have tested Random.h - MatrixType m1 = MatrixType::Random(rows, cols); + MatrixType m1 = MatrixType::Random(rows, cols), m2; VectorType v1 = VectorType::Random(rows); MatrixType m1_r = m1.reverse(); @@ -96,14 +96,32 @@ template void reverse(const MatrixType& m) m1.reverse()(r, c) = x; VERIFY_IS_APPROX(x, m1(rows - 1 - r, cols - 1 - c)); + + m2 = m1; + m2.reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.reverse().eval()); + + m2 = m1; + m2.col(0).reverseInPlace(); + VERIFY_IS_APPROX(m2.col(0),m1.col(0).reverse().eval()); + + m2 = m1; + m2.row(0).reverseInPlace(); + VERIFY_IS_APPROX(m2.row(0),m1.row(0).reverse().eval()); + + m2 = m1; + m2.rowwise().reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.rowwise().reverse().eval()); + + m2 = m1; + m2.colwise().reverseInPlace(); + VERIFY_IS_APPROX(m2,m1.colwise().reverse().eval()); - /* m1.colwise().reverse()(r, c) = x; VERIFY_IS_APPROX(x, m1(rows - 1 - r, c)); m1.rowwise().reverse()(r, c) = x; VERIFY_IS_APPROX(x, m1(r, cols - 1 - c)); - */ } void test_array_reverse() @@ -113,11 +131,11 @@ void test_array_reverse() CALL_SUBTEST_2( reverse(Matrix2f()) ); CALL_SUBTEST_3( reverse(Matrix4f()) ); CALL_SUBTEST_4( reverse(Matrix4d()) ); - CALL_SUBTEST_5( reverse(MatrixXcf(3, 3)) ); - CALL_SUBTEST_6( reverse(MatrixXi(6, 3)) ); - CALL_SUBTEST_7( reverse(MatrixXcd(20, 20)) ); + CALL_SUBTEST_5( reverse(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( reverse(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( reverse(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_8( reverse(Matrix()) ); - CALL_SUBTEST_9( reverse(Matrix(6,3)) ); + CALL_SUBTEST_9( reverse(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } #ifdef EIGEN_TEST_PART_3 Vector4f x; x << 1, 2, 3, 4; diff --git a/external/eigen3/test/bandmatrix.cpp b/external/eigen3/test/bandmatrix.cpp index 5e4e8e0..f8c38f7 100644 --- a/external/eigen3/test/bandmatrix.cpp +++ b/external/eigen3/test/bandmatrix.cpp @@ -11,7 +11,6 @@ template void bandmatrix(const MatrixType& _m) { - typedef typename MatrixType::Index Index; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; typedef Matrix DenseMatrixType; @@ -62,8 +61,6 @@ using Eigen::internal::BandMatrix; void test_bandmatrix() { - typedef BandMatrix::Index Index; - for(int i = 0; i < 10*g_repeat ; i++) { Index rows = internal::random(1,10); Index cols = internal::random(1,10); diff --git a/external/eigen3/test/basicstuff.cpp b/external/eigen3/test/basicstuff.cpp index 8c0621e..99d91f9 100644 --- a/external/eigen3/test/basicstuff.cpp +++ b/external/eigen3/test/basicstuff.cpp @@ -126,6 +126,20 @@ template void basicStuff(const MatrixType& m) for(typename MatrixType::Index i=0;i(0,10)>5; + m3 = b ? m1 : m2; + if(b) VERIFY_IS_APPROX(m3,m1); + else VERIFY_IS_APPROX(m3,m2); + m3 = b ? -m1 : m2; + if(b) VERIFY_IS_APPROX(m3,-m1); + else VERIFY_IS_APPROX(m3,m2); + m3 = b ? m1 : -m2; + if(b) VERIFY_IS_APPROX(m3,m1); + else VERIFY_IS_APPROX(m3,-m2); + } } template void basicStuffComplex(const MatrixType& m) @@ -180,15 +194,64 @@ void casting() template void fixedSizeMatrixConstruction() { - const Scalar raw[3] = {1,2,3}; - Matrix m(raw); - Array a(raw); - VERIFY(m(0) == 1); - VERIFY(m(1) == 2); - VERIFY(m(2) == 3); - VERIFY(a(0) == 1); - VERIFY(a(1) == 2); - VERIFY(a(2) == 3); + Scalar raw[4]; + for(int k=0; k<4; ++k) + raw[k] = internal::random(); + + { + Matrix m(raw); + Array a(raw); + for(int k=0; k<4; ++k) VERIFY(m(k) == raw[k]); + for(int k=0; k<4; ++k) VERIFY(a(k) == raw[k]); + VERIFY_IS_EQUAL(m,(Matrix(raw[0],raw[1],raw[2],raw[3]))); + VERIFY((a==(Array(raw[0],raw[1],raw[2],raw[3]))).all()); + } + { + Matrix m(raw); + Array a(raw); + for(int k=0; k<3; ++k) VERIFY(m(k) == raw[k]); + for(int k=0; k<3; ++k) VERIFY(a(k) == raw[k]); + VERIFY_IS_EQUAL(m,(Matrix(raw[0],raw[1],raw[2]))); + VERIFY((a==Array(raw[0],raw[1],raw[2])).all()); + } + { + Matrix m(raw), m2( (DenseIndex(raw[0])), (DenseIndex(raw[1])) ); + Array a(raw), a2( (DenseIndex(raw[0])), (DenseIndex(raw[1])) ); + for(int k=0; k<2; ++k) VERIFY(m(k) == raw[k]); + for(int k=0; k<2; ++k) VERIFY(a(k) == raw[k]); + VERIFY_IS_EQUAL(m,(Matrix(raw[0],raw[1]))); + VERIFY((a==Array(raw[0],raw[1])).all()); + for(int k=0; k<2; ++k) VERIFY(m2(k) == DenseIndex(raw[k])); + for(int k=0; k<2; ++k) VERIFY(a2(k) == DenseIndex(raw[k])); + } + { + Matrix m(raw), + m2( (DenseIndex(raw[0])), (DenseIndex(raw[1])) ), + m3( (int(raw[0])), (int(raw[1])) ), + m4( (float(raw[0])), (float(raw[1])) ); + Array a(raw), a2( (DenseIndex(raw[0])), (DenseIndex(raw[1])) ); + for(int k=0; k<2; ++k) VERIFY(m(k) == raw[k]); + for(int k=0; k<2; ++k) VERIFY(a(k) == raw[k]); + VERIFY_IS_EQUAL(m,(Matrix(raw[0],raw[1]))); + VERIFY((a==Array(raw[0],raw[1])).all()); + for(int k=0; k<2; ++k) VERIFY(m2(k) == DenseIndex(raw[k])); + for(int k=0; k<2; ++k) VERIFY(a2(k) == DenseIndex(raw[k])); + for(int k=0; k<2; ++k) VERIFY(m3(k) == int(raw[k])); + for(int k=0; k<2; ++k) VERIFY((m4(k)) == Scalar(float(raw[k]))); + } + { + Matrix m(raw), m1(raw[0]), m2( (DenseIndex(raw[0])) ), m3( (int(raw[0])) ); + Array a(raw), a1(raw[0]), a2( (DenseIndex(raw[0])) ); + VERIFY(m(0) == raw[0]); + VERIFY(a(0) == raw[0]); + VERIFY(m1(0) == raw[0]); + VERIFY(a1(0) == raw[0]); + VERIFY(m2(0) == DenseIndex(raw[0])); + VERIFY(a2(0) == DenseIndex(raw[0])); + VERIFY(m3(0) == int(raw[0])); + VERIFY_IS_EQUAL(m,(Matrix(raw[0]))); + VERIFY((a==Array(raw[0])).all()); + } } void test_basicstuff() @@ -207,8 +270,11 @@ void test_basicstuff() } CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); CALL_SUBTEST_1(fixedSizeMatrixConstruction()); - CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); + CALL_SUBTEST_1(fixedSizeMatrixConstruction()); CALL_SUBTEST_2(casting()); } diff --git a/external/eigen3/test/bdcsvd.cpp b/external/eigen3/test/bdcsvd.cpp new file mode 100644 index 0000000..f9f687a --- /dev/null +++ b/external/eigen3/test/bdcsvd.cpp @@ -0,0 +1,111 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Gauthier Brun +// Copyright (C) 2013 Nicolas Carre +// Copyright (C) 2013 Jean Ceccato +// Copyright (C) 2013 Pierre Zoppitelli +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/ + +// discard stack allocation as that too bypasses malloc +#define EIGEN_STACK_ALLOCATION_LIMIT 0 +#define EIGEN_RUNTIME_NO_MALLOC + +#include "main.h" +#include +#include +#include + + +#define SVD_DEFAULT(M) BDCSVD +#define SVD_FOR_MIN_NORM(M) BDCSVD +#include "svd_common.h" + +// Check all variants of JacobiSVD +template +void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true) +{ + MatrixType m = a; + if(pickrandom) + svd_fill_random(m); + + CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); +} + +template +void bdcsvd_method() +{ + enum { Size = MatrixType::RowsAtCompileTime }; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix RealVecType; + MatrixType m = MatrixType::Identity(); + VERIFY_IS_APPROX(m.bdcSvd().singularValues(), RealVecType::Ones()); + VERIFY_RAISES_ASSERT(m.bdcSvd().matrixU()); + VERIFY_RAISES_ASSERT(m.bdcSvd().matrixV()); + VERIFY_IS_APPROX(m.bdcSvd(ComputeFullU|ComputeFullV).solve(m), m); +} + +// compare the Singular values returned with Jacobi and Bdc +template +void compare_bdc_jacobi(const MatrixType& a = MatrixType(), unsigned int computationOptions = 0) +{ + MatrixType m = MatrixType::Random(a.rows(), a.cols()); + BDCSVD bdc_svd(m); + JacobiSVD jacobi_svd(m); + VERIFY_IS_APPROX(bdc_svd.singularValues(), jacobi_svd.singularValues()); + if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU()); + if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU()); + if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV()); + if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV()); +} + +void test_bdcsvd() +{ + CALL_SUBTEST_3(( svd_verify_assert >(Matrix3f()) )); + CALL_SUBTEST_4(( svd_verify_assert >(Matrix4d()) )); + CALL_SUBTEST_7(( svd_verify_assert >(MatrixXf(10,12)) )); + CALL_SUBTEST_8(( svd_verify_assert >(MatrixXcd(7,5)) )); + + CALL_SUBTEST_101(( svd_all_trivial_2x2(bdcsvd) )); + CALL_SUBTEST_102(( svd_all_trivial_2x2(bdcsvd) )); + + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_3(( bdcsvd() )); + CALL_SUBTEST_4(( bdcsvd() )); + CALL_SUBTEST_5(( bdcsvd >() )); + + int r = internal::random(1, EIGEN_TEST_MAX_SIZE/2), + c = internal::random(1, EIGEN_TEST_MAX_SIZE/2); + + TEST_SET_BUT_UNUSED_VARIABLE(r) + TEST_SET_BUT_UNUSED_VARIABLE(c) + + CALL_SUBTEST_6(( bdcsvd(Matrix(r,2)) )); + CALL_SUBTEST_7(( bdcsvd(MatrixXf(r,c)) )); + CALL_SUBTEST_7(( compare_bdc_jacobi(MatrixXf(r,c)) )); + CALL_SUBTEST_10(( bdcsvd(MatrixXd(r,c)) )); + CALL_SUBTEST_10(( compare_bdc_jacobi(MatrixXd(r,c)) )); + CALL_SUBTEST_8(( bdcsvd(MatrixXcd(r,c)) )); + CALL_SUBTEST_8(( compare_bdc_jacobi(MatrixXcd(r,c)) )); + + // Test on inf/nan matrix + CALL_SUBTEST_7( (svd_inf_nan, MatrixXf>()) ); + CALL_SUBTEST_10( (svd_inf_nan, MatrixXd>()) ); + } + + // test matrixbase method + CALL_SUBTEST_1(( bdcsvd_method() )); + CALL_SUBTEST_3(( bdcsvd_method() )); + + // Test problem size constructors + CALL_SUBTEST_7( BDCSVD(10,10) ); + + // Check that preallocation avoids subsequent mallocs + CALL_SUBTEST_9( svd_preallocate() ); + + CALL_SUBTEST_2( svd_underoverflow() ); +} + diff --git a/external/eigen3/test/bicgstab.cpp b/external/eigen3/test/bicgstab.cpp index f327e2f..4cc0dd3 100644 --- a/external/eigen3/test/bicgstab.cpp +++ b/external/eigen3/test/bicgstab.cpp @@ -10,13 +10,16 @@ #include "sparse_solver.h" #include -template void test_bicgstab_T() +template void test_bicgstab_T() { - BiCGSTAB, DiagonalPreconditioner > bicgstab_colmajor_diag; - BiCGSTAB, IdentityPreconditioner > bicgstab_colmajor_I; - BiCGSTAB, IncompleteLUT > bicgstab_colmajor_ilut; + BiCGSTAB, DiagonalPreconditioner > bicgstab_colmajor_diag; + BiCGSTAB, IdentityPreconditioner > bicgstab_colmajor_I; + BiCGSTAB, IncompleteLUT > bicgstab_colmajor_ilut; //BiCGSTAB, SSORPreconditioner > bicgstab_colmajor_ssor; + bicgstab_colmajor_diag.setTolerance(NumTraits::epsilon()*4); + bicgstab_colmajor_ilut.setTolerance(NumTraits::epsilon()*4); + CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_diag) ); // CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_I) ); CALL_SUBTEST( check_sparse_square_solving(bicgstab_colmajor_ilut) ); @@ -25,6 +28,7 @@ template void test_bicgstab_T() void test_bicgstab() { - CALL_SUBTEST_1(test_bicgstab_T()); - CALL_SUBTEST_2(test_bicgstab_T >()); + CALL_SUBTEST_1((test_bicgstab_T()) ); + CALL_SUBTEST_2((test_bicgstab_T, int>())); + CALL_SUBTEST_3((test_bicgstab_T())); } diff --git a/external/eigen3/test/block.cpp b/external/eigen3/test/block.cpp index 9ed5d7b..39565af 100644 --- a/external/eigen3/test/block.cpp +++ b/external/eigen3/test/block.cpp @@ -130,6 +130,14 @@ template void block(const MatrixType& m) VERIFY(numext::real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows)); VERIFY(numext::real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols)); + + // chekc that linear acccessors works on blocks + m1 = m1_copy; + if((MatrixType::Flags&RowMajorBit)==0) + VERIFY_IS_EQUAL(m1.leftCols(c1).coeff(r1+c1*rows), m1(r1,c1)); + else + VERIFY_IS_EQUAL(m1.topRows(r1).coeff(c1+r1*cols), m1(r1,c1)); + // now test some block-inside-of-block. @@ -141,11 +149,11 @@ template void block(const MatrixType& m) VERIFY_IS_EQUAL( (m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , (m1.row(r1).segment(c1,c2-c1+1)).transpose() ); // expressions without direct access - VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , ((m1+m2).block(r2,c2,rows-r2,cols-c2)) ); - VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) ); - VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , ((m1+m2).col(c1).segment(r1,r2-r1+1)) ); - VERIFY_IS_EQUAL( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); - VERIFY_IS_EQUAL( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,rows-r1,cols-c1).block(r2-r1,c2-c1,rows-r2,cols-c2)) , ((m1+m2).block(r2,c2,rows-r2,cols-c2)) ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).row(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)) ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).col(0)) , ((m1+m2).col(c1).segment(r1,r2-r1+1)) ); + VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); + VERIFY_IS_APPROX( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); // evaluation into plain matrices from expressions with direct access (stress MapBase) DynamicMatrixType dm; @@ -173,6 +181,19 @@ template void block(const MatrixType& m) dm = m1.row(r1).segment(c1,c2-c1+1).transpose(); dv = m1.transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0); VERIFY_IS_EQUAL(dv, dm); + + VERIFY_IS_EQUAL( (m1.template block(1,0,0,1)), m1.block(1,0,0,1)); + VERIFY_IS_EQUAL( (m1.template block<1,Dynamic>(0,1,1,0)), m1.block(0,1,1,0)); + VERIFY_IS_EQUAL( ((m1*1).template block(1,0,0,1)), m1.block(1,0,0,1)); + VERIFY_IS_EQUAL( ((m1*1).template block<1,Dynamic>(0,1,1,0)), m1.block(0,1,1,0)); + + if (rows>=2 && cols>=2) + { + VERIFY_RAISES_ASSERT( m1 += m1.col(0) ); + VERIFY_RAISES_ASSERT( m1 -= m1.col(0) ); + VERIFY_RAISES_ASSERT( m1.array() *= m1.col(0).array() ); + VERIFY_RAISES_ASSERT( m1.array() /= m1.col(0).array() ); + } } diff --git a/external/eigen3/test/boostmultiprec.cpp b/external/eigen3/test/boostmultiprec.cpp new file mode 100644 index 0000000..e06e9bd --- /dev/null +++ b/external/eigen3/test/boostmultiprec.cpp @@ -0,0 +1,201 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#ifdef EIGEN_TEST_MAX_SIZE +#undef EIGEN_TEST_MAX_SIZE +#endif + +#define EIGEN_TEST_MAX_SIZE 50 + +#ifdef EIGEN_TEST_PART_1 +#include "cholesky.cpp" +#endif + +#ifdef EIGEN_TEST_PART_2 +#include "lu.cpp" +#endif + +#ifdef EIGEN_TEST_PART_3 +#include "qr.cpp" +#endif + +#ifdef EIGEN_TEST_PART_4 +#include "qr_colpivoting.cpp" +#endif + +#ifdef EIGEN_TEST_PART_5 +#include "qr_fullpivoting.cpp" +#endif + +#ifdef EIGEN_TEST_PART_6 +#include "eigensolver_selfadjoint.cpp" +#endif + +#ifdef EIGEN_TEST_PART_7 +#include "eigensolver_generic.cpp" +#endif + +#ifdef EIGEN_TEST_PART_8 +#include "eigensolver_generalized_real.cpp" +#endif + +#ifdef EIGEN_TEST_PART_9 +#include "jacobisvd.cpp" +#endif + +#ifdef EIGEN_TEST_PART_10 +#include "bdcsvd.cpp" +#endif + +#include + +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite + +#include +#include +#include +#include + +namespace mp = boost::multiprecision; +typedef mp::number, mp::et_on> Real; + +namespace Eigen { + template<> struct NumTraits : GenericNumTraits { + static inline Real dummy_precision() { return 1e-50; } + }; + + template + struct NumTraits > : NumTraits {}; + + template<> + Real test_precision() { return 1e-50; } + + // needed in C++93 mode where number does not support explicit cast. + namespace internal { + template + struct cast_impl { + static inline NewType run(const Real& x) { + return x.template convert_to(); + } + }; + + template<> + struct cast_impl > { + static inline std::complex run(const Real& x) { + return std::complex(x); + } + }; + } +} + +namespace boost { +namespace multiprecision { + // to make ADL works as expected: + using boost::math::isfinite; + using boost::math::isnan; + using boost::math::isinf; + using boost::math::copysign; + using boost::math::hypot; + + // The following is needed for std::complex: + Real fabs(const Real& a) { return abs EIGEN_NOT_A_MACRO (a); } + Real fmax(const Real& a, const Real& b) { using std::max; return max(a,b); } + + // some specialization for the unit tests: + inline bool test_isMuchSmallerThan(const Real& a, const Real& b) { + return internal::isMuchSmallerThan(a, b, test_precision()); + } + + inline bool test_isApprox(const Real& a, const Real& b) { + return internal::isApprox(a, b, test_precision()); + } + + inline bool test_isApproxOrLessThan(const Real& a, const Real& b) { + return internal::isApproxOrLessThan(a, b, test_precision()); + } + + Real get_test_precision(const Real&) { + return test_precision(); + } + + Real test_relative_error(const Real &a, const Real &b) { + using Eigen::numext::abs2; + return sqrt(abs2(a-b)/Eigen::numext::mini(abs2(a),abs2(b))); + } +} +} + +namespace Eigen { + +} + +void test_boostmultiprec() +{ + typedef Matrix Mat; + typedef Matrix,Dynamic,Dynamic> MatC; + + std::cout << "NumTraits::epsilon() = " << NumTraits::epsilon() << std::endl; + std::cout << "NumTraits::dummy_precision() = " << NumTraits::dummy_precision() << std::endl; + std::cout << "NumTraits::lowest() = " << NumTraits::lowest() << std::endl; + std::cout << "NumTraits::highest() = " << NumTraits::highest() << std::endl; + std::cout << "NumTraits::digits10() = " << NumTraits::digits10() << std::endl; + + // chekc stream output + { + Mat A(10,10); + A.setRandom(); + std::stringstream ss; + ss << A; + } + { + MatC A(10,10); + A.setRandom(); + std::stringstream ss; + ss << A; + } + + for(int i = 0; i < g_repeat; i++) { + int s = internal::random(1,EIGEN_TEST_MAX_SIZE); + + CALL_SUBTEST_1( cholesky(Mat(s,s)) ); + + CALL_SUBTEST_2( lu_non_invertible() ); + CALL_SUBTEST_2( lu_invertible() ); + CALL_SUBTEST_2( lu_non_invertible() ); + CALL_SUBTEST_2( lu_invertible() ); + + CALL_SUBTEST_3( qr(Mat(internal::random(1,EIGEN_TEST_MAX_SIZE),internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_3( qr_invertible() ); + + CALL_SUBTEST_4( qr() ); + CALL_SUBTEST_4( cod() ); + CALL_SUBTEST_4( qr_invertible() ); + + CALL_SUBTEST_5( qr() ); + CALL_SUBTEST_5( qr_invertible() ); + + CALL_SUBTEST_6( selfadjointeigensolver(Mat(s,s)) ); + + CALL_SUBTEST_7( eigensolver(Mat(s,s)) ); + + CALL_SUBTEST_8( generalized_eigensolver_real(Mat(s,s)) ); + + TEST_SET_BUT_UNUSED_VARIABLE(s) + } + + CALL_SUBTEST_9(( jacobisvd(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); + CALL_SUBTEST_10(( bdcsvd(Mat(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); +} + diff --git a/external/eigen3/test/bug1213.cpp b/external/eigen3/test/bug1213.cpp new file mode 100644 index 0000000..581760c --- /dev/null +++ b/external/eigen3/test/bug1213.cpp @@ -0,0 +1,13 @@ + +// This anonymous enum is essential to trigger the linking issue +enum { + Foo +}; + +#include "bug1213.h" + +bool bug1213_1(const Eigen::Vector3f& x) +{ + return bug1213_2(x); +} + diff --git a/external/eigen3/test/bug1213.h b/external/eigen3/test/bug1213.h new file mode 100644 index 0000000..040e5a4 --- /dev/null +++ b/external/eigen3/test/bug1213.h @@ -0,0 +1,8 @@ + +#include + +template +bool bug1213_2(const Eigen::Matrix& x); + +bool bug1213_1(const Eigen::Vector3f& x); + diff --git a/external/eigen3/test/bug1213_main.cpp b/external/eigen3/test/bug1213_main.cpp new file mode 100644 index 0000000..4802c00 --- /dev/null +++ b/external/eigen3/test/bug1213_main.cpp @@ -0,0 +1,18 @@ + +// This is a regression unit regarding a weird linking issue with gcc. + +#include "bug1213.h" + +int main() +{ + return 0; +} + + +template +bool bug1213_2(const Eigen::Matrix& ) +{ + return true; +} + +template bool bug1213_2(const Eigen::Vector3f&); diff --git a/external/eigen3/test/cholesky.cpp b/external/eigen3/test/cholesky.cpp index 56885de..8ad5ac6 100644 --- a/external/eigen3/test/cholesky.cpp +++ b/external/eigen3/test/cholesky.cpp @@ -11,20 +11,17 @@ #define EIGEN_NO_ASSERTION_CHECKING #endif -static int nb_temporaries; - -#define EIGEN_DENSE_STORAGE_CTOR_PLUGIN { if(size!=0) nb_temporaries++; } +#define TEST_ENABLE_TEMPORARY_TRACKING #include "main.h" #include #include -#define VERIFY_EVALUATION_COUNT(XPR,N) {\ - nb_temporaries = 0; \ - XPR; \ - if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ - VERIFY( (#XPR) && nb_temporaries==N ); \ - } +template +typename MatrixType::RealScalar matrix_l1_norm(const MatrixType& m) { + MatrixType symm = m.template selfadjointView(); + return symm.cwiseAbs().colwise().sum().maxCoeff(); +} template class CholType> void test_chol_update(const MatrixType& symm) { @@ -83,14 +80,10 @@ template void cholesky(const MatrixType& m) symm += a1 * a1.adjoint(); } - // to test if really Cholesky only uses the upper triangular part, uncomment the following - // FIXME: currently that fails !! - //symm.template part().setZero(); - { SquareMatrixType symmUp = symm.template triangularView(); SquareMatrixType symmLo = symm.template triangularView(); - + LLT chollo(symmLo); VERIFY_IS_APPROX(symm, chollo.reconstructedMatrix()); vecX = chollo.solve(vecB); @@ -98,6 +91,14 @@ template void cholesky(const MatrixType& m) matX = chollo.solve(matB); VERIFY_IS_APPROX(symm * matX, matB); + const MatrixType symmLo_inverse = chollo.solve(MatrixType::Identity(rows,cols)); + RealScalar rcond = (RealScalar(1) / matrix_l1_norm(symmLo)) / + matrix_l1_norm(symmLo_inverse); + RealScalar rcond_est = chollo.rcond(); + // Verify that the estimated condition number is within a factor of 10 of the + // truth. + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + // test the upper mode LLT cholup(symmUp); VERIFY_IS_APPROX(symm, cholup.reconstructedMatrix()); @@ -106,6 +107,15 @@ template void cholesky(const MatrixType& m) matX = cholup.solve(matB); VERIFY_IS_APPROX(symm * matX, matB); + // Verify that the estimated condition number is within a factor of 10 of the + // truth. + const MatrixType symmUp_inverse = cholup.solve(MatrixType::Identity(rows,cols)); + rcond = (RealScalar(1) / matrix_l1_norm(symmUp)) / + matrix_l1_norm(symmUp_inverse); + rcond_est = cholup.rcond(); + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + MatrixType neg = -symmLo; chollo.compute(neg); VERIFY(chollo.info()==NumericalIssue); @@ -114,7 +124,7 @@ template void cholesky(const MatrixType& m) VERIFY_IS_APPROX(MatrixType(chollo.matrixU().transpose().conjugate()), MatrixType(chollo.matrixL())); VERIFY_IS_APPROX(MatrixType(cholup.matrixL().transpose().conjugate()), MatrixType(cholup.matrixU())); VERIFY_IS_APPROX(MatrixType(cholup.matrixU().transpose().conjugate()), MatrixType(cholup.matrixL())); - + // test some special use cases of SelfCwiseBinaryOp: MatrixType m1 = MatrixType::Random(rows,cols), m2(rows,cols); m2 = m1; @@ -144,19 +154,38 @@ template void cholesky(const MatrixType& m) SquareMatrixType symmLo = symm.template triangularView(); LDLT ldltlo(symmLo); + VERIFY(ldltlo.info()==Success); VERIFY_IS_APPROX(symm, ldltlo.reconstructedMatrix()); vecX = ldltlo.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); matX = ldltlo.solve(matB); VERIFY_IS_APPROX(symm * matX, matB); + const MatrixType symmLo_inverse = ldltlo.solve(MatrixType::Identity(rows,cols)); + RealScalar rcond = (RealScalar(1) / matrix_l1_norm(symmLo)) / + matrix_l1_norm(symmLo_inverse); + RealScalar rcond_est = ldltlo.rcond(); + // Verify that the estimated condition number is within a factor of 10 of the + // truth. + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + LDLT ldltup(symmUp); + VERIFY(ldltup.info()==Success); VERIFY_IS_APPROX(symm, ldltup.reconstructedMatrix()); vecX = ldltup.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); matX = ldltup.solve(matB); VERIFY_IS_APPROX(symm * matX, matB); + // Verify that the estimated condition number is within a factor of 10 of the + // truth. + const MatrixType symmUp_inverse = ldltup.solve(MatrixType::Identity(rows,cols)); + rcond = (RealScalar(1) / matrix_l1_norm(symmUp)) / + matrix_l1_norm(symmUp_inverse); + rcond_est = ldltup.rcond(); + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + VERIFY_IS_APPROX(MatrixType(ldltlo.matrixL().transpose().conjugate()), MatrixType(ldltlo.matrixU())); VERIFY_IS_APPROX(MatrixType(ldltlo.matrixU().transpose().conjugate()), MatrixType(ldltlo.matrixL())); VERIFY_IS_APPROX(MatrixType(ldltup.matrixL().transpose().conjugate()), MatrixType(ldltup.matrixU())); @@ -185,7 +214,7 @@ template void cholesky(const MatrixType& m) if(rows>=3) { SquareMatrixType A = symm; - int c = internal::random(0,rows-2); + Index c = internal::random(0,rows-2); A.bottomRightCorner(c,c).setZero(); // Make sure a solution exists: vecX.setRandom(); @@ -196,11 +225,11 @@ template void cholesky(const MatrixType& m) vecX = ldltlo.solve(vecB); VERIFY_IS_APPROX(A * vecX, vecB); } - + // check non-full rank matrices if(rows>=3) { - int r = internal::random(1,rows-1); + Index r = internal::random(1,rows-1); Matrix a = Matrix::Random(rows,r); SquareMatrixType A = a * a.adjoint(); // Make sure a solution exists: @@ -212,15 +241,17 @@ template void cholesky(const MatrixType& m) vecX = ldltlo.solve(vecB); VERIFY_IS_APPROX(A * vecX, vecB); } - + // check matrices with a wide spectrum if(rows>=3) { + using std::pow; + using std::sqrt; RealScalar s = (std::min)(16,std::numeric_limits::max_exponent10/8); Matrix a = Matrix::Random(rows,rows); Matrix d = Matrix::Random(rows); - for(int k=0; k(-s,s)); + for(Index k=0; k(-s,s)); SquareMatrixType A = a * d.asDiagonal() * a.adjoint(); // Make sure a solution exists: vecX.setRandom(); @@ -229,7 +260,20 @@ template void cholesky(const MatrixType& m) ldltlo.compute(A); VERIFY_IS_APPROX(A, ldltlo.reconstructedMatrix()); vecX = ldltlo.solve(vecB); - VERIFY_IS_APPROX(A * vecX, vecB); + + if(ldltlo.vectorD().real().cwiseAbs().minCoeff()>RealScalar(0)) + { + VERIFY_IS_APPROX(A * vecX,vecB); + } + else + { + RealScalar large_tol = sqrt(test_precision()); + VERIFY((A * vecX).isApprox(vecB, large_tol)); + + ++g_test_level; + VERIFY_IS_APPROX(A * vecX,vecB); + --g_test_level; + } } } @@ -289,6 +333,7 @@ template void cholesky_cplx(const MatrixType& m) RealMatrixType symmLo = symm.template triangularView(); LDLT ldltlo(symmLo); + VERIFY(ldltlo.info()==Success); VERIFY_IS_APPROX(symm, ldltlo.reconstructedMatrix()); vecX = ldltlo.solve(vecB); VERIFY_IS_APPROX(symm * vecX, vecB); @@ -314,46 +359,101 @@ template void cholesky_bug241(const MatrixType& m) } // LDLT is not guaranteed to work for indefinite matrices, but happens to work fine if matrix is diagonal. -// This test checks that LDLT reports correctly that matrix is indefinite. +// This test checks that LDLT reports correctly that matrix is indefinite. // See http://forum.kde.org/viewtopic.php?f=74&t=106942 and bug 736 template void cholesky_definiteness(const MatrixType& m) { eigen_assert(m.rows() == 2 && m.cols() == 2); MatrixType mat; LDLT ldlt(2); - + { mat << 1, 0, 0, -1; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(!ldlt.isPositive()); } { mat << 1, 2, 2, 1; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(!ldlt.isPositive()); } { mat << 0, 0, 0, 0; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(ldlt.isNegative()); VERIFY(ldlt.isPositive()); } { mat << 0, 0, 0, 1; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(ldlt.isPositive()); } { mat << -1, 0, 0, 0; ldlt.compute(mat); + VERIFY(ldlt.info()==Success); VERIFY(ldlt.isNegative()); VERIFY(!ldlt.isPositive()); } } +template +void cholesky_faillure_cases() +{ + MatrixXd mat; + LDLT ldlt; + + { + mat.resize(2,2); + mat << 0, 1, 1, 0; + ldlt.compute(mat); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + VERIFY(ldlt.info()==NumericalIssue); + } +#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE_SSE2) + { + mat.resize(3,3); + mat << -1, -3, 3, + -3, -8.9999999999999999999, 1, + 3, 1, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } +#endif + { + mat.resize(3,3); + mat << 1, 2, 3, + 2, 4, 1, + 3, 1, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } + + { + mat.resize(8,8); + mat << 0.1, 0, -0.1, 0, 0, 0, 1, 0, + 0, 4.24667, 0, 2.00333, 0, 0, 0, 0, + -0.1, 0, 0.2, 0, -0.1, 0, 0, 0, + 0, 2.00333, 0, 8.49333, 0, 2.00333, 0, 0, + 0, 0, -0.1, 0, 0.1, 0, 0, 1, + 0, 0, 0, 2.00333, 0, 4.24667, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } +} + template void cholesky_verify_assert() { MatrixType tmp; @@ -384,10 +484,14 @@ void test_cholesky() CALL_SUBTEST_3( cholesky_definiteness(Matrix2d()) ); CALL_SUBTEST_4( cholesky(Matrix3f()) ); CALL_SUBTEST_5( cholesky(Matrix4d()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_2( cholesky(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_6( cholesky_cplx(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } CALL_SUBTEST_4( cholesky_verify_assert() ); @@ -398,7 +502,8 @@ void test_cholesky() // Test problem size constructors CALL_SUBTEST_9( LLT(10) ); CALL_SUBTEST_9( LDLT(10) ); - - TEST_SET_BUT_UNUSED_VARIABLE(s) + + CALL_SUBTEST_2( cholesky_faillure_cases() ); + TEST_SET_BUT_UNUSED_VARIABLE(nb_temporaries) } diff --git a/external/eigen3/test/cholmod_support.cpp b/external/eigen3/test/cholmod_support.cpp index 8f8be3c..a7eda28 100644 --- a/external/eigen3/test/cholmod_support.cpp +++ b/external/eigen3/test/cholmod_support.cpp @@ -7,6 +7,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS #include "sparse_solver.h" #include @@ -40,13 +41,13 @@ template void test_cholmod_T() check_sparse_spd_solving(llt_colmajor_upper); check_sparse_spd_solving(ldlt_colmajor_lower); check_sparse_spd_solving(ldlt_colmajor_upper); - -// check_sparse_spd_determinant(chol_colmajor_lower); -// check_sparse_spd_determinant(chol_colmajor_upper); -// check_sparse_spd_determinant(llt_colmajor_lower); -// check_sparse_spd_determinant(llt_colmajor_upper); -// check_sparse_spd_determinant(ldlt_colmajor_lower); -// check_sparse_spd_determinant(ldlt_colmajor_upper); + + check_sparse_spd_determinant(chol_colmajor_lower); + check_sparse_spd_determinant(chol_colmajor_upper); + check_sparse_spd_determinant(llt_colmajor_lower); + check_sparse_spd_determinant(llt_colmajor_upper); + check_sparse_spd_determinant(ldlt_colmajor_lower); + check_sparse_spd_determinant(ldlt_colmajor_upper); } void test_cholmod_support() diff --git a/external/eigen3/test/commainitializer.cpp b/external/eigen3/test/commainitializer.cpp index 2965923..9844adb 100644 --- a/external/eigen3/test/commainitializer.cpp +++ b/external/eigen3/test/commainitializer.cpp @@ -70,8 +70,9 @@ void test_commainitializer() Matrix3d m3; Matrix4d m4; - #ifndef _MSC_VER VERIFY_RAISES_ASSERT( (m3 << 1, 2, 3, 4, 5, 6, 7, 8) ); + + #ifndef _MSC_VER VERIFY_RAISES_ASSERT( (m3 << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) ); #endif @@ -99,6 +100,7 @@ void test_commainitializer() vec[2].transpose(); VERIFY_IS_APPROX(m3, ref); + // recursively test all block-sizes from 0 to 3: test_block_recursion<(1<<8) - 1>(); } diff --git a/external/eigen3/test/conjugate_gradient.cpp b/external/eigen3/test/conjugate_gradient.cpp index 019cc4d..9622fd8 100644 --- a/external/eigen3/test/conjugate_gradient.cpp +++ b/external/eigen3/test/conjugate_gradient.cpp @@ -10,13 +10,14 @@ #include "sparse_solver.h" #include -template void test_conjugate_gradient_T() +template void test_conjugate_gradient_T() { - ConjugateGradient, Lower > cg_colmajor_lower_diag; - ConjugateGradient, Upper > cg_colmajor_upper_diag; - ConjugateGradient, Lower|Upper> cg_colmajor_loup_diag; - ConjugateGradient, Lower, IdentityPreconditioner> cg_colmajor_lower_I; - ConjugateGradient, Upper, IdentityPreconditioner> cg_colmajor_upper_I; + typedef SparseMatrix SparseMatrixType; + ConjugateGradient cg_colmajor_lower_diag; + ConjugateGradient cg_colmajor_upper_diag; + ConjugateGradient cg_colmajor_loup_diag; + ConjugateGradient cg_colmajor_lower_I; + ConjugateGradient cg_colmajor_upper_I; CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_lower_diag) ); CALL_SUBTEST( check_sparse_spd_solving(cg_colmajor_upper_diag) ); @@ -27,6 +28,7 @@ template void test_conjugate_gradient_T() void test_conjugate_gradient() { - CALL_SUBTEST_1(test_conjugate_gradient_T()); - CALL_SUBTEST_2(test_conjugate_gradient_T >()); + CALL_SUBTEST_1(( test_conjugate_gradient_T() )); + CALL_SUBTEST_2(( test_conjugate_gradient_T, int>() )); + CALL_SUBTEST_3(( test_conjugate_gradient_T() )); } diff --git a/external/eigen3/test/constructor.cpp b/external/eigen3/test/constructor.cpp new file mode 100644 index 0000000..eec9e21 --- /dev/null +++ b/external/eigen3/test/constructor.cpp @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#define TEST_ENABLE_TEMPORARY_TRACKING + +#include "main.h" + +template struct Wrapper +{ + MatrixType m_mat; + inline Wrapper(const MatrixType &x) : m_mat(x) {} + inline operator const MatrixType& () const { return m_mat; } + inline operator MatrixType& () { return m_mat; } +}; + +template void ctor_init1(const MatrixType& m) +{ + // Check logic in PlainObjectBase::_init1 + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m0 = MatrixType::Random(rows,cols); + + VERIFY_EVALUATION_COUNT( MatrixType m1(m0), 1); + VERIFY_EVALUATION_COUNT( MatrixType m2(m0+m0), 1); + VERIFY_EVALUATION_COUNT( MatrixType m2(m0.block(0,0,rows,cols)) , 1); + + Wrapper wrapper(m0); + VERIFY_EVALUATION_COUNT( MatrixType m3(wrapper) , 1); +} + + +void test_constructor() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( ctor_init1(Matrix()) ); + CALL_SUBTEST_1( ctor_init1(Matrix4d()) ); + CALL_SUBTEST_1( ctor_init1(MatrixXcf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_1( ctor_init1(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } + { + Matrix a(123); + VERIFY_IS_EQUAL(a[0], 123); + } + { + Matrix a(123.0); + VERIFY_IS_EQUAL(a[0], 123); + } + { + Matrix a(123); + VERIFY_IS_EQUAL(a[0], 123.f); + } + { + Array a(123); + VERIFY_IS_EQUAL(a[0], 123); + } + { + Array a(123.0); + VERIFY_IS_EQUAL(a[0], 123); + } + { + Array a(123); + VERIFY_IS_EQUAL(a[0], 123.f); + } + { + Array a(123); + VERIFY_IS_EQUAL(a(4), 123); + } + { + Array a(123.0); + VERIFY_IS_EQUAL(a(4), 123); + } + { + Array a(123); + VERIFY_IS_EQUAL(a(4), 123.f); + } +} diff --git a/external/eigen3/test/ctorleak.cpp b/external/eigen3/test/ctorleak.cpp new file mode 100644 index 0000000..c158f5e --- /dev/null +++ b/external/eigen3/test/ctorleak.cpp @@ -0,0 +1,69 @@ +#include "main.h" + +#include // std::exception + +struct Foo +{ + static Index object_count; + static Index object_limit; + int dummy; + + Foo() + { +#ifdef EIGEN_EXCEPTIONS + // TODO: Is this the correct way to handle this? + if (Foo::object_count > Foo::object_limit) { std::cout << "\nThrow!\n"; throw Foo::Fail(); } +#endif + std::cout << '+'; + ++Foo::object_count; + } + + ~Foo() + { + std::cout << '-'; + --Foo::object_count; + } + + class Fail : public std::exception {}; +}; + +Index Foo::object_count = 0; +Index Foo::object_limit = 0; + +#undef EIGEN_TEST_MAX_SIZE +#define EIGEN_TEST_MAX_SIZE 3 + +void test_ctorleak() +{ + typedef Matrix MatrixX; + typedef Matrix VectorX; + Foo::object_count = 0; + for(int i = 0; i < g_repeat; i++) { + Index rows = internal::random(2,EIGEN_TEST_MAX_SIZE), cols = internal::random(2,EIGEN_TEST_MAX_SIZE); + Foo::object_limit = internal::random(0, rows*cols - 2); + std::cout << "object_limit =" << Foo::object_limit << std::endl; +#ifdef EIGEN_EXCEPTIONS + try + { +#endif + std::cout << "\nMatrixX m(" << rows << ", " << cols << ");\n"; + MatrixX m(rows, cols); +#ifdef EIGEN_EXCEPTIONS + VERIFY(false); // not reached if exceptions are enabled + } + catch (const Foo::Fail&) { /* ignore */ } +#endif + VERIFY_IS_EQUAL(Index(0), Foo::object_count); + + { + Foo::object_limit = (rows+1)*(cols+1); + MatrixX A(rows, cols); + VERIFY_IS_EQUAL(Foo::object_count, rows*cols); + VectorX v=A.row(0); + VERIFY_IS_EQUAL(Foo::object_count, (rows+1)*cols); + v = A.col(0); + VERIFY_IS_EQUAL(Foo::object_count, rows*(cols+1)); + } + VERIFY_IS_EQUAL(Index(0), Foo::object_count); + } +} diff --git a/external/eigen3/test/cuda_basic.cu b/external/eigen3/test/cuda_basic.cu new file mode 100644 index 0000000..cb2e416 --- /dev/null +++ b/external/eigen3/test/cuda_basic.cu @@ -0,0 +1,173 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// workaround issue between gcc >= 4.7 and cuda 5.5 +#if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7) + #undef _GLIBCXX_ATOMIC_BUILTINS + #undef _GLIBCXX_USE_INT128 +#endif + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cuda_basic +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int + +#include +#include +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include +#endif +#include "main.h" +#include "cuda_common.h" + +// Check that dense modules can be properly parsed by nvcc +#include + +// struct Foo{ +// EIGEN_DEVICE_FUNC +// void operator()(int i, const float* mats, float* vecs) const { +// using namespace Eigen; +// // Matrix3f M(data); +// // Vector3f x(data+9); +// // Map(data+9) = M.inverse() * x; +// Matrix3f M(mats+i/16); +// Vector3f x(vecs+i*3); +// // using std::min; +// // using std::sqrt; +// Map(vecs+i*3) << x.minCoeff(), 1, 2;// / x.dot(x);//(M.inverse() * x) / x.x(); +// //x = x*2 + x.y() * x + x * x.maxCoeff() - x / x.sum(); +// } +// }; + +template +struct coeff_wise { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + T x1(in+i); + T x2(in+i+1); + T x3(in+i+2); + Map res(out+i*T::MaxSizeAtCompileTime); + + res.array() += (in[0] * x1 + x2).array() * x3.array(); + } +}; + +template +struct replicate { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + T x1(in+i); + int step = x1.size() * 4; + int stride = 3 * step; + + typedef Map > MapType; + MapType(out+i*stride+0*step, x1.rows()*2, x1.cols()*2) = x1.replicate(2,2); + MapType(out+i*stride+1*step, x1.rows()*3, x1.cols()) = in[i] * x1.colwise().replicate(3); + MapType(out+i*stride+2*step, x1.rows(), x1.cols()*3) = in[i] * x1.rowwise().replicate(3); + } +}; + +template +struct redux { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + int N = 10; + T x1(in+i); + out[i*N+0] = x1.minCoeff(); + out[i*N+1] = x1.maxCoeff(); + out[i*N+2] = x1.sum(); + out[i*N+3] = x1.prod(); + out[i*N+4] = x1.matrix().squaredNorm(); + out[i*N+5] = x1.matrix().norm(); + out[i*N+6] = x1.colwise().sum().maxCoeff(); + out[i*N+7] = x1.rowwise().maxCoeff().sum(); + out[i*N+8] = x1.matrix().colwise().squaredNorm().sum(); + } +}; + +template +struct prod_test { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const + { + using namespace Eigen; + typedef Matrix T3; + T1 x1(in+i); + T2 x2(in+i+1); + Map res(out+i*T3::MaxSizeAtCompileTime); + res += in[i] * x1 * x2; + } +}; + +template +struct diagonal { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const + { + using namespace Eigen; + T1 x1(in+i); + Map res(out+i*T2::MaxSizeAtCompileTime); + res += x1.diagonal(); + } +}; + +template +struct eigenvalues { + EIGEN_DEVICE_FUNC + void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const + { + using namespace Eigen; + typedef Matrix Vec; + T M(in+i); + Map res(out+i*Vec::MaxSizeAtCompileTime); + T A = M*M.adjoint(); + SelfAdjointEigenSolver eig; + eig.computeDirect(M); + res = eig.eigenvalues(); + } +}; + +void test_cuda_basic() +{ + ei_test_init_cuda(); + + int nthreads = 100; + Eigen::VectorXf in, out; + + #ifndef __CUDA_ARCH__ + int data_size = nthreads * 512; + in.setRandom(data_size); + out.setRandom(data_size); + #endif + + CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(replicate(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(replicate(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(redux(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(redux(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(prod_test(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(prod_test(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(diagonal(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(diagonal(), nthreads, in, out) ); + + CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues(), nthreads, in, out) ); + CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues(), nthreads, in, out) ); + +} diff --git a/external/eigen3/test/cuda_common.h b/external/eigen3/test/cuda_common.h new file mode 100644 index 0000000..9737693 --- /dev/null +++ b/external/eigen3/test/cuda_common.h @@ -0,0 +1,101 @@ + +#ifndef EIGEN_TEST_CUDA_COMMON_H +#define EIGEN_TEST_CUDA_COMMON_H + +#include +#include +#include +#include + +#ifndef __CUDACC__ +dim3 threadIdx, blockDim, blockIdx; +#endif + +template +void run_on_cpu(const Kernel& ker, int n, const Input& in, Output& out) +{ + for(int i=0; i +__global__ +void run_on_cuda_meta_kernel(const Kernel ker, int n, const Input* in, Output* out) +{ + int i = threadIdx.x + blockIdx.x*blockDim.x; + if(i +void run_on_cuda(const Kernel& ker, int n, const Input& in, Output& out) +{ + typename Input::Scalar* d_in; + typename Output::Scalar* d_out; + std::ptrdiff_t in_bytes = in.size() * sizeof(typename Input::Scalar); + std::ptrdiff_t out_bytes = out.size() * sizeof(typename Output::Scalar); + + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_out, out.data(), out_bytes, cudaMemcpyHostToDevice); + + // Simple and non-optimal 1D mapping assuming n is not too large + // That's only for unit testing! + dim3 Blocks(128); + dim3 Grids( (n+int(Blocks.x)-1)/int(Blocks.x) ); + + cudaThreadSynchronize(); + run_on_cuda_meta_kernel<<>>(ker, n, d_in, d_out); + cudaThreadSynchronize(); + + // check inputs have not been modified + cudaMemcpy(const_cast(in.data()), d_in, in_bytes, cudaMemcpyDeviceToHost); + cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost); + + cudaFree(d_in); + cudaFree(d_out); +} + + +template +void run_and_compare_to_cuda(const Kernel& ker, int n, const Input& in, Output& out) +{ + Input in_ref, in_cuda; + Output out_ref, out_cuda; + #ifndef __CUDA_ARCH__ + in_ref = in_cuda = in; + out_ref = out_cuda = out; + #endif + run_on_cpu (ker, n, in_ref, out_ref); + run_on_cuda(ker, n, in_cuda, out_cuda); + #ifndef __CUDA_ARCH__ + VERIFY_IS_APPROX(in_ref, in_cuda); + VERIFY_IS_APPROX(out_ref, out_cuda); + #endif +} + + +void ei_test_init_cuda() +{ + int device = 0; + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, device); + std::cout << "CUDA device info:\n"; + std::cout << " name: " << deviceProp.name << "\n"; + std::cout << " capability: " << deviceProp.major << "." << deviceProp.minor << "\n"; + std::cout << " multiProcessorCount: " << deviceProp.multiProcessorCount << "\n"; + std::cout << " maxThreadsPerMultiProcessor: " << deviceProp.maxThreadsPerMultiProcessor << "\n"; + std::cout << " warpSize: " << deviceProp.warpSize << "\n"; + std::cout << " regsPerBlock: " << deviceProp.regsPerBlock << "\n"; + std::cout << " concurrentKernels: " << deviceProp.concurrentKernels << "\n"; + std::cout << " clockRate: " << deviceProp.clockRate << "\n"; + std::cout << " canMapHostMemory: " << deviceProp.canMapHostMemory << "\n"; + std::cout << " computeMode: " << deviceProp.computeMode << "\n"; +} + +#endif // EIGEN_TEST_CUDA_COMMON_H diff --git a/external/eigen3/test/cwiseop.cpp b/external/eigen3/test/cwiseop.cpp deleted file mode 100644 index d13002c..0000000 --- a/external/eigen3/test/cwiseop.cpp +++ /dev/null @@ -1,187 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN2_SUPPORT -#define EIGEN_NO_EIGEN2_DEPRECATED_WARNING - -#define EIGEN_NO_STATIC_ASSERT -#include "main.h" -#include - -#ifdef min -#undef min -#endif - -#ifdef max -#undef max -#endif - -using namespace std; - -template struct AddIfNull { - const Scalar operator() (const Scalar a, const Scalar b) const {return a<=1e-3 ? b : a;} - enum { Cost = NumTraits::AddCost }; -}; - -template -typename Eigen::internal::enable_if::IsInteger,typename MatrixType::Scalar>::type -cwiseops_real_only(MatrixType& m1, MatrixType& m2, MatrixType& m3, MatrixType& mones) -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - - VERIFY_IS_APPROX(m1.cwise() / m2, m1.cwise() * (m2.cwise().inverse())); - m3 = m1.cwise().abs().cwise().sqrt(); - VERIFY_IS_APPROX(m3.cwise().square(), m1.cwise().abs()); - VERIFY_IS_APPROX(m1.cwise().square().cwise().sqrt(), m1.cwise().abs()); - VERIFY_IS_APPROX(m1.cwise().abs().cwise().log().cwise().exp() , m1.cwise().abs()); - - VERIFY_IS_APPROX(m1.cwise().pow(2), m1.cwise().square()); - m3 = (m1.cwise().abs().cwise()<=RealScalar(0.01)).select(mones,m1); - VERIFY_IS_APPROX(m3.cwise().pow(-1), m3.cwise().inverse()); - m3 = m1.cwise().abs(); - VERIFY_IS_APPROX(m3.cwise().pow(RealScalar(0.5)), m3.cwise().sqrt()); - -// VERIFY_IS_APPROX(m1.cwise().tan(), m1.cwise().sin().cwise() / m1.cwise().cos()); - VERIFY_IS_APPROX(mones, m1.cwise().sin().cwise().square() + m1.cwise().cos().cwise().square()); - m3 = m1; - m3.cwise() /= m2; - VERIFY_IS_APPROX(m3, m1.cwise() / m2); - - return Scalar(0); -} - -template -typename Eigen::internal::enable_if::IsInteger,typename MatrixType::Scalar>::type -cwiseops_real_only(MatrixType& , MatrixType& , MatrixType& , MatrixType& ) -{ - return 0; -} - -template void cwiseops(const MatrixType& m) -{ - typedef typename MatrixType::Index Index; - typedef typename MatrixType::Scalar Scalar; - typedef Matrix VectorType; - - Index rows = m.rows(); - Index cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols), - m1bis = m1, - m2 = MatrixType::Random(rows, cols), - m3(rows, cols), - m4(rows, cols), - mzero = MatrixType::Zero(rows, cols), - mones = MatrixType::Ones(rows, cols), - identity = Matrix - ::Identity(rows, rows); - VectorType vzero = VectorType::Zero(rows), - vones = VectorType::Ones(rows), - v3(rows); - - Index r = internal::random(0, rows-1), - c = internal::random(0, cols-1); - - Scalar s1 = internal::random(); - - // test Zero, Ones, Constant, and the set* variants - m3 = MatrixType::Constant(rows, cols, s1); - for (int j=0; j >(mones); - - VERIFY_IS_APPROX(m1.cwise().pow(2), m1.cwise().abs2()); - VERIFY_IS_APPROX(m1.cwise().pow(2), m1.cwise().square()); - VERIFY_IS_APPROX(m1.cwise().pow(3), m1.cwise().cube()); - - VERIFY_IS_APPROX(m1 + mones, m1.cwise()+Scalar(1)); - VERIFY_IS_APPROX(m1 - mones, m1.cwise()-Scalar(1)); - m3 = m1; m3.cwise() += 1; - VERIFY_IS_APPROX(m1 + mones, m3); - m3 = m1; m3.cwise() -= 1; - VERIFY_IS_APPROX(m1 - mones, m3); - - VERIFY_IS_APPROX(m2, m2.cwise() * mones); - VERIFY_IS_APPROX(m1.cwise() * m2, m2.cwise() * m1); - m3 = m1; - m3.cwise() *= m2; - VERIFY_IS_APPROX(m3, m1.cwise() * m2); - - VERIFY_IS_APPROX(mones, m2.cwise()/m2); - - // check min - VERIFY_IS_APPROX( m1.cwise().min(m2), m2.cwise().min(m1) ); - VERIFY_IS_APPROX( m1.cwise().min(m1+mones), m1 ); - VERIFY_IS_APPROX( m1.cwise().min(m1-mones), m1-mones ); - - // check max - VERIFY_IS_APPROX( m1.cwise().max(m2), m2.cwise().max(m1) ); - VERIFY_IS_APPROX( m1.cwise().max(m1-mones), m1 ); - VERIFY_IS_APPROX( m1.cwise().max(m1+mones), m1+mones ); - - VERIFY( (m1.cwise() == m1).all() ); - VERIFY( (m1.cwise() != m2).any() ); - VERIFY(!(m1.cwise() == (m1+mones)).any() ); - if (rows*cols>1) - { - m3 = m1; - m3(r,c) += 1; - VERIFY( (m1.cwise() == m3).any() ); - VERIFY( !(m1.cwise() == m3).all() ); - } - VERIFY( (m1.cwise().min(m2).cwise() <= m2).all() ); - VERIFY( (m1.cwise().max(m2).cwise() >= m2).all() ); - VERIFY( (m1.cwise().min(m2).cwise() < (m1+mones)).all() ); - VERIFY( (m1.cwise().max(m2).cwise() > (m1-mones)).all() ); - -#if(__cplusplus < 201103L) -// std::binder* are deprecated since c++11 and will be removed in c++17 - VERIFY( (m1.cwise()(), Scalar(1)))).all() ); - VERIFY( !(m1.cwise()(), Scalar(1)))).all() ); - VERIFY( !(m1.cwise()>m1bis.unaryExpr(bind2nd(plus(), Scalar(1)))).any() ); -#endif - - cwiseops_real_only(m1, m2, m3, mones); -} - -void test_cwiseop() -{ - for(int i = 0; i < g_repeat ; i++) { - CALL_SUBTEST_1( cwiseops(Matrix()) ); - CALL_SUBTEST_2( cwiseops(Matrix4d()) ); - CALL_SUBTEST_3( cwiseops(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_4( cwiseops(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_5( cwiseops(MatrixXi(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); - CALL_SUBTEST_6( cwiseops(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); - } -} diff --git a/external/eigen3/test/dense_storage.cpp b/external/eigen3/test/dense_storage.cpp new file mode 100644 index 0000000..e63712b --- /dev/null +++ b/external/eigen3/test/dense_storage.cpp @@ -0,0 +1,76 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include + +template +void dense_storage_copy() +{ + static const int Size = ((Rows==Dynamic || Cols==Dynamic) ? Dynamic : Rows*Cols); + typedef DenseStorage DenseStorageType; + + const int rows = (Rows==Dynamic) ? 4 : Rows; + const int cols = (Cols==Dynamic) ? 3 : Cols; + const int size = rows*cols; + DenseStorageType reference(size, rows, cols); + T* raw_reference = reference.data(); + for (int i=0; i(i); + + DenseStorageType copied_reference(reference); + const T* raw_copied_reference = copied_reference.data(); + for (int i=0; i +void dense_storage_assignment() +{ + static const int Size = ((Rows==Dynamic || Cols==Dynamic) ? Dynamic : Rows*Cols); + typedef DenseStorage DenseStorageType; + + const int rows = (Rows==Dynamic) ? 4 : Rows; + const int cols = (Cols==Dynamic) ? 3 : Cols; + const int size = rows*cols; + DenseStorageType reference(size, rows, cols); + T* raw_reference = reference.data(); + for (int i=0; i(i); + + DenseStorageType copied_reference; + copied_reference = reference; + const T* raw_copied_reference = copied_reference.data(); + for (int i=0; i(); + dense_storage_copy(); + dense_storage_copy(); + dense_storage_copy(); + + dense_storage_copy(); + dense_storage_copy(); + dense_storage_copy(); + dense_storage_copy(); + + dense_storage_assignment(); + dense_storage_assignment(); + dense_storage_assignment(); + dense_storage_assignment(); + + dense_storage_assignment(); + dense_storage_assignment(); + dense_storage_assignment(); + dense_storage_assignment(); +} diff --git a/external/eigen3/test/diagonal.cpp b/external/eigen3/test/diagonal.cpp index 53814a5..c1546e9 100644 --- a/external/eigen3/test/diagonal.cpp +++ b/external/eigen3/test/diagonal.cpp @@ -20,6 +20,8 @@ template void diagonal(const MatrixType& m) MatrixType m1 = MatrixType::Random(rows, cols), m2 = MatrixType::Random(rows, cols); + Scalar s1 = internal::random(); + //check diagonal() VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal()); m2.diagonal() = 2 * m1.diagonal(); @@ -58,6 +60,26 @@ template void diagonal(const MatrixType& m) VERIFY_IS_APPROX(m2.template diagonal(), static_cast(2) * m1.diagonal(N2)); m2.diagonal(N2)[0] *= 3; VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast(6) * m1.diagonal(N2)[0]); + + m2.diagonal(N2).x() = s1; + VERIFY_IS_APPROX(m2.diagonal(N2).x(), s1); + m2.diagonal(N2).coeffRef(0) = Scalar(2)*s1; + VERIFY_IS_APPROX(m2.diagonal(N2).coeff(0), Scalar(2)*s1); + } +} + +template void diagonal_assert(const MatrixType& m) { + Index rows = m.rows(); + Index cols = m.cols(); + + MatrixType m1 = MatrixType::Random(rows, cols); + + if (rows>=2 && cols>=2) + { + VERIFY_RAISES_ASSERT( m1 += m1.diagonal() ); + VERIFY_RAISES_ASSERT( m1 -= m1.diagonal() ); + VERIFY_RAISES_ASSERT( m1.array() *= m1.diagonal().array() ); + VERIFY_RAISES_ASSERT( m1.array() /= m1.diagonal().array() ); } } @@ -74,4 +96,6 @@ void test_diagonal() CALL_SUBTEST_1( diagonal(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_1( diagonal(Matrix(3, 4)) ); } + + CALL_SUBTEST_1( diagonal_assert(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } diff --git a/external/eigen3/test/diagonalmatrices.cpp b/external/eigen3/test/diagonalmatrices.cpp index 149f1db..cd6dc8c 100644 --- a/external/eigen3/test/diagonalmatrices.cpp +++ b/external/eigen3/test/diagonalmatrices.cpp @@ -17,6 +17,7 @@ template void diagonalmatrices(const MatrixType& m) typedef Matrix VectorType; typedef Matrix RowVectorType; typedef Matrix SquareMatrixType; + typedef Matrix DynMatrixType; typedef DiagonalMatrix LeftDiagonalMatrix; typedef DiagonalMatrix RightDiagonalMatrix; typedef Matrix BigMatrix; @@ -64,6 +65,13 @@ template void diagonalmatrices(const MatrixType& m) VERIFY_IS_APPROX( (((v1+v2).asDiagonal() * (m1+m2))(i,j)) , (v1+v2)(i) * (m1+m2)(i,j) ); VERIFY_IS_APPROX( ((m1 * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * m1(i,j) ); VERIFY_IS_APPROX( (((m1+m2) * (rv1+rv2).asDiagonal())(i,j)) , (rv1+rv2)(j) * (m1+m2)(i,j) ); + + if(rows>1) + { + DynMatrixType tmp = m1.topRows(rows/2), res; + VERIFY_IS_APPROX( (res = m1.topRows(rows/2) * rv1.asDiagonal()), tmp * rv1.asDiagonal() ); + VERIFY_IS_APPROX( (res = v1.head(rows/2).asDiagonal()*m1.topRows(rows/2)), v1.head(rows/2).asDiagonal()*tmp ); + } BigMatrix big; big.setZero(2*rows, 2*cols); @@ -84,6 +92,24 @@ template void diagonalmatrices(const MatrixType& m) VERIFY_IS_APPROX(m1 * (rdm1 * s1), (m1 * rdm1) * s1); VERIFY_IS_APPROX(m1 * (s1 * rdm1), (m1 * rdm1) * s1); + + // Diagonal to dense + sq_m1.setRandom(); + sq_m2 = sq_m1; + VERIFY_IS_APPROX( (sq_m1 += (s1*v1).asDiagonal()), sq_m2 += (s1*v1).asDiagonal().toDenseMatrix() ); + VERIFY_IS_APPROX( (sq_m1 -= (s1*v1).asDiagonal()), sq_m2 -= (s1*v1).asDiagonal().toDenseMatrix() ); + VERIFY_IS_APPROX( (sq_m1 = (s1*v1).asDiagonal()), (s1*v1).asDiagonal().toDenseMatrix() ); +} + +template +void bug987() +{ + Matrix3Xd points = Matrix3Xd::Random(3, 3); + Vector2d diag = Vector2d::Random(); + Matrix2Xd tmp1 = points.topRows<2>(), res1, res2; + VERIFY_IS_APPROX( res1 = diag.asDiagonal() * points.topRows<2>(), res2 = diag.asDiagonal() * tmp1 ); + Matrix2d tmp2 = points.topLeftCorner<2,2>(); + VERIFY_IS_APPROX(( res1 = points.topLeftCorner<2,2>()*diag.asDiagonal()) , res2 = tmp2*diag.asDiagonal() ); } void test_diagonalmatrices() @@ -99,4 +125,5 @@ void test_diagonalmatrices() CALL_SUBTEST_8( diagonalmatrices(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_9( diagonalmatrices(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } + CALL_SUBTEST_10( bug987<0>() ); } diff --git a/external/eigen3/test/dynalloc.cpp b/external/eigen3/test/dynalloc.cpp index ef92c05..f1cc70b 100644 --- a/external/eigen3/test/dynalloc.cpp +++ b/external/eigen3/test/dynalloc.cpp @@ -9,18 +9,20 @@ #include "main.h" -#if EIGEN_ALIGN -#define ALIGNMENT 16 +#if EIGEN_MAX_ALIGN_BYTES>0 +#define ALIGNMENT EIGEN_MAX_ALIGN_BYTES #else #define ALIGNMENT 1 #endif +typedef Matrix Vector8f; + void check_handmade_aligned_malloc() { for(int i = 1; i < 1000; i++) { char *p = (char*)internal::handmade_aligned_malloc(i); - VERIFY(size_t(p)%ALIGNMENT==0); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; internal::handmade_aligned_free(p); @@ -29,10 +31,10 @@ void check_handmade_aligned_malloc() void check_aligned_malloc() { - for(int i = 1; i < 1000; i++) + for(int i = ALIGNMENT; i < 1000; i++) { char *p = (char*)internal::aligned_malloc(i); - VERIFY(size_t(p)%ALIGNMENT==0); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; internal::aligned_free(p); @@ -41,10 +43,10 @@ void check_aligned_malloc() void check_aligned_new() { - for(int i = 1; i < 1000; i++) + for(int i = ALIGNMENT; i < 1000; i++) { float *p = internal::aligned_new(i); - VERIFY(size_t(p)%ALIGNMENT==0); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; internal::aligned_delete(p,i); @@ -53,10 +55,10 @@ void check_aligned_new() void check_aligned_stack_alloc() { - for(int i = 1; i < 400; i++) + for(int i = ALIGNMENT; i < 400; i++) { ei_declare_aligned_stack_constructed_variable(float,p,i,0); - VERIFY(size_t(p)%ALIGNMENT==0); + VERIFY(internal::UIntPtr(p)%ALIGNMENT==0); // if the buffer is wrongly allocated this will give a bad write --> check with valgrind for(int j = 0; j < i; j++) p[j]=0; } @@ -68,7 +70,7 @@ struct MyStruct { EIGEN_MAKE_ALIGNED_OPERATOR_NEW char dummychar; - Vector4f avec; + Vector8f avec; }; class MyClassA @@ -76,15 +78,19 @@ class MyClassA public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW char dummychar; - Vector4f avec; + Vector8f avec; }; template void check_dynaligned() { - T* obj = new T; - VERIFY(T::NeedsToAlign==1); - VERIFY(size_t(obj)%ALIGNMENT==0); - delete obj; + // TODO have to be updated once we support multiple alignment values + if(T::SizeAtCompileTime % ALIGNMENT == 0) + { + T* obj = new T; + VERIFY(T::NeedsToAlign==1); + VERIFY(internal::UIntPtr(obj)%ALIGNMENT==0); + delete obj; + } } template void check_custom_new_delete() @@ -100,7 +106,7 @@ template void check_custom_new_delete() delete[] t; } -#ifdef EIGEN_ALIGN +#if EIGEN_MAX_ALIGN_BYTES>0 { T* t = static_cast((T::operator new)(sizeof(T))); (T::operator delete)(t, sizeof(T)); @@ -120,9 +126,17 @@ void test_dynalloc() CALL_SUBTEST(check_aligned_malloc()); CALL_SUBTEST(check_aligned_new()); CALL_SUBTEST(check_aligned_stack_alloc()); + + for (int i=0; i() ); + CALL_SUBTEST( check_custom_new_delete() ); + CALL_SUBTEST( check_custom_new_delete() ); + CALL_SUBTEST( check_custom_new_delete() ); + } // check static allocation, who knows ? - #if EIGEN_ALIGN_STATICALLY + #if EIGEN_MAX_STATIC_ALIGN_BYTES for (int i=0; i() ); @@ -130,23 +144,19 @@ void test_dynalloc() CALL_SUBTEST(check_dynaligned() ); CALL_SUBTEST(check_dynaligned() ); CALL_SUBTEST(check_dynaligned() ); - - CALL_SUBTEST( check_custom_new_delete() ); - CALL_SUBTEST( check_custom_new_delete() ); - CALL_SUBTEST( check_custom_new_delete() ); - CALL_SUBTEST( check_custom_new_delete() ); + CALL_SUBTEST(check_dynaligned() ); } { - MyStruct foo0; VERIFY(size_t(foo0.avec.data())%ALIGNMENT==0); - MyClassA fooA; VERIFY(size_t(fooA.avec.data())%ALIGNMENT==0); + MyStruct foo0; VERIFY(internal::UIntPtr(foo0.avec.data())%ALIGNMENT==0); + MyClassA fooA; VERIFY(internal::UIntPtr(fooA.avec.data())%ALIGNMENT==0); } // dynamic allocation, single object for (int i=0; iavec.data())%ALIGNMENT==0); - MyClassA *fooA = new MyClassA(); VERIFY(size_t(fooA->avec.data())%ALIGNMENT==0); + MyStruct *foo0 = new MyStruct(); VERIFY(internal::UIntPtr(foo0->avec.data())%ALIGNMENT==0); + MyClassA *fooA = new MyClassA(); VERIFY(internal::UIntPtr(fooA->avec.data())%ALIGNMENT==0); delete foo0; delete fooA; } @@ -155,8 +165,8 @@ void test_dynalloc() const int N = 10; for (int i=0; iavec.data())%ALIGNMENT==0); - MyClassA *fooA = new MyClassA[N]; VERIFY(size_t(fooA->avec.data())%ALIGNMENT==0); + MyStruct *foo0 = new MyStruct[N]; VERIFY(internal::UIntPtr(foo0->avec.data())%ALIGNMENT==0); + MyClassA *fooA = new MyClassA[N]; VERIFY(internal::UIntPtr(fooA->avec.data())%ALIGNMENT==0); delete[] foo0; delete[] fooA; } diff --git a/external/eigen3/test/eigen2/CMakeLists.txt b/external/eigen3/test/eigen2/CMakeLists.txt deleted file mode 100644 index 9615a60..0000000 --- a/external/eigen3/test/eigen2/CMakeLists.txt +++ /dev/null @@ -1,61 +0,0 @@ -add_custom_target(eigen2_buildtests) -add_custom_target(eigen2_check COMMAND "ctest -R eigen2") -add_dependencies(eigen2_check eigen2_buildtests) -add_dependencies(buildtests eigen2_buildtests) - -add_definitions("-DEIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API") -add_definitions("-DEIGEN_NO_EIGEN2_DEPRECATED_WARNING") - -ei_add_test(eigen2_meta) -ei_add_test(eigen2_sizeof) -ei_add_test(eigen2_dynalloc) -ei_add_test(eigen2_nomalloc) -#ei_add_test(eigen2_first_aligned) -ei_add_test(eigen2_mixingtypes) -#ei_add_test(eigen2_packetmath) -ei_add_test(eigen2_unalignedassert) -#ei_add_test(eigen2_vectorization_logic) -ei_add_test(eigen2_basicstuff) -ei_add_test(eigen2_linearstructure) -ei_add_test(eigen2_cwiseop) -ei_add_test(eigen2_sum) -ei_add_test(eigen2_product_small) -ei_add_test(eigen2_product_large ${EI_OFLAG}) -ei_add_test(eigen2_adjoint) -ei_add_test(eigen2_submatrices) -ei_add_test(eigen2_miscmatrices) -ei_add_test(eigen2_commainitializer) -ei_add_test(eigen2_smallvectors) -ei_add_test(eigen2_map) -ei_add_test(eigen2_array) -ei_add_test(eigen2_triangular) -ei_add_test(eigen2_cholesky " " "${GSL_LIBRARIES}") -ei_add_test(eigen2_lu ${EI_OFLAG}) -ei_add_test(eigen2_determinant ${EI_OFLAG}) -ei_add_test(eigen2_inverse) -ei_add_test(eigen2_qr) -ei_add_test(eigen2_eigensolver " " "${GSL_LIBRARIES}") -ei_add_test(eigen2_svd) -ei_add_test(eigen2_geometry) -ei_add_test(eigen2_geometry_with_eigen2_prefix) -ei_add_test(eigen2_hyperplane) -ei_add_test(eigen2_parametrizedline) -ei_add_test(eigen2_alignedbox) -ei_add_test(eigen2_regression) -ei_add_test(eigen2_stdvector) -ei_add_test(eigen2_newstdvector) -if(QT4_FOUND) - ei_add_test(eigen2_qtvector " " "${QT_QTCORE_LIBRARY}") -endif(QT4_FOUND) -# no support for eigen2 sparse module -# if(NOT EIGEN_DEFAULT_TO_ROW_MAJOR) -# ei_add_test(eigen2_sparse_vector) -# ei_add_test(eigen2_sparse_basic) -# ei_add_test(eigen2_sparse_solvers " " "${SPARSE_LIBS}") -# ei_add_test(eigen2_sparse_product) -# endif() -ei_add_test(eigen2_swap) -ei_add_test(eigen2_visitor) -ei_add_test(eigen2_bug_132) - -ei_add_test(eigen2_prec_inverse_4x4 ${EI_OFLAG}) diff --git a/external/eigen3/test/eigen2/eigen2_adjoint.cpp b/external/eigen3/test/eigen2/eigen2_adjoint.cpp deleted file mode 100644 index c0f8114..0000000 --- a/external/eigen3/test/eigen2/eigen2_adjoint.cpp +++ /dev/null @@ -1,99 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void adjoint(const MatrixType& m) -{ - /* this test covers the following files: - Transpose.h Conjugate.h Dot.h - */ - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - typedef Matrix SquareMatrixType; - int rows = m.rows(); - int cols = m.cols(); - - RealScalar largerEps = test_precision(); - if (ei_is_same_type::ret) - largerEps = RealScalar(1e-3f); - - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols), - square = SquareMatrixType::Random(rows, rows); - VectorType v1 = VectorType::Random(rows), - v2 = VectorType::Random(rows), - v3 = VectorType::Random(rows), - vzero = VectorType::Zero(rows); - - Scalar s1 = ei_random(), - s2 = ei_random(); - - // check basic compatibility of adjoint, transpose, conjugate - VERIFY_IS_APPROX(m1.transpose().conjugate().adjoint(), m1); - VERIFY_IS_APPROX(m1.adjoint().conjugate().transpose(), m1); - - // check multiplicative behavior - VERIFY_IS_APPROX((m1.adjoint() * m2).adjoint(), m2.adjoint() * m1); - VERIFY_IS_APPROX((s1 * m1).adjoint(), ei_conj(s1) * m1.adjoint()); - - // check basic properties of dot, norm, norm2 - typedef typename NumTraits::Real RealScalar; - VERIFY(ei_isApprox((s1 * v1 + s2 * v2).eigen2_dot(v3), s1 * v1.eigen2_dot(v3) + s2 * v2.eigen2_dot(v3), largerEps)); - VERIFY(ei_isApprox(v3.eigen2_dot(s1 * v1 + s2 * v2), ei_conj(s1)*v3.eigen2_dot(v1)+ei_conj(s2)*v3.eigen2_dot(v2), largerEps)); - VERIFY_IS_APPROX(ei_conj(v1.eigen2_dot(v2)), v2.eigen2_dot(v1)); - VERIFY_IS_APPROX(ei_real(v1.eigen2_dot(v1)), v1.squaredNorm()); - if(NumTraits::HasFloatingPoint) - VERIFY_IS_APPROX(v1.squaredNorm(), v1.norm() * v1.norm()); - VERIFY_IS_MUCH_SMALLER_THAN(ei_abs(vzero.eigen2_dot(v1)), static_cast(1)); - if(NumTraits::HasFloatingPoint) - VERIFY_IS_MUCH_SMALLER_THAN(vzero.norm(), static_cast(1)); - - // check compatibility of dot and adjoint - VERIFY(ei_isApprox(v1.eigen2_dot(square * v2), (square.adjoint() * v1).eigen2_dot(v2), largerEps)); - - // like in testBasicStuff, test operator() to check const-qualification - int r = ei_random(0, rows-1), - c = ei_random(0, cols-1); - VERIFY_IS_APPROX(m1.conjugate()(r,c), ei_conj(m1(r,c))); - VERIFY_IS_APPROX(m1.adjoint()(c,r), ei_conj(m1(r,c))); - - if(NumTraits::HasFloatingPoint) - { - // check that Random().normalized() works: tricky as the random xpr must be evaluated by - // normalized() in order to produce a consistent result. - VERIFY_IS_APPROX(VectorType::Random(rows).normalized().norm(), RealScalar(1)); - } - - // check inplace transpose - m3 = m1; - m3.transposeInPlace(); - VERIFY_IS_APPROX(m3,m1.transpose()); - m3.transposeInPlace(); - VERIFY_IS_APPROX(m3,m1); - -} - -void test_eigen2_adjoint() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( adjoint(Matrix()) ); - CALL_SUBTEST_2( adjoint(Matrix3d()) ); - CALL_SUBTEST_3( adjoint(Matrix4f()) ); - CALL_SUBTEST_4( adjoint(MatrixXcf(4, 4)) ); - CALL_SUBTEST_5( adjoint(MatrixXi(8, 12)) ); - CALL_SUBTEST_6( adjoint(MatrixXf(21, 21)) ); - } - // test a large matrix only once - CALL_SUBTEST_7( adjoint(Matrix()) ); -} - diff --git a/external/eigen3/test/eigen2/eigen2_alignedbox.cpp b/external/eigen3/test/eigen2/eigen2_alignedbox.cpp deleted file mode 100644 index 35043b9..0000000 --- a/external/eigen3/test/eigen2/eigen2_alignedbox.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include -#include -#include - -template void alignedbox(const BoxType& _box) -{ - /* this test covers the following files: - AlignedBox.h - */ - - const int dim = _box.dim(); - typedef typename BoxType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - VectorType p0 = VectorType::Random(dim); - VectorType p1 = VectorType::Random(dim); - RealScalar s1 = ei_random(0,1); - - BoxType b0(dim); - BoxType b1(VectorType::Random(dim),VectorType::Random(dim)); - BoxType b2; - - b0.extend(p0); - b0.extend(p1); - VERIFY(b0.contains(p0*s1+(Scalar(1)-s1)*p1)); - VERIFY(!b0.contains(p0 + (1+s1)*(p1-p0))); - - (b2 = b0).extend(b1); - VERIFY(b2.contains(b0)); - VERIFY(b2.contains(b1)); - VERIFY_IS_APPROX(b2.clamp(b0), b0); - - // casting - const int Dim = BoxType::AmbientDimAtCompileTime; - typedef typename GetDifferentType::type OtherScalar; - AlignedBox hp1f = b0.template cast(); - VERIFY_IS_APPROX(hp1f.template cast(),b0); - AlignedBox hp1d = b0.template cast(); - VERIFY_IS_APPROX(hp1d.template cast(),b0); -} - -void test_eigen2_alignedbox() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( alignedbox(AlignedBox()) ); - CALL_SUBTEST_2( alignedbox(AlignedBox()) ); - CALL_SUBTEST_3( alignedbox(AlignedBox()) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_array.cpp b/external/eigen3/test/eigen2/eigen2_array.cpp deleted file mode 100644 index c1ff40c..0000000 --- a/external/eigen3/test/eigen2/eigen2_array.cpp +++ /dev/null @@ -1,142 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include - -template void array(const MatrixType& m) -{ - /* this test covers the following files: - Array.cpp - */ - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - int rows = m.rows(); - int cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols); - - Scalar s1 = ei_random(), - s2 = ei_random(); - - // scalar addition - VERIFY_IS_APPROX(m1.cwise() + s1, s1 + m1.cwise()); - VERIFY_IS_APPROX(m1.cwise() + s1, MatrixType::Constant(rows,cols,s1) + m1); - VERIFY_IS_APPROX((m1*Scalar(2)).cwise() - s2, (m1+m1) - MatrixType::Constant(rows,cols,s2) ); - m3 = m1; - m3.cwise() += s2; - VERIFY_IS_APPROX(m3, m1.cwise() + s2); - m3 = m1; - m3.cwise() -= s1; - VERIFY_IS_APPROX(m3, m1.cwise() - s1); - - // reductions - VERIFY_IS_APPROX(m1.colwise().sum().sum(), m1.sum()); - VERIFY_IS_APPROX(m1.rowwise().sum().sum(), m1.sum()); - if (!ei_isApprox(m1.sum(), (m1+m2).sum())) - VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum()); - VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op())); -} - -template void comparisons(const MatrixType& m) -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - int rows = m.rows(); - int cols = m.cols(); - - int r = ei_random(0, rows-1), - c = ei_random(0, cols-1); - - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols); - - VERIFY(((m1.cwise() + Scalar(1)).cwise() > m1).all()); - VERIFY(((m1.cwise() - Scalar(1)).cwise() < m1).all()); - if (rows*cols>1) - { - m3 = m1; - m3(r,c) += 1; - VERIFY(! (m1.cwise() < m3).all() ); - VERIFY(! (m1.cwise() > m3).all() ); - } - - // comparisons to scalar - VERIFY( (m1.cwise() != (m1(r,c)+1) ).any() ); - VERIFY( (m1.cwise() > (m1(r,c)-1) ).any() ); - VERIFY( (m1.cwise() < (m1(r,c)+1) ).any() ); - VERIFY( (m1.cwise() == m1(r,c) ).any() ); - - // test Select - VERIFY_IS_APPROX( (m1.cwise()m2).select(m1,m2), m1.cwise().max(m2) ); - Scalar mid = (m1.cwise().abs().minCoeff() + m1.cwise().abs().maxCoeff())/Scalar(2); - for (int j=0; j=MatrixType::Constant(rows,cols,mid)) - .select(m1,0), m3); - // even shorter version: - VERIFY_IS_APPROX( (m1.cwise().abs().cwise()RealScalar(0.1)).count() == rows*cols); - VERIFY_IS_APPROX(((m1.cwise().abs().cwise()+1).cwise()>RealScalar(0.1)).colwise().count().template cast(), RowVectorXi::Constant(cols,rows)); - VERIFY_IS_APPROX(((m1.cwise().abs().cwise()+1).cwise()>RealScalar(0.1)).rowwise().count().template cast(), VectorXi::Constant(rows, cols)); -} - -template void lpNorm(const VectorType& v) -{ - VectorType u = VectorType::Random(v.size()); - - VERIFY_IS_APPROX(u.template lpNorm(), u.cwise().abs().maxCoeff()); - VERIFY_IS_APPROX(u.template lpNorm<1>(), u.cwise().abs().sum()); - VERIFY_IS_APPROX(u.template lpNorm<2>(), ei_sqrt(u.cwise().abs().cwise().square().sum())); - VERIFY_IS_APPROX(ei_pow(u.template lpNorm<5>(), typename VectorType::RealScalar(5)), u.cwise().abs().cwise().pow(5).sum()); -} - -void test_eigen2_array() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( array(Matrix()) ); - CALL_SUBTEST_2( array(Matrix2f()) ); - CALL_SUBTEST_3( array(Matrix4d()) ); - CALL_SUBTEST_4( array(MatrixXcf(3, 3)) ); - CALL_SUBTEST_5( array(MatrixXf(8, 12)) ); - CALL_SUBTEST_6( array(MatrixXi(8, 12)) ); - } - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( comparisons(Matrix()) ); - CALL_SUBTEST_2( comparisons(Matrix2f()) ); - CALL_SUBTEST_3( comparisons(Matrix4d()) ); - CALL_SUBTEST_5( comparisons(MatrixXf(8, 12)) ); - CALL_SUBTEST_6( comparisons(MatrixXi(8, 12)) ); - } - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( lpNorm(Matrix()) ); - CALL_SUBTEST_2( lpNorm(Vector2f()) ); - CALL_SUBTEST_3( lpNorm(Vector3d()) ); - CALL_SUBTEST_4( lpNorm(Vector4f()) ); - CALL_SUBTEST_5( lpNorm(VectorXf(16)) ); - CALL_SUBTEST_7( lpNorm(VectorXcd(10)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_basicstuff.cpp b/external/eigen3/test/eigen2/eigen2_basicstuff.cpp deleted file mode 100644 index dd2dec1..0000000 --- a/external/eigen3/test/eigen2/eigen2_basicstuff.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void basicStuff(const MatrixType& m) -{ - typedef typename MatrixType::Scalar Scalar; - typedef Matrix VectorType; - - int rows = m.rows(); - int cols = m.cols(); - - // this test relies a lot on Random.h, and there's not much more that we can do - // to test it, hence I consider that we will have tested Random.h - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols), - mzero = MatrixType::Zero(rows, cols), - square = Matrix::Random(rows, rows); - VectorType v1 = VectorType::Random(rows), - vzero = VectorType::Zero(rows); - - Scalar x = ei_random(); - - int r = ei_random(0, rows-1), - c = ei_random(0, cols-1); - - m1.coeffRef(r,c) = x; - VERIFY_IS_APPROX(x, m1.coeff(r,c)); - m1(r,c) = x; - VERIFY_IS_APPROX(x, m1(r,c)); - v1.coeffRef(r) = x; - VERIFY_IS_APPROX(x, v1.coeff(r)); - v1(r) = x; - VERIFY_IS_APPROX(x, v1(r)); - v1[r] = x; - VERIFY_IS_APPROX(x, v1[r]); - - VERIFY_IS_APPROX( v1, v1); - VERIFY_IS_NOT_APPROX( v1, 2*v1); - VERIFY_IS_MUCH_SMALLER_THAN( vzero, v1); - if(NumTraits::HasFloatingPoint) - VERIFY_IS_MUCH_SMALLER_THAN( vzero, v1.norm()); - VERIFY_IS_NOT_MUCH_SMALLER_THAN(v1, v1); - VERIFY_IS_APPROX( vzero, v1-v1); - VERIFY_IS_APPROX( m1, m1); - VERIFY_IS_NOT_APPROX( m1, 2*m1); - VERIFY_IS_MUCH_SMALLER_THAN( mzero, m1); - VERIFY_IS_NOT_MUCH_SMALLER_THAN(m1, m1); - VERIFY_IS_APPROX( mzero, m1-m1); - - // always test operator() on each read-only expression class, - // in order to check const-qualifiers. - // indeed, if an expression class (here Zero) is meant to be read-only, - // hence has no _write() method, the corresponding MatrixBase method (here zero()) - // should return a const-qualified object so that it is the const-qualified - // operator() that gets called, which in turn calls _read(). - VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows,cols)(r,c), static_cast(1)); - - // now test copying a row-vector into a (column-)vector and conversely. - square.col(r) = square.row(r).eval(); - Matrix rv(rows); - Matrix cv(rows); - rv = square.row(r); - cv = square.col(r); - VERIFY_IS_APPROX(rv, cv.transpose()); - - if(cols!=1 && rows!=1 && MatrixType::SizeAtCompileTime!=Dynamic) - { - VERIFY_RAISES_ASSERT(m1 = (m2.block(0,0, rows-1, cols-1))); - } - - VERIFY_IS_APPROX(m3 = m1,m1); - MatrixType m4; - VERIFY_IS_APPROX(m4 = m1,m1); - - // test swap - m3 = m1; - m1.swap(m2); - VERIFY_IS_APPROX(m3, m2); - if(rows*cols>=3) - { - VERIFY_IS_NOT_APPROX(m3, m1); - } -} - -void test_eigen2_basicstuff() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( basicStuff(Matrix()) ); - CALL_SUBTEST_2( basicStuff(Matrix4d()) ); - CALL_SUBTEST_3( basicStuff(MatrixXcf(3, 3)) ); - CALL_SUBTEST_4( basicStuff(MatrixXi(8, 12)) ); - CALL_SUBTEST_5( basicStuff(MatrixXcd(20, 20)) ); - CALL_SUBTEST_6( basicStuff(Matrix()) ); - CALL_SUBTEST_7( basicStuff(Matrix(10,10)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_bug_132.cpp b/external/eigen3/test/eigen2/eigen2_bug_132.cpp deleted file mode 100644 index 7fe3610..0000000 --- a/external/eigen3/test/eigen2/eigen2_bug_132.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2010 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -void test_eigen2_bug_132() { - int size = 100; - MatrixXd A(size, size); - VectorXd b(size), c(size); - { - VectorXd y = A.transpose() * (b-c); // bug 132: infinite recursion in coeffRef - VectorXd z = (b-c).transpose() * A; // bug 132: infinite recursion in coeffRef - } - - // the following ones weren't failing, but let's include them for completeness: - { - VectorXd y = A * (b-c); - VectorXd z = (b-c).transpose() * A.transpose(); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_cholesky.cpp b/external/eigen3/test/eigen2/eigen2_cholesky.cpp deleted file mode 100644 index 9c4b6f5..0000000 --- a/external/eigen3/test/eigen2/eigen2_cholesky.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_NO_ASSERTION_CHECKING -#include "main.h" -#include -#include - -#ifdef HAS_GSL -#include "gsl_helper.h" -#endif - -template void cholesky(const MatrixType& m) -{ - /* this test covers the following files: - LLT.h LDLT.h - */ - int rows = m.rows(); - int cols = m.cols(); - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix SquareMatrixType; - typedef Matrix VectorType; - - MatrixType a0 = MatrixType::Random(rows,cols); - VectorType vecB = VectorType::Random(rows), vecX(rows); - MatrixType matB = MatrixType::Random(rows,cols), matX(rows,cols); - SquareMatrixType symm = a0 * a0.adjoint(); - // let's make sure the matrix is not singular or near singular - MatrixType a1 = MatrixType::Random(rows,cols); - symm += a1 * a1.adjoint(); - - #ifdef HAS_GSL - if (ei_is_same_type::ret) - { - typedef GslTraits Gsl; - typename Gsl::Matrix gMatA=0, gSymm=0; - typename Gsl::Vector gVecB=0, gVecX=0; - convert(symm, gSymm); - convert(symm, gMatA); - convert(vecB, gVecB); - convert(vecB, gVecX); - Gsl::cholesky(gMatA); - Gsl::cholesky_solve(gMatA, gVecB, gVecX); - VectorType vecX(rows), _vecX, _vecB; - convert(gVecX, _vecX); - symm.llt().solve(vecB, &vecX); - Gsl::prod(gSymm, gVecX, gVecB); - convert(gVecB, _vecB); - // test gsl itself ! - VERIFY_IS_APPROX(vecB, _vecB); - VERIFY_IS_APPROX(vecX, _vecX); - - Gsl::free(gMatA); - Gsl::free(gSymm); - Gsl::free(gVecB); - Gsl::free(gVecX); - } - #endif - - { - LDLT ldlt(symm); - VERIFY(ldlt.isPositiveDefinite()); - // in eigen3, LDLT is pivoting - //VERIFY_IS_APPROX(symm, ldlt.matrixL() * ldlt.vectorD().asDiagonal() * ldlt.matrixL().adjoint()); - ldlt.solve(vecB, &vecX); - VERIFY_IS_APPROX(symm * vecX, vecB); - ldlt.solve(matB, &matX); - VERIFY_IS_APPROX(symm * matX, matB); - } - - { - LLT chol(symm); - VERIFY(chol.isPositiveDefinite()); - VERIFY_IS_APPROX(symm, chol.matrixL() * chol.matrixL().adjoint()); - chol.solve(vecB, &vecX); - VERIFY_IS_APPROX(symm * vecX, vecB); - chol.solve(matB, &matX); - VERIFY_IS_APPROX(symm * matX, matB); - } - -#if 0 // cholesky is not rank-revealing anyway - // test isPositiveDefinite on non definite matrix - if (rows>4) - { - SquareMatrixType symm = a0.block(0,0,rows,cols-4) * a0.block(0,0,rows,cols-4).adjoint(); - LLT chol(symm); - VERIFY(!chol.isPositiveDefinite()); - LDLT cholnosqrt(symm); - VERIFY(!cholnosqrt.isPositiveDefinite()); - } -#endif -} - -void test_eigen2_cholesky() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( cholesky(Matrix()) ); - CALL_SUBTEST_2( cholesky(Matrix2d()) ); - CALL_SUBTEST_3( cholesky(Matrix3f()) ); - CALL_SUBTEST_4( cholesky(Matrix4d()) ); - CALL_SUBTEST_5( cholesky(MatrixXcd(7,7)) ); - CALL_SUBTEST_6( cholesky(MatrixXf(17,17)) ); - CALL_SUBTEST_7( cholesky(MatrixXd(33,33)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_commainitializer.cpp b/external/eigen3/test/eigen2/eigen2_commainitializer.cpp deleted file mode 100644 index e0f901e..0000000 --- a/external/eigen3/test/eigen2/eigen2_commainitializer.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -void test_eigen2_commainitializer() -{ - Matrix3d m3; - Matrix4d m4; - - VERIFY_RAISES_ASSERT( (m3 << 1, 2, 3, 4, 5, 6, 7, 8) ); - - #ifndef _MSC_VER - VERIFY_RAISES_ASSERT( (m3 << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) ); - #endif - - double data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - Matrix3d ref = Map >(data); - - m3 = Matrix3d::Random(); - m3 << 1, 2, 3, 4, 5, 6, 7, 8, 9; - VERIFY_IS_APPROX(m3, ref ); - - Vector3d vec[3]; - vec[0] << 1, 4, 7; - vec[1] << 2, 5, 8; - vec[2] << 3, 6, 9; - m3 = Matrix3d::Random(); - m3 << vec[0], vec[1], vec[2]; - VERIFY_IS_APPROX(m3, ref); - - vec[0] << 1, 2, 3; - vec[1] << 4, 5, 6; - vec[2] << 7, 8, 9; - m3 = Matrix3d::Random(); - m3 << vec[0].transpose(), - 4, 5, 6, - vec[2].transpose(); - VERIFY_IS_APPROX(m3, ref); -} diff --git a/external/eigen3/test/eigen2/eigen2_cwiseop.cpp b/external/eigen3/test/eigen2/eigen2_cwiseop.cpp deleted file mode 100644 index a36edd4..0000000 --- a/external/eigen3/test/eigen2/eigen2_cwiseop.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include -#include - -using namespace std; - -template struct AddIfNull { - const Scalar operator() (const Scalar a, const Scalar b) const {return a<=1e-3 ? b : a;} - enum { Cost = NumTraits::AddCost }; -}; - -template void cwiseops(const MatrixType& m) -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - int rows = m.rows(); - int cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols), - m4(rows, cols), - mzero = MatrixType::Zero(rows, cols), - mones = MatrixType::Ones(rows, cols), - identity = Matrix - ::Identity(rows, rows); - VectorType vzero = VectorType::Zero(rows), - vones = VectorType::Ones(rows), - v3(rows); - - int r = ei_random(0, rows-1), - c = ei_random(0, cols-1); - - Scalar s1 = ei_random(); - - // test Zero, Ones, Constant, and the set* variants - m3 = MatrixType::Constant(rows, cols, s1); - for (int j=0; j >(mones); - - VERIFY_IS_APPROX(m1.cwise().pow(2), m1.cwise().abs2()); - VERIFY_IS_APPROX(m1.cwise().pow(2), m1.cwise().square()); - VERIFY_IS_APPROX(m1.cwise().pow(3), m1.cwise().cube()); - - VERIFY_IS_APPROX(m1 + mones, m1.cwise()+Scalar(1)); - VERIFY_IS_APPROX(m1 - mones, m1.cwise()-Scalar(1)); - m3 = m1; m3.cwise() += 1; - VERIFY_IS_APPROX(m1 + mones, m3); - m3 = m1; m3.cwise() -= 1; - VERIFY_IS_APPROX(m1 - mones, m3); - - VERIFY_IS_APPROX(m2, m2.cwise() * mones); - VERIFY_IS_APPROX(m1.cwise() * m2, m2.cwise() * m1); - m3 = m1; - m3.cwise() *= m2; - VERIFY_IS_APPROX(m3, m1.cwise() * m2); - - VERIFY_IS_APPROX(mones, m2.cwise()/m2); - if(NumTraits::HasFloatingPoint) - { - VERIFY_IS_APPROX(m1.cwise() / m2, m1.cwise() * (m2.cwise().inverse())); - m3 = m1.cwise().abs().cwise().sqrt(); - VERIFY_IS_APPROX(m3.cwise().square(), m1.cwise().abs()); - VERIFY_IS_APPROX(m1.cwise().square().cwise().sqrt(), m1.cwise().abs()); - VERIFY_IS_APPROX(m1.cwise().abs().cwise().log().cwise().exp() , m1.cwise().abs()); - - VERIFY_IS_APPROX(m1.cwise().pow(2), m1.cwise().square()); - m3 = (m1.cwise().abs().cwise()<=RealScalar(0.01)).select(mones,m1); - VERIFY_IS_APPROX(m3.cwise().pow(-1), m3.cwise().inverse()); - m3 = m1.cwise().abs(); - VERIFY_IS_APPROX(m3.cwise().pow(RealScalar(0.5)), m3.cwise().sqrt()); - -// VERIFY_IS_APPROX(m1.cwise().tan(), m1.cwise().sin().cwise() / m1.cwise().cos()); - VERIFY_IS_APPROX(mones, m1.cwise().sin().cwise().square() + m1.cwise().cos().cwise().square()); - m3 = m1; - m3.cwise() /= m2; - VERIFY_IS_APPROX(m3, m1.cwise() / m2); - } - - // check min - VERIFY_IS_APPROX( m1.cwise().min(m2), m2.cwise().min(m1) ); - VERIFY_IS_APPROX( m1.cwise().min(m1+mones), m1 ); - VERIFY_IS_APPROX( m1.cwise().min(m1-mones), m1-mones ); - - // check max - VERIFY_IS_APPROX( m1.cwise().max(m2), m2.cwise().max(m1) ); - VERIFY_IS_APPROX( m1.cwise().max(m1-mones), m1 ); - VERIFY_IS_APPROX( m1.cwise().max(m1+mones), m1+mones ); - - VERIFY( (m1.cwise() == m1).all() ); - VERIFY( (m1.cwise() != m2).any() ); - VERIFY(!(m1.cwise() == (m1+mones)).any() ); - if (rows*cols>1) - { - m3 = m1; - m3(r,c) += 1; - VERIFY( (m1.cwise() == m3).any() ); - VERIFY( !(m1.cwise() == m3).all() ); - } - VERIFY( (m1.cwise().min(m2).cwise() <= m2).all() ); - VERIFY( (m1.cwise().max(m2).cwise() >= m2).all() ); - VERIFY( (m1.cwise().min(m2).cwise() < (m1+mones)).all() ); - VERIFY( (m1.cwise().max(m2).cwise() > (m1-mones)).all() ); - -#if(__cplusplus < 201103L) -// std::binder* are deprecated since c++11 and will be removed in c++17 - VERIFY( (m1.cwise()(), Scalar(1)))).all() ); - VERIFY( !(m1.cwise()(), Scalar(1)))).all() ); - VERIFY( !(m1.cwise()>m1.unaryExpr(bind2nd(plus(), Scalar(1)))).any() ); -#endif -} - -void test_eigen2_cwiseop() -{ - for(int i = 0; i < g_repeat ; i++) { - CALL_SUBTEST_1( cwiseops(Matrix()) ); - CALL_SUBTEST_2( cwiseops(Matrix4d()) ); - CALL_SUBTEST_3( cwiseops(MatrixXf(3, 3)) ); - CALL_SUBTEST_3( cwiseops(MatrixXf(22, 22)) ); - CALL_SUBTEST_4( cwiseops(MatrixXi(8, 12)) ); - CALL_SUBTEST_5( cwiseops(MatrixXd(20, 20)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_determinant.cpp b/external/eigen3/test/eigen2/eigen2_determinant.cpp deleted file mode 100644 index c7b4ad0..0000000 --- a/external/eigen3/test/eigen2/eigen2_determinant.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Benoit Jacob -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include - -template void determinant(const MatrixType& m) -{ - /* this test covers the following files: - Determinant.h - */ - int size = m.rows(); - - MatrixType m1(size, size), m2(size, size); - m1.setRandom(); - m2.setRandom(); - typedef typename MatrixType::Scalar Scalar; - Scalar x = ei_random(); - VERIFY_IS_APPROX(MatrixType::Identity(size, size).determinant(), Scalar(1)); - VERIFY_IS_APPROX((m1*m2).determinant(), m1.determinant() * m2.determinant()); - if(size==1) return; - int i = ei_random(0, size-1); - int j; - do { - j = ei_random(0, size-1); - } while(j==i); - m2 = m1; - m2.row(i).swap(m2.row(j)); - VERIFY_IS_APPROX(m2.determinant(), -m1.determinant()); - m2 = m1; - m2.col(i).swap(m2.col(j)); - VERIFY_IS_APPROX(m2.determinant(), -m1.determinant()); - VERIFY_IS_APPROX(m2.determinant(), m2.transpose().determinant()); - VERIFY_IS_APPROX(ei_conj(m2.determinant()), m2.adjoint().determinant()); - m2 = m1; - m2.row(i) += x*m2.row(j); - VERIFY_IS_APPROX(m2.determinant(), m1.determinant()); - m2 = m1; - m2.row(i) *= x; - VERIFY_IS_APPROX(m2.determinant(), m1.determinant() * x); -} - -void test_eigen2_determinant() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( determinant(Matrix()) ); - CALL_SUBTEST_2( determinant(Matrix()) ); - CALL_SUBTEST_3( determinant(Matrix()) ); - CALL_SUBTEST_4( determinant(Matrix()) ); - CALL_SUBTEST_5( determinant(Matrix, 10, 10>()) ); - CALL_SUBTEST_6( determinant(MatrixXd(20, 20)) ); - } - CALL_SUBTEST_6( determinant(MatrixXd(200, 200)) ); -} diff --git a/external/eigen3/test/eigen2/eigen2_dynalloc.cpp b/external/eigen3/test/eigen2/eigen2_dynalloc.cpp deleted file mode 100644 index 1891a9e..0000000 --- a/external/eigen3/test/eigen2/eigen2_dynalloc.cpp +++ /dev/null @@ -1,131 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -#if EIGEN_ARCH_WANTS_ALIGNMENT -#define ALIGNMENT 16 -#else -#define ALIGNMENT 1 -#endif - -void check_handmade_aligned_malloc() -{ - for(int i = 1; i < 1000; i++) - { - char *p = (char*)ei_handmade_aligned_malloc(i); - VERIFY(std::size_t(p)%ALIGNMENT==0); - // if the buffer is wrongly allocated this will give a bad write --> check with valgrind - for(int j = 0; j < i; j++) p[j]=0; - ei_handmade_aligned_free(p); - } -} - -void check_aligned_malloc() -{ - for(int i = 1; i < 1000; i++) - { - char *p = (char*)ei_aligned_malloc(i); - VERIFY(std::size_t(p)%ALIGNMENT==0); - // if the buffer is wrongly allocated this will give a bad write --> check with valgrind - for(int j = 0; j < i; j++) p[j]=0; - ei_aligned_free(p); - } -} - -void check_aligned_new() -{ - for(int i = 1; i < 1000; i++) - { - float *p = ei_aligned_new(i); - VERIFY(std::size_t(p)%ALIGNMENT==0); - // if the buffer is wrongly allocated this will give a bad write --> check with valgrind - for(int j = 0; j < i; j++) p[j]=0; - ei_aligned_delete(p,i); - } -} - -void check_aligned_stack_alloc() -{ - for(int i = 1; i < 1000; i++) - { - ei_declare_aligned_stack_constructed_variable(float, p, i, 0); - VERIFY(std::size_t(p)%ALIGNMENT==0); - // if the buffer is wrongly allocated this will give a bad write --> check with valgrind - for(int j = 0; j < i; j++) p[j]=0; - } -} - - -// test compilation with both a struct and a class... -struct MyStruct -{ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW - char dummychar; - Vector4f avec; -}; - -class MyClassA -{ - public: - EIGEN_MAKE_ALIGNED_OPERATOR_NEW - char dummychar; - Vector4f avec; -}; - -template void check_dynaligned() -{ - T* obj = new T; - VERIFY(std::size_t(obj)%ALIGNMENT==0); - delete obj; -} - -void test_eigen2_dynalloc() -{ - // low level dynamic memory allocation - CALL_SUBTEST(check_handmade_aligned_malloc()); - CALL_SUBTEST(check_aligned_malloc()); - CALL_SUBTEST(check_aligned_new()); - CALL_SUBTEST(check_aligned_stack_alloc()); - - for (int i=0; i() ); - CALL_SUBTEST( check_dynaligned() ); - CALL_SUBTEST( check_dynaligned() ); - CALL_SUBTEST( check_dynaligned() ); - CALL_SUBTEST( check_dynaligned() ); - } - - // check static allocation, who knows ? - { - MyStruct foo0; VERIFY(std::size_t(foo0.avec.data())%ALIGNMENT==0); - MyClassA fooA; VERIFY(std::size_t(fooA.avec.data())%ALIGNMENT==0); - } - - // dynamic allocation, single object - for (int i=0; iavec.data())%ALIGNMENT==0); - MyClassA *fooA = new MyClassA(); VERIFY(std::size_t(fooA->avec.data())%ALIGNMENT==0); - delete foo0; - delete fooA; - } - - // dynamic allocation, array - const int N = 10; - for (int i=0; iavec.data())%ALIGNMENT==0); - MyClassA *fooA = new MyClassA[N]; VERIFY(std::size_t(fooA->avec.data())%ALIGNMENT==0); - delete[] foo0; - delete[] fooA; - } - -} diff --git a/external/eigen3/test/eigen2/eigen2_eigensolver.cpp b/external/eigen3/test/eigen2/eigen2_eigensolver.cpp deleted file mode 100644 index 48b4ace..0000000 --- a/external/eigen3/test/eigen2/eigen2_eigensolver.cpp +++ /dev/null @@ -1,146 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include - -#ifdef HAS_GSL -#include "gsl_helper.h" -#endif - -template void selfadjointeigensolver(const MatrixType& m) -{ - /* this test covers the following files: - EigenSolver.h, SelfAdjointEigenSolver.h (and indirectly: Tridiagonalization.h) - */ - int rows = m.rows(); - int cols = m.cols(); - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - typedef Matrix RealVectorType; - typedef typename std::complex::Real> Complex; - - RealScalar largerEps = 10*test_precision(); - - MatrixType a = MatrixType::Random(rows,cols); - MatrixType a1 = MatrixType::Random(rows,cols); - MatrixType symmA = a.adjoint() * a + a1.adjoint() * a1; - - MatrixType b = MatrixType::Random(rows,cols); - MatrixType b1 = MatrixType::Random(rows,cols); - MatrixType symmB = b.adjoint() * b + b1.adjoint() * b1; - - SelfAdjointEigenSolver eiSymm(symmA); - // generalized eigen pb - SelfAdjointEigenSolver eiSymmGen(symmA, symmB); - - #ifdef HAS_GSL - if (ei_is_same_type::ret) - { - typedef GslTraits Gsl; - typename Gsl::Matrix gEvec=0, gSymmA=0, gSymmB=0; - typename GslTraits::Vector gEval=0; - RealVectorType _eval; - MatrixType _evec; - convert(symmA, gSymmA); - convert(symmB, gSymmB); - convert(symmA, gEvec); - gEval = GslTraits::createVector(rows); - - Gsl::eigen_symm(gSymmA, gEval, gEvec); - convert(gEval, _eval); - convert(gEvec, _evec); - - // test gsl itself ! - VERIFY((symmA * _evec).isApprox(_evec * _eval.asDiagonal(), largerEps)); - - // compare with eigen - VERIFY_IS_APPROX(_eval, eiSymm.eigenvalues()); - VERIFY_IS_APPROX(_evec.cwise().abs(), eiSymm.eigenvectors().cwise().abs()); - - // generalized pb - Gsl::eigen_symm_gen(gSymmA, gSymmB, gEval, gEvec); - convert(gEval, _eval); - convert(gEvec, _evec); - // test GSL itself: - VERIFY((symmA * _evec).isApprox(symmB * (_evec * _eval.asDiagonal()), largerEps)); - - // compare with eigen - MatrixType normalized_eivec = eiSymmGen.eigenvectors()*eiSymmGen.eigenvectors().colwise().norm().asDiagonal().inverse(); - VERIFY_IS_APPROX(_eval, eiSymmGen.eigenvalues()); - VERIFY_IS_APPROX(_evec.cwiseAbs(), normalized_eivec.cwiseAbs()); - - Gsl::free(gSymmA); - Gsl::free(gSymmB); - GslTraits::free(gEval); - Gsl::free(gEvec); - } - #endif - - VERIFY((symmA * eiSymm.eigenvectors()).isApprox( - eiSymm.eigenvectors() * eiSymm.eigenvalues().asDiagonal(), largerEps)); - - // generalized eigen problem Ax = lBx - VERIFY((symmA * eiSymmGen.eigenvectors()).isApprox( - symmB * (eiSymmGen.eigenvectors() * eiSymmGen.eigenvalues().asDiagonal()), largerEps)); - - MatrixType sqrtSymmA = eiSymm.operatorSqrt(); - VERIFY_IS_APPROX(symmA, sqrtSymmA*sqrtSymmA); - VERIFY_IS_APPROX(sqrtSymmA, symmA*eiSymm.operatorInverseSqrt()); -} - -template void eigensolver(const MatrixType& m) -{ - /* this test covers the following files: - EigenSolver.h - */ - int rows = m.rows(); - int cols = m.cols(); - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - typedef Matrix RealVectorType; - typedef typename std::complex::Real> Complex; - - // RealScalar largerEps = 10*test_precision(); - - MatrixType a = MatrixType::Random(rows,cols); - MatrixType a1 = MatrixType::Random(rows,cols); - MatrixType symmA = a.adjoint() * a + a1.adjoint() * a1; - - EigenSolver ei0(symmA); - VERIFY_IS_APPROX(symmA * ei0.pseudoEigenvectors(), ei0.pseudoEigenvectors() * ei0.pseudoEigenvalueMatrix()); - VERIFY_IS_APPROX((symmA.template cast()) * (ei0.pseudoEigenvectors().template cast()), - (ei0.pseudoEigenvectors().template cast()) * (ei0.eigenvalues().asDiagonal())); - - EigenSolver ei1(a); - VERIFY_IS_APPROX(a * ei1.pseudoEigenvectors(), ei1.pseudoEigenvectors() * ei1.pseudoEigenvalueMatrix()); - VERIFY_IS_APPROX(a.template cast() * ei1.eigenvectors(), - ei1.eigenvectors() * ei1.eigenvalues().asDiagonal()); - -} - -void test_eigen2_eigensolver() -{ - for(int i = 0; i < g_repeat; i++) { - // very important to test a 3x3 matrix since we provide a special path for it - CALL_SUBTEST_1( selfadjointeigensolver(Matrix3f()) ); - CALL_SUBTEST_2( selfadjointeigensolver(Matrix4d()) ); - CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(7,7)) ); - CALL_SUBTEST_4( selfadjointeigensolver(MatrixXcd(5,5)) ); - CALL_SUBTEST_5( selfadjointeigensolver(MatrixXd(19,19)) ); - - CALL_SUBTEST_6( eigensolver(Matrix4f()) ); - CALL_SUBTEST_5( eigensolver(MatrixXd(17,17)) ); - } -} - diff --git a/external/eigen3/test/eigen2/eigen2_first_aligned.cpp b/external/eigen3/test/eigen2/eigen2_first_aligned.cpp deleted file mode 100644 index 51bb3ca..0000000 --- a/external/eigen3/test/eigen2/eigen2_first_aligned.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template -void test_eigen2_first_aligned_helper(Scalar *array, int size) -{ - const int packet_size = sizeof(Scalar) * ei_packet_traits::size; - VERIFY(((std::size_t(array) + sizeof(Scalar) * ei_alignmentOffset(array, size)) % packet_size) == 0); -} - -template -void test_eigen2_none_aligned_helper(Scalar *array, int size) -{ - VERIFY(ei_packet_traits::size == 1 || ei_alignmentOffset(array, size) == size); -} - -struct some_non_vectorizable_type { float x; }; - -void test_eigen2_first_aligned() -{ - EIGEN_ALIGN_128 float array_float[100]; - test_first_aligned_helper(array_float, 50); - test_first_aligned_helper(array_float+1, 50); - test_first_aligned_helper(array_float+2, 50); - test_first_aligned_helper(array_float+3, 50); - test_first_aligned_helper(array_float+4, 50); - test_first_aligned_helper(array_float+5, 50); - - EIGEN_ALIGN_128 double array_double[100]; - test_first_aligned_helper(array_double, 50); - test_first_aligned_helper(array_double+1, 50); - test_first_aligned_helper(array_double+2, 50); - - double *array_double_plus_4_bytes = (double*)(std::size_t(array_double)+4); - test_none_aligned_helper(array_double_plus_4_bytes, 50); - test_none_aligned_helper(array_double_plus_4_bytes+1, 50); - - some_non_vectorizable_type array_nonvec[100]; - test_first_aligned_helper(array_nonvec, 100); - test_none_aligned_helper(array_nonvec, 100); -} diff --git a/external/eigen3/test/eigen2/eigen2_geometry.cpp b/external/eigen3/test/eigen2/eigen2_geometry.cpp deleted file mode 100644 index 5140407..0000000 --- a/external/eigen3/test/eigen2/eigen2_geometry.cpp +++ /dev/null @@ -1,432 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include -#include -#include - -template void geometry(void) -{ - /* this test covers the following files: - Cross.h Quaternion.h, Transform.cpp - */ - - typedef Matrix Matrix2; - typedef Matrix Matrix3; - typedef Matrix Matrix4; - typedef Matrix Vector2; - typedef Matrix Vector3; - typedef Matrix Vector4; - typedef Quaternion Quaternionx; - typedef AngleAxis AngleAxisx; - typedef Transform Transform2; - typedef Transform Transform3; - typedef Scaling Scaling2; - typedef Scaling Scaling3; - typedef Translation Translation2; - typedef Translation Translation3; - - Scalar largeEps = test_precision(); - if (ei_is_same_type::ret) - largeEps = 1e-2f; - - Vector3 v0 = Vector3::Random(), - v1 = Vector3::Random(), - v2 = Vector3::Random(); - Vector2 u0 = Vector2::Random(); - Matrix3 matrot1; - - Scalar a = ei_random(-Scalar(M_PI), Scalar(M_PI)); - - // cross product - VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(v2).eigen2_dot(v1), Scalar(1)); - Matrix3 m; - m << v0.normalized(), - (v0.cross(v1)).normalized(), - (v0.cross(v1).cross(v0)).normalized(); - VERIFY(m.isUnitary()); - - // Quaternion: Identity(), setIdentity(); - Quaternionx q1, q2; - q2.setIdentity(); - VERIFY_IS_APPROX(Quaternionx(Quaternionx::Identity()).coeffs(), q2.coeffs()); - q1.coeffs().setRandom(); - VERIFY_IS_APPROX(q1.coeffs(), (q1*q2).coeffs()); - - // unitOrthogonal - VERIFY_IS_MUCH_SMALLER_THAN(u0.unitOrthogonal().eigen2_dot(u0), Scalar(1)); - VERIFY_IS_MUCH_SMALLER_THAN(v0.unitOrthogonal().eigen2_dot(v0), Scalar(1)); - VERIFY_IS_APPROX(u0.unitOrthogonal().norm(), Scalar(1)); - VERIFY_IS_APPROX(v0.unitOrthogonal().norm(), Scalar(1)); - - - VERIFY_IS_APPROX(v0, AngleAxisx(a, v0.normalized()) * v0); - VERIFY_IS_APPROX(-v0, AngleAxisx(Scalar(M_PI), v0.unitOrthogonal()) * v0); - VERIFY_IS_APPROX(ei_cos(a)*v0.squaredNorm(), v0.eigen2_dot(AngleAxisx(a, v0.unitOrthogonal()) * v0)); - m = AngleAxisx(a, v0.normalized()).toRotationMatrix().adjoint(); - VERIFY_IS_APPROX(Matrix3::Identity(), m * AngleAxisx(a, v0.normalized())); - VERIFY_IS_APPROX(Matrix3::Identity(), AngleAxisx(a, v0.normalized()) * m); - - q1 = AngleAxisx(a, v0.normalized()); - q2 = AngleAxisx(a, v1.normalized()); - - // angular distance - Scalar refangle = ei_abs(AngleAxisx(q1.inverse()*q2).angle()); - if (refangle>Scalar(M_PI)) - refangle = Scalar(2)*Scalar(M_PI) - refangle; - - if((q1.coeffs()-q2.coeffs()).norm() > 10*largeEps) - { - VERIFY(ei_isApprox(q1.angularDistance(q2), refangle, largeEps)); - } - - // rotation matrix conversion - VERIFY_IS_APPROX(q1 * v2, q1.toRotationMatrix() * v2); - VERIFY_IS_APPROX(q1 * q2 * v2, - q1.toRotationMatrix() * q2.toRotationMatrix() * v2); - - VERIFY( (q2*q1).isApprox(q1*q2, largeEps) || !(q2 * q1 * v2).isApprox( - q1.toRotationMatrix() * q2.toRotationMatrix() * v2)); - - q2 = q1.toRotationMatrix(); - VERIFY_IS_APPROX(q1*v1,q2*v1); - - matrot1 = AngleAxisx(Scalar(0.1), Vector3::UnitX()) - * AngleAxisx(Scalar(0.2), Vector3::UnitY()) - * AngleAxisx(Scalar(0.3), Vector3::UnitZ()); - VERIFY_IS_APPROX(matrot1 * v1, - AngleAxisx(Scalar(0.1), Vector3(1,0,0)).toRotationMatrix() - * (AngleAxisx(Scalar(0.2), Vector3(0,1,0)).toRotationMatrix() - * (AngleAxisx(Scalar(0.3), Vector3(0,0,1)).toRotationMatrix() * v1))); - - // angle-axis conversion - AngleAxisx aa = q1; - VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); - VERIFY_IS_NOT_APPROX(q1 * v1, Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1); - - // from two vector creation - VERIFY_IS_APPROX(v2.normalized(),(q2.setFromTwoVectors(v1,v2)*v1).normalized()); - VERIFY_IS_APPROX(v2.normalized(),(q2.setFromTwoVectors(v1,v2)*v1).normalized()); - - // inverse and conjugate - VERIFY_IS_APPROX(q1 * (q1.inverse() * v1), v1); - VERIFY_IS_APPROX(q1 * (q1.conjugate() * v1), v1); - - // AngleAxis - VERIFY_IS_APPROX(AngleAxisx(a,v1.normalized()).toRotationMatrix(), - Quaternionx(AngleAxisx(a,v1.normalized())).toRotationMatrix()); - - AngleAxisx aa1; - m = q1.toRotationMatrix(); - aa1 = m; - VERIFY_IS_APPROX(AngleAxisx(m).toRotationMatrix(), - Quaternionx(m).toRotationMatrix()); - - // Transform - // TODO complete the tests ! - a = 0; - while (ei_abs(a)(-Scalar(0.4)*Scalar(M_PI), Scalar(0.4)*Scalar(M_PI)); - q1 = AngleAxisx(a, v0.normalized()); - Transform3 t0, t1, t2; - // first test setIdentity() and Identity() - t0.setIdentity(); - VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity()); - t0.matrix().setZero(); - t0 = Transform3::Identity(); - VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity()); - - t0.linear() = q1.toRotationMatrix(); - t1.setIdentity(); - t1.linear() = q1.toRotationMatrix(); - - v0 << 50, 2, 1;//= ei_random_matrix().cwiseProduct(Vector3(10,2,0.5)); - t0.scale(v0); - t1.prescale(v0); - - VERIFY_IS_APPROX( (t0 * Vector3(1,0,0)).norm(), v0.x()); - //VERIFY(!ei_isApprox((t1 * Vector3(1,0,0)).norm(), v0.x())); - - t0.setIdentity(); - t1.setIdentity(); - v1 << 1, 2, 3; - t0.linear() = q1.toRotationMatrix(); - t0.pretranslate(v0); - t0.scale(v1); - t1.linear() = q1.conjugate().toRotationMatrix(); - t1.prescale(v1.cwise().inverse()); - t1.translate(-v0); - - VERIFY((t0.matrix() * t1.matrix()).isIdentity(test_precision())); - - t1.fromPositionOrientationScale(v0, q1, v1); - VERIFY_IS_APPROX(t1.matrix(), t0.matrix()); - VERIFY_IS_APPROX(t1*v1, t0*v1); - - t0.setIdentity(); t0.scale(v0).rotate(q1.toRotationMatrix()); - t1.setIdentity(); t1.scale(v0).rotate(q1); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - t0.setIdentity(); t0.scale(v0).rotate(AngleAxisx(q1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - VERIFY_IS_APPROX(t0.scale(a).matrix(), t1.scale(Vector3::Constant(a)).matrix()); - VERIFY_IS_APPROX(t0.prescale(a).matrix(), t1.prescale(Vector3::Constant(a)).matrix()); - - // More transform constructors, operator=, operator*= - - Matrix3 mat3 = Matrix3::Random(); - Matrix4 mat4; - mat4 << mat3 , Vector3::Zero() , Vector4::Zero().transpose(); - Transform3 tmat3(mat3), tmat4(mat4); - tmat4.matrix()(3,3) = Scalar(1); - VERIFY_IS_APPROX(tmat3.matrix(), tmat4.matrix()); - - Scalar a3 = ei_random(-Scalar(M_PI), Scalar(M_PI)); - Vector3 v3 = Vector3::Random().normalized(); - AngleAxisx aa3(a3, v3); - Transform3 t3(aa3); - Transform3 t4; - t4 = aa3; - VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); - t4.rotate(AngleAxisx(-a3,v3)); - VERIFY_IS_APPROX(t4.matrix(), Matrix4::Identity()); - t4 *= aa3; - VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); - - v3 = Vector3::Random(); - Translation3 tv3(v3); - Transform3 t5(tv3); - t4 = tv3; - VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); - t4.translate(-v3); - VERIFY_IS_APPROX(t4.matrix(), Matrix4::Identity()); - t4 *= tv3; - VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); - - Scaling3 sv3(v3); - Transform3 t6(sv3); - t4 = sv3; - VERIFY_IS_APPROX(t6.matrix(), t4.matrix()); - t4.scale(v3.cwise().inverse()); - VERIFY_IS_APPROX(t4.matrix(), Matrix4::Identity()); - t4 *= sv3; - VERIFY_IS_APPROX(t6.matrix(), t4.matrix()); - - // matrix * transform - VERIFY_IS_APPROX(Transform3(t3.matrix()*t4).matrix(), Transform3(t3*t4).matrix()); - - // chained Transform product - VERIFY_IS_APPROX(((t3*t4)*t5).matrix(), (t3*(t4*t5)).matrix()); - - // check that Transform product doesn't have aliasing problems - t5 = t4; - t5 = t5*t5; - VERIFY_IS_APPROX(t5, t4*t4); - - // 2D transformation - Transform2 t20, t21; - Vector2 v20 = Vector2::Random(); - Vector2 v21 = Vector2::Random(); - for (int k=0; k<2; ++k) - if (ei_abs(v21[k])(a).toRotationMatrix(); - VERIFY_IS_APPROX(t20.fromPositionOrientationScale(v20,a,v21).matrix(), - t21.pretranslate(v20).scale(v21).matrix()); - - t21.setIdentity(); - t21.linear() = Rotation2D(-a).toRotationMatrix(); - VERIFY( (t20.fromPositionOrientationScale(v20,a,v21) - * (t21.prescale(v21.cwise().inverse()).translate(-v20))).matrix().isIdentity(test_precision()) ); - - // Transform - new API - // 3D - t0.setIdentity(); - t0.rotate(q1).scale(v0).translate(v0); - // mat * scaling and mat * translation - t1 = (Matrix3(q1) * Scaling3(v0)) * Translation3(v0); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // mat * transformation and scaling * translation - t1 = Matrix3(q1) * (Scaling3(v0) * Translation3(v0)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - t0.setIdentity(); - t0.prerotate(q1).prescale(v0).pretranslate(v0); - // translation * scaling and transformation * mat - t1 = (Translation3(v0) * Scaling3(v0)) * Matrix3(q1); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // scaling * mat and translation * mat - t1 = Translation3(v0) * (Scaling3(v0) * Matrix3(q1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - t0.setIdentity(); - t0.scale(v0).translate(v0).rotate(q1); - // translation * mat and scaling * transformation - t1 = Scaling3(v0) * (Translation3(v0) * Matrix3(q1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // transformation * scaling - t0.scale(v0); - t1 = t1 * Scaling3(v0); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // transformation * translation - t0.translate(v0); - t1 = t1 * Translation3(v0); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // translation * transformation - t0.pretranslate(v0); - t1 = Translation3(v0) * t1; - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // transform * quaternion - t0.rotate(q1); - t1 = t1 * q1; - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // translation * quaternion - t0.translate(v1).rotate(q1); - t1 = t1 * (Translation3(v1) * q1); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // scaling * quaternion - t0.scale(v1).rotate(q1); - t1 = t1 * (Scaling3(v1) * q1); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // quaternion * transform - t0.prerotate(q1); - t1 = q1 * t1; - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // quaternion * translation - t0.rotate(q1).translate(v1); - t1 = t1 * (q1 * Translation3(v1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // quaternion * scaling - t0.rotate(q1).scale(v1); - t1 = t1 * (q1 * Scaling3(v1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // translation * vector - t0.setIdentity(); - t0.translate(v0); - VERIFY_IS_APPROX(t0 * v1, Translation3(v0) * v1); - - // scaling * vector - t0.setIdentity(); - t0.scale(v0); - VERIFY_IS_APPROX(t0 * v1, Scaling3(v0) * v1); - - // test transform inversion - t0.setIdentity(); - t0.translate(v0); - t0.linear().setRandom(); - VERIFY_IS_APPROX(t0.inverse(Affine), t0.matrix().inverse()); - t0.setIdentity(); - t0.translate(v0).rotate(q1); - VERIFY_IS_APPROX(t0.inverse(Isometry), t0.matrix().inverse()); - - // test extract rotation and scaling - t0.setIdentity(); - t0.translate(v0).rotate(q1).scale(v1); - VERIFY_IS_APPROX(t0.rotation() * v1, Matrix3(q1) * v1); - - Matrix3 mat_rotation, mat_scaling; - t0.setIdentity(); - t0.translate(v0).rotate(q1).scale(v1); - t0.computeRotationScaling(&mat_rotation, &mat_scaling); - VERIFY_IS_APPROX(t0.linear(), mat_rotation * mat_scaling); - VERIFY_IS_APPROX(mat_rotation*mat_rotation.adjoint(), Matrix3::Identity()); - VERIFY_IS_APPROX(mat_rotation.determinant(), Scalar(1)); - t0.computeScalingRotation(&mat_scaling, &mat_rotation); - VERIFY_IS_APPROX(t0.linear(), mat_scaling * mat_rotation); - VERIFY_IS_APPROX(mat_rotation*mat_rotation.adjoint(), Matrix3::Identity()); - VERIFY_IS_APPROX(mat_rotation.determinant(), Scalar(1)); - - // test casting - Transform t1f = t1.template cast(); - VERIFY_IS_APPROX(t1f.template cast(),t1); - Transform t1d = t1.template cast(); - VERIFY_IS_APPROX(t1d.template cast(),t1); - - Translation3 tr1(v0); - Translation tr1f = tr1.template cast(); - VERIFY_IS_APPROX(tr1f.template cast(),tr1); - Translation tr1d = tr1.template cast(); - VERIFY_IS_APPROX(tr1d.template cast(),tr1); - - Scaling3 sc1(v0); - Scaling sc1f = sc1.template cast(); - VERIFY_IS_APPROX(sc1f.template cast(),sc1); - Scaling sc1d = sc1.template cast(); - VERIFY_IS_APPROX(sc1d.template cast(),sc1); - - Quaternion q1f = q1.template cast(); - VERIFY_IS_APPROX(q1f.template cast(),q1); - Quaternion q1d = q1.template cast(); - VERIFY_IS_APPROX(q1d.template cast(),q1); - - AngleAxis aa1f = aa1.template cast(); - VERIFY_IS_APPROX(aa1f.template cast(),aa1); - AngleAxis aa1d = aa1.template cast(); - VERIFY_IS_APPROX(aa1d.template cast(),aa1); - - Rotation2D r2d1(ei_random()); - Rotation2D r2d1f = r2d1.template cast(); - VERIFY_IS_APPROX(r2d1f.template cast(),r2d1); - Rotation2D r2d1d = r2d1.template cast(); - VERIFY_IS_APPROX(r2d1d.template cast(),r2d1); - - m = q1; -// m.col(1) = Vector3(0,ei_random(),ei_random()).normalized(); -// m.col(0) = Vector3(-1,0,0).normalized(); -// m.col(2) = m.col(0).cross(m.col(1)); - #define VERIFY_EULER(I,J,K, X,Y,Z) { \ - Vector3 ea = m.eulerAngles(I,J,K); \ - Matrix3 m1 = Matrix3(AngleAxisx(ea[0], Vector3::Unit##X()) * AngleAxisx(ea[1], Vector3::Unit##Y()) * AngleAxisx(ea[2], Vector3::Unit##Z())); \ - VERIFY_IS_APPROX(m, m1); \ - VERIFY_IS_APPROX(m, Matrix3(AngleAxisx(ea[0], Vector3::Unit##X()) * AngleAxisx(ea[1], Vector3::Unit##Y()) * AngleAxisx(ea[2], Vector3::Unit##Z()))); \ - } - VERIFY_EULER(0,1,2, X,Y,Z); - VERIFY_EULER(0,1,0, X,Y,X); - VERIFY_EULER(0,2,1, X,Z,Y); - VERIFY_EULER(0,2,0, X,Z,X); - - VERIFY_EULER(1,2,0, Y,Z,X); - VERIFY_EULER(1,2,1, Y,Z,Y); - VERIFY_EULER(1,0,2, Y,X,Z); - VERIFY_EULER(1,0,1, Y,X,Y); - - VERIFY_EULER(2,0,1, Z,X,Y); - VERIFY_EULER(2,0,2, Z,X,Z); - VERIFY_EULER(2,1,0, Z,Y,X); - VERIFY_EULER(2,1,2, Z,Y,Z); - - // colwise/rowwise cross product - mat3.setRandom(); - Vector3 vec3 = Vector3::Random(); - Matrix3 mcross; - int i = ei_random(0,2); - mcross = mat3.colwise().cross(vec3); - VERIFY_IS_APPROX(mcross.col(i), mat3.col(i).cross(vec3)); - mcross = mat3.rowwise().cross(vec3); - VERIFY_IS_APPROX(mcross.row(i), mat3.row(i).cross(vec3)); - - -} - -void test_eigen2_geometry() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( geometry() ); - CALL_SUBTEST_2( geometry() ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_geometry_with_eigen2_prefix.cpp b/external/eigen3/test/eigen2/eigen2_geometry_with_eigen2_prefix.cpp deleted file mode 100644 index 12d4a71..0000000 --- a/external/eigen3/test/eigen2/eigen2_geometry_with_eigen2_prefix.cpp +++ /dev/null @@ -1,435 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN2_SUPPORT_STAGE15_RESOLVE_API_CONFLICTS_WARN - -#include "main.h" -#include -#include -#include - -template void geometry(void) -{ - /* this test covers the following files: - Cross.h Quaternion.h, Transform.cpp - */ - - typedef Matrix Matrix2; - typedef Matrix Matrix3; - typedef Matrix Matrix4; - typedef Matrix Vector2; - typedef Matrix Vector3; - typedef Matrix Vector4; - typedef eigen2_Quaternion Quaternionx; - typedef eigen2_AngleAxis AngleAxisx; - typedef eigen2_Transform Transform2; - typedef eigen2_Transform Transform3; - typedef eigen2_Scaling Scaling2; - typedef eigen2_Scaling Scaling3; - typedef eigen2_Translation Translation2; - typedef eigen2_Translation Translation3; - - Scalar largeEps = test_precision(); - if (ei_is_same_type::ret) - largeEps = 1e-2f; - - Vector3 v0 = Vector3::Random(), - v1 = Vector3::Random(), - v2 = Vector3::Random(); - Vector2 u0 = Vector2::Random(); - Matrix3 matrot1; - - Scalar a = ei_random(-Scalar(M_PI), Scalar(M_PI)); - - // cross product - VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(v2).eigen2_dot(v1), Scalar(1)); - Matrix3 m; - m << v0.normalized(), - (v0.cross(v1)).normalized(), - (v0.cross(v1).cross(v0)).normalized(); - VERIFY(m.isUnitary()); - - // Quaternion: Identity(), setIdentity(); - Quaternionx q1, q2; - q2.setIdentity(); - VERIFY_IS_APPROX(Quaternionx(Quaternionx::Identity()).coeffs(), q2.coeffs()); - q1.coeffs().setRandom(); - VERIFY_IS_APPROX(q1.coeffs(), (q1*q2).coeffs()); - - // unitOrthogonal - VERIFY_IS_MUCH_SMALLER_THAN(u0.unitOrthogonal().eigen2_dot(u0), Scalar(1)); - VERIFY_IS_MUCH_SMALLER_THAN(v0.unitOrthogonal().eigen2_dot(v0), Scalar(1)); - VERIFY_IS_APPROX(u0.unitOrthogonal().norm(), Scalar(1)); - VERIFY_IS_APPROX(v0.unitOrthogonal().norm(), Scalar(1)); - - - VERIFY_IS_APPROX(v0, AngleAxisx(a, v0.normalized()) * v0); - VERIFY_IS_APPROX(-v0, AngleAxisx(Scalar(M_PI), v0.unitOrthogonal()) * v0); - VERIFY_IS_APPROX(ei_cos(a)*v0.squaredNorm(), v0.eigen2_dot(AngleAxisx(a, v0.unitOrthogonal()) * v0)); - m = AngleAxisx(a, v0.normalized()).toRotationMatrix().adjoint(); - VERIFY_IS_APPROX(Matrix3::Identity(), m * AngleAxisx(a, v0.normalized())); - VERIFY_IS_APPROX(Matrix3::Identity(), AngleAxisx(a, v0.normalized()) * m); - - q1 = AngleAxisx(a, v0.normalized()); - q2 = AngleAxisx(a, v1.normalized()); - - // angular distance - Scalar refangle = ei_abs(AngleAxisx(q1.inverse()*q2).angle()); - if (refangle>Scalar(M_PI)) - refangle = Scalar(2)*Scalar(M_PI) - refangle; - - if((q1.coeffs()-q2.coeffs()).norm() > 10*largeEps) - { - VERIFY(ei_isApprox(q1.angularDistance(q2), refangle, largeEps)); - } - - // rotation matrix conversion - VERIFY_IS_APPROX(q1 * v2, q1.toRotationMatrix() * v2); - VERIFY_IS_APPROX(q1 * q2 * v2, - q1.toRotationMatrix() * q2.toRotationMatrix() * v2); - - VERIFY( (q2*q1).isApprox(q1*q2, largeEps) || !(q2 * q1 * v2).isApprox( - q1.toRotationMatrix() * q2.toRotationMatrix() * v2)); - - q2 = q1.toRotationMatrix(); - VERIFY_IS_APPROX(q1*v1,q2*v1); - - matrot1 = AngleAxisx(Scalar(0.1), Vector3::UnitX()) - * AngleAxisx(Scalar(0.2), Vector3::UnitY()) - * AngleAxisx(Scalar(0.3), Vector3::UnitZ()); - VERIFY_IS_APPROX(matrot1 * v1, - AngleAxisx(Scalar(0.1), Vector3(1,0,0)).toRotationMatrix() - * (AngleAxisx(Scalar(0.2), Vector3(0,1,0)).toRotationMatrix() - * (AngleAxisx(Scalar(0.3), Vector3(0,0,1)).toRotationMatrix() * v1))); - - // angle-axis conversion - AngleAxisx aa = q1; - VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); - VERIFY_IS_NOT_APPROX(q1 * v1, Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1); - - // from two vector creation - VERIFY_IS_APPROX(v2.normalized(),(q2.setFromTwoVectors(v1,v2)*v1).normalized()); - VERIFY_IS_APPROX(v2.normalized(),(q2.setFromTwoVectors(v1,v2)*v1).normalized()); - - // inverse and conjugate - VERIFY_IS_APPROX(q1 * (q1.inverse() * v1), v1); - VERIFY_IS_APPROX(q1 * (q1.conjugate() * v1), v1); - - // AngleAxis - VERIFY_IS_APPROX(AngleAxisx(a,v1.normalized()).toRotationMatrix(), - Quaternionx(AngleAxisx(a,v1.normalized())).toRotationMatrix()); - - AngleAxisx aa1; - m = q1.toRotationMatrix(); - aa1 = m; - VERIFY_IS_APPROX(AngleAxisx(m).toRotationMatrix(), - Quaternionx(m).toRotationMatrix()); - - // Transform - // TODO complete the tests ! - a = 0; - while (ei_abs(a)(-Scalar(0.4)*Scalar(M_PI), Scalar(0.4)*Scalar(M_PI)); - q1 = AngleAxisx(a, v0.normalized()); - Transform3 t0, t1, t2; - // first test setIdentity() and Identity() - t0.setIdentity(); - VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity()); - t0.matrix().setZero(); - t0 = Transform3::Identity(); - VERIFY_IS_APPROX(t0.matrix(), Transform3::MatrixType::Identity()); - - t0.linear() = q1.toRotationMatrix(); - t1.setIdentity(); - t1.linear() = q1.toRotationMatrix(); - - v0 << 50, 2, 1;//= ei_random_matrix().cwiseProduct(Vector3(10,2,0.5)); - t0.scale(v0); - t1.prescale(v0); - - VERIFY_IS_APPROX( (t0 * Vector3(1,0,0)).norm(), v0.x()); - //VERIFY(!ei_isApprox((t1 * Vector3(1,0,0)).norm(), v0.x())); - - t0.setIdentity(); - t1.setIdentity(); - v1 << 1, 2, 3; - t0.linear() = q1.toRotationMatrix(); - t0.pretranslate(v0); - t0.scale(v1); - t1.linear() = q1.conjugate().toRotationMatrix(); - t1.prescale(v1.cwise().inverse()); - t1.translate(-v0); - - VERIFY((t0.matrix() * t1.matrix()).isIdentity(test_precision())); - - t1.fromPositionOrientationScale(v0, q1, v1); - VERIFY_IS_APPROX(t1.matrix(), t0.matrix()); - VERIFY_IS_APPROX(t1*v1, t0*v1); - - t0.setIdentity(); t0.scale(v0).rotate(q1.toRotationMatrix()); - t1.setIdentity(); t1.scale(v0).rotate(q1); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - t0.setIdentity(); t0.scale(v0).rotate(AngleAxisx(q1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - VERIFY_IS_APPROX(t0.scale(a).matrix(), t1.scale(Vector3::Constant(a)).matrix()); - VERIFY_IS_APPROX(t0.prescale(a).matrix(), t1.prescale(Vector3::Constant(a)).matrix()); - - // More transform constructors, operator=, operator*= - - Matrix3 mat3 = Matrix3::Random(); - Matrix4 mat4; - mat4 << mat3 , Vector3::Zero() , Vector4::Zero().transpose(); - Transform3 tmat3(mat3), tmat4(mat4); - tmat4.matrix()(3,3) = Scalar(1); - VERIFY_IS_APPROX(tmat3.matrix(), tmat4.matrix()); - - Scalar a3 = ei_random(-Scalar(M_PI), Scalar(M_PI)); - Vector3 v3 = Vector3::Random().normalized(); - AngleAxisx aa3(a3, v3); - Transform3 t3(aa3); - Transform3 t4; - t4 = aa3; - VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); - t4.rotate(AngleAxisx(-a3,v3)); - VERIFY_IS_APPROX(t4.matrix(), Matrix4::Identity()); - t4 *= aa3; - VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); - - v3 = Vector3::Random(); - Translation3 tv3(v3); - Transform3 t5(tv3); - t4 = tv3; - VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); - t4.translate(-v3); - VERIFY_IS_APPROX(t4.matrix(), Matrix4::Identity()); - t4 *= tv3; - VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); - - Scaling3 sv3(v3); - Transform3 t6(sv3); - t4 = sv3; - VERIFY_IS_APPROX(t6.matrix(), t4.matrix()); - t4.scale(v3.cwise().inverse()); - VERIFY_IS_APPROX(t4.matrix(), Matrix4::Identity()); - t4 *= sv3; - VERIFY_IS_APPROX(t6.matrix(), t4.matrix()); - - // matrix * transform - VERIFY_IS_APPROX(Transform3(t3.matrix()*t4).matrix(), Transform3(t3*t4).matrix()); - - // chained Transform product - VERIFY_IS_APPROX(((t3*t4)*t5).matrix(), (t3*(t4*t5)).matrix()); - - // check that Transform product doesn't have aliasing problems - t5 = t4; - t5 = t5*t5; - VERIFY_IS_APPROX(t5, t4*t4); - - // 2D transformation - Transform2 t20, t21; - Vector2 v20 = Vector2::Random(); - Vector2 v21 = Vector2::Random(); - for (int k=0; k<2; ++k) - if (ei_abs(v21[k])(a).toRotationMatrix(); - VERIFY_IS_APPROX(t20.fromPositionOrientationScale(v20,a,v21).matrix(), - t21.pretranslate(v20).scale(v21).matrix()); - - t21.setIdentity(); - t21.linear() = Rotation2D(-a).toRotationMatrix(); - VERIFY( (t20.fromPositionOrientationScale(v20,a,v21) - * (t21.prescale(v21.cwise().inverse()).translate(-v20))).matrix().isIdentity(test_precision()) ); - - // Transform - new API - // 3D - t0.setIdentity(); - t0.rotate(q1).scale(v0).translate(v0); - // mat * scaling and mat * translation - t1 = (Matrix3(q1) * Scaling3(v0)) * Translation3(v0); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // mat * transformation and scaling * translation - t1 = Matrix3(q1) * (Scaling3(v0) * Translation3(v0)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - t0.setIdentity(); - t0.prerotate(q1).prescale(v0).pretranslate(v0); - // translation * scaling and transformation * mat - t1 = (Translation3(v0) * Scaling3(v0)) * Matrix3(q1); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // scaling * mat and translation * mat - t1 = Translation3(v0) * (Scaling3(v0) * Matrix3(q1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - t0.setIdentity(); - t0.scale(v0).translate(v0).rotate(q1); - // translation * mat and scaling * transformation - t1 = Scaling3(v0) * (Translation3(v0) * Matrix3(q1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // transformation * scaling - t0.scale(v0); - t1 = t1 * Scaling3(v0); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // transformation * translation - t0.translate(v0); - t1 = t1 * Translation3(v0); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - // translation * transformation - t0.pretranslate(v0); - t1 = Translation3(v0) * t1; - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // transform * quaternion - t0.rotate(q1); - t1 = t1 * q1; - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // translation * quaternion - t0.translate(v1).rotate(q1); - t1 = t1 * (Translation3(v1) * q1); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // scaling * quaternion - t0.scale(v1).rotate(q1); - t1 = t1 * (Scaling3(v1) * q1); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // quaternion * transform - t0.prerotate(q1); - t1 = q1 * t1; - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // quaternion * translation - t0.rotate(q1).translate(v1); - t1 = t1 * (q1 * Translation3(v1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // quaternion * scaling - t0.rotate(q1).scale(v1); - t1 = t1 * (q1 * Scaling3(v1)); - VERIFY_IS_APPROX(t0.matrix(), t1.matrix()); - - // translation * vector - t0.setIdentity(); - t0.translate(v0); - VERIFY_IS_APPROX(t0 * v1, Translation3(v0) * v1); - - // scaling * vector - t0.setIdentity(); - t0.scale(v0); - VERIFY_IS_APPROX(t0 * v1, Scaling3(v0) * v1); - - // test transform inversion - t0.setIdentity(); - t0.translate(v0); - t0.linear().setRandom(); - VERIFY_IS_APPROX(t0.inverse(Affine), t0.matrix().inverse()); - t0.setIdentity(); - t0.translate(v0).rotate(q1); - VERIFY_IS_APPROX(t0.inverse(Isometry), t0.matrix().inverse()); - - // test extract rotation and scaling - t0.setIdentity(); - t0.translate(v0).rotate(q1).scale(v1); - VERIFY_IS_APPROX(t0.rotation() * v1, Matrix3(q1) * v1); - - Matrix3 mat_rotation, mat_scaling; - t0.setIdentity(); - t0.translate(v0).rotate(q1).scale(v1); - t0.computeRotationScaling(&mat_rotation, &mat_scaling); - VERIFY_IS_APPROX(t0.linear(), mat_rotation * mat_scaling); - VERIFY_IS_APPROX(mat_rotation*mat_rotation.adjoint(), Matrix3::Identity()); - VERIFY_IS_APPROX(mat_rotation.determinant(), Scalar(1)); - t0.computeScalingRotation(&mat_scaling, &mat_rotation); - VERIFY_IS_APPROX(t0.linear(), mat_scaling * mat_rotation); - VERIFY_IS_APPROX(mat_rotation*mat_rotation.adjoint(), Matrix3::Identity()); - VERIFY_IS_APPROX(mat_rotation.determinant(), Scalar(1)); - - // test casting - eigen2_Transform t1f = t1.template cast(); - VERIFY_IS_APPROX(t1f.template cast(),t1); - eigen2_Transform t1d = t1.template cast(); - VERIFY_IS_APPROX(t1d.template cast(),t1); - - Translation3 tr1(v0); - eigen2_Translation tr1f = tr1.template cast(); - VERIFY_IS_APPROX(tr1f.template cast(),tr1); - eigen2_Translation tr1d = tr1.template cast(); - VERIFY_IS_APPROX(tr1d.template cast(),tr1); - - Scaling3 sc1(v0); - eigen2_Scaling sc1f = sc1.template cast(); - VERIFY_IS_APPROX(sc1f.template cast(),sc1); - eigen2_Scaling sc1d = sc1.template cast(); - VERIFY_IS_APPROX(sc1d.template cast(),sc1); - - eigen2_Quaternion q1f = q1.template cast(); - VERIFY_IS_APPROX(q1f.template cast(),q1); - eigen2_Quaternion q1d = q1.template cast(); - VERIFY_IS_APPROX(q1d.template cast(),q1); - - eigen2_AngleAxis aa1f = aa1.template cast(); - VERIFY_IS_APPROX(aa1f.template cast(),aa1); - eigen2_AngleAxis aa1d = aa1.template cast(); - VERIFY_IS_APPROX(aa1d.template cast(),aa1); - - eigen2_Rotation2D r2d1(ei_random()); - eigen2_Rotation2D r2d1f = r2d1.template cast(); - VERIFY_IS_APPROX(r2d1f.template cast(),r2d1); - eigen2_Rotation2D r2d1d = r2d1.template cast(); - VERIFY_IS_APPROX(r2d1d.template cast(),r2d1); - - m = q1; -// m.col(1) = Vector3(0,ei_random(),ei_random()).normalized(); -// m.col(0) = Vector3(-1,0,0).normalized(); -// m.col(2) = m.col(0).cross(m.col(1)); - #define VERIFY_EULER(I,J,K, X,Y,Z) { \ - Vector3 ea = m.eulerAngles(I,J,K); \ - Matrix3 m1 = Matrix3(AngleAxisx(ea[0], Vector3::Unit##X()) * AngleAxisx(ea[1], Vector3::Unit##Y()) * AngleAxisx(ea[2], Vector3::Unit##Z())); \ - VERIFY_IS_APPROX(m, m1); \ - VERIFY_IS_APPROX(m, Matrix3(AngleAxisx(ea[0], Vector3::Unit##X()) * AngleAxisx(ea[1], Vector3::Unit##Y()) * AngleAxisx(ea[2], Vector3::Unit##Z()))); \ - } - VERIFY_EULER(0,1,2, X,Y,Z); - VERIFY_EULER(0,1,0, X,Y,X); - VERIFY_EULER(0,2,1, X,Z,Y); - VERIFY_EULER(0,2,0, X,Z,X); - - VERIFY_EULER(1,2,0, Y,Z,X); - VERIFY_EULER(1,2,1, Y,Z,Y); - VERIFY_EULER(1,0,2, Y,X,Z); - VERIFY_EULER(1,0,1, Y,X,Y); - - VERIFY_EULER(2,0,1, Z,X,Y); - VERIFY_EULER(2,0,2, Z,X,Z); - VERIFY_EULER(2,1,0, Z,Y,X); - VERIFY_EULER(2,1,2, Z,Y,Z); - - // colwise/rowwise cross product - mat3.setRandom(); - Vector3 vec3 = Vector3::Random(); - Matrix3 mcross; - int i = ei_random(0,2); - mcross = mat3.colwise().cross(vec3); - VERIFY_IS_APPROX(mcross.col(i), mat3.col(i).cross(vec3)); - mcross = mat3.rowwise().cross(vec3); - VERIFY_IS_APPROX(mcross.row(i), mat3.row(i).cross(vec3)); - - -} - -void test_eigen2_geometry_with_eigen2_prefix() -{ - std::cout << "eigen2 support: " << EIGEN2_SUPPORT_STAGE << std::endl; - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( geometry() ); - CALL_SUBTEST_2( geometry() ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_hyperplane.cpp b/external/eigen3/test/eigen2/eigen2_hyperplane.cpp deleted file mode 100644 index f3f85e1..0000000 --- a/external/eigen3/test/eigen2/eigen2_hyperplane.cpp +++ /dev/null @@ -1,126 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include -#include -#include - -template void hyperplane(const HyperplaneType& _plane) -{ - /* this test covers the following files: - Hyperplane.h - */ - - const int dim = _plane.dim(); - typedef typename HyperplaneType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - typedef Matrix MatrixType; - - VectorType p0 = VectorType::Random(dim); - VectorType p1 = VectorType::Random(dim); - - VectorType n0 = VectorType::Random(dim).normalized(); - VectorType n1 = VectorType::Random(dim).normalized(); - - HyperplaneType pl0(n0, p0); - HyperplaneType pl1(n1, p1); - HyperplaneType pl2 = pl1; - - Scalar s0 = ei_random(); - Scalar s1 = ei_random(); - - VERIFY_IS_APPROX( n1.eigen2_dot(n1), Scalar(1) ); - - VERIFY_IS_MUCH_SMALLER_THAN( pl0.absDistance(p0), Scalar(1) ); - VERIFY_IS_APPROX( pl1.signedDistance(p1 + n1 * s0), s0 ); - VERIFY_IS_MUCH_SMALLER_THAN( pl1.signedDistance(pl1.projection(p0)), Scalar(1) ); - VERIFY_IS_MUCH_SMALLER_THAN( pl1.absDistance(p1 + pl1.normal().unitOrthogonal() * s1), Scalar(1) ); - - // transform - if (!NumTraits::IsComplex) - { - MatrixType rot = MatrixType::Random(dim,dim).qr().matrixQ(); - Scaling scaling(VectorType::Random()); - Translation translation(VectorType::Random()); - - pl2 = pl1; - VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot).absDistance(rot * p1), Scalar(1) ); - pl2 = pl1; - VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot,Isometry).absDistance(rot * p1), Scalar(1) ); - pl2 = pl1; - VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling).absDistance((rot*scaling) * p1), Scalar(1) ); - pl2 = pl1; - VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling*translation) - .absDistance((rot*scaling*translation) * p1), Scalar(1) ); - pl2 = pl1; - VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*translation,Isometry) - .absDistance((rot*translation) * p1), Scalar(1) ); - } - - // casting - const int Dim = HyperplaneType::AmbientDimAtCompileTime; - typedef typename GetDifferentType::type OtherScalar; - Hyperplane hp1f = pl1.template cast(); - VERIFY_IS_APPROX(hp1f.template cast(),pl1); - Hyperplane hp1d = pl1.template cast(); - VERIFY_IS_APPROX(hp1d.template cast(),pl1); -} - -template void lines() -{ - typedef Hyperplane HLine; - typedef ParametrizedLine PLine; - typedef Matrix Vector; - typedef Matrix CoeffsType; - - for(int i = 0; i < 10; i++) - { - Vector center = Vector::Random(); - Vector u = Vector::Random(); - Vector v = Vector::Random(); - Scalar a = ei_random(); - while (ei_abs(a-1) < 1e-4) a = ei_random(); - while (u.norm() < 1e-4) u = Vector::Random(); - while (v.norm() < 1e-4) v = Vector::Random(); - - HLine line_u = HLine::Through(center + u, center + a*u); - HLine line_v = HLine::Through(center + v, center + a*v); - - // the line equations should be normalized so that a^2+b^2=1 - VERIFY_IS_APPROX(line_u.normal().norm(), Scalar(1)); - VERIFY_IS_APPROX(line_v.normal().norm(), Scalar(1)); - - Vector result = line_u.intersection(line_v); - - // the lines should intersect at the point we called "center" - VERIFY_IS_APPROX(result, center); - - // check conversions between two types of lines - PLine pl(line_u); // gcc 3.3 will commit suicide if we don't name this variable - CoeffsType converted_coeffs(HLine(pl).coeffs()); - converted_coeffs *= line_u.coeffs()(0)/converted_coeffs(0); - VERIFY(line_u.coeffs().isApprox(converted_coeffs)); - } -} - -void test_eigen2_hyperplane() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( hyperplane(Hyperplane()) ); - CALL_SUBTEST_2( hyperplane(Hyperplane()) ); - CALL_SUBTEST_3( hyperplane(Hyperplane()) ); - CALL_SUBTEST_4( hyperplane(Hyperplane,5>()) ); - CALL_SUBTEST_5( lines() ); - CALL_SUBTEST_6( lines() ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_inverse.cpp b/external/eigen3/test/eigen2/eigen2_inverse.cpp deleted file mode 100644 index ccd24a1..0000000 --- a/external/eigen3/test/eigen2/eigen2_inverse.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include - -template void inverse(const MatrixType& m) -{ - /* this test covers the following files: - Inverse.h - */ - int rows = m.rows(); - int cols = m.cols(); - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - - MatrixType m1 = MatrixType::Random(rows, cols), - m2(rows, cols), - identity = MatrixType::Identity(rows, rows); - - while(ei_abs(m1.determinant()) < RealScalar(0.1) && rows <= 8) - { - m1 = MatrixType::Random(rows, cols); - } - - m2 = m1.inverse(); - VERIFY_IS_APPROX(m1, m2.inverse() ); - - m1.computeInverse(&m2); - VERIFY_IS_APPROX(m1, m2.inverse() ); - - VERIFY_IS_APPROX((Scalar(2)*m2).inverse(), m2.inverse()*Scalar(0.5)); - - VERIFY_IS_APPROX(identity, m1.inverse() * m1 ); - VERIFY_IS_APPROX(identity, m1 * m1.inverse() ); - - VERIFY_IS_APPROX(m1, m1.inverse().inverse() ); - - // since for the general case we implement separately row-major and col-major, test that - VERIFY_IS_APPROX(m1.transpose().inverse(), m1.inverse().transpose()); -} - -void test_eigen2_inverse() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( inverse(Matrix()) ); - CALL_SUBTEST_2( inverse(Matrix2d()) ); - CALL_SUBTEST_3( inverse(Matrix3f()) ); - CALL_SUBTEST_4( inverse(Matrix4f()) ); - CALL_SUBTEST_5( inverse(MatrixXf(8,8)) ); - CALL_SUBTEST_6( inverse(MatrixXcd(7,7)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_linearstructure.cpp b/external/eigen3/test/eigen2/eigen2_linearstructure.cpp deleted file mode 100644 index 488f4c4..0000000 --- a/external/eigen3/test/eigen2/eigen2_linearstructure.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void linearStructure(const MatrixType& m) -{ - /* this test covers the following files: - Sum.h Difference.h Opposite.h ScalarMultiple.h - */ - - typedef typename MatrixType::Scalar Scalar; - typedef Matrix VectorType; - - int rows = m.rows(); - int cols = m.cols(); - - // this test relies a lot on Random.h, and there's not much more that we can do - // to test it, hence I consider that we will have tested Random.h - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols); - - Scalar s1 = ei_random(); - while (ei_abs(s1)<1e-3) s1 = ei_random(); - - int r = ei_random(0, rows-1), - c = ei_random(0, cols-1); - - VERIFY_IS_APPROX(-(-m1), m1); - VERIFY_IS_APPROX(m1+m1, 2*m1); - VERIFY_IS_APPROX(m1+m2-m1, m2); - VERIFY_IS_APPROX(-m2+m1+m2, m1); - VERIFY_IS_APPROX(m1*s1, s1*m1); - VERIFY_IS_APPROX((m1+m2)*s1, s1*m1+s1*m2); - VERIFY_IS_APPROX((-m1+m2)*s1, -s1*m1+s1*m2); - m3 = m2; m3 += m1; - VERIFY_IS_APPROX(m3, m1+m2); - m3 = m2; m3 -= m1; - VERIFY_IS_APPROX(m3, m2-m1); - m3 = m2; m3 *= s1; - VERIFY_IS_APPROX(m3, s1*m2); - if(NumTraits::HasFloatingPoint) - { - m3 = m2; m3 /= s1; - VERIFY_IS_APPROX(m3, m2/s1); - } - - // again, test operator() to check const-qualification - VERIFY_IS_APPROX((-m1)(r,c), -(m1(r,c))); - VERIFY_IS_APPROX((m1-m2)(r,c), (m1(r,c))-(m2(r,c))); - VERIFY_IS_APPROX((m1+m2)(r,c), (m1(r,c))+(m2(r,c))); - VERIFY_IS_APPROX((s1*m1)(r,c), s1*(m1(r,c))); - VERIFY_IS_APPROX((m1*s1)(r,c), (m1(r,c))*s1); - if(NumTraits::HasFloatingPoint) - VERIFY_IS_APPROX((m1/s1)(r,c), (m1(r,c))/s1); - - // use .block to disable vectorization and compare to the vectorized version - VERIFY_IS_APPROX(m1+m1.block(0,0,rows,cols), m1+m1); - VERIFY_IS_APPROX(m1.cwise() * m1.block(0,0,rows,cols), m1.cwise() * m1); - VERIFY_IS_APPROX(m1 - m1.block(0,0,rows,cols), m1 - m1); - VERIFY_IS_APPROX(m1.block(0,0,rows,cols) * s1, m1 * s1); -} - -void test_eigen2_linearstructure() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( linearStructure(Matrix()) ); - CALL_SUBTEST_2( linearStructure(Matrix2f()) ); - CALL_SUBTEST_3( linearStructure(Vector3d()) ); - CALL_SUBTEST_4( linearStructure(Matrix4d()) ); - CALL_SUBTEST_5( linearStructure(MatrixXcf(3, 3)) ); - CALL_SUBTEST_6( linearStructure(MatrixXf(8, 12)) ); - CALL_SUBTEST_7( linearStructure(MatrixXi(8, 12)) ); - CALL_SUBTEST_8( linearStructure(MatrixXcd(20, 20)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_lu.cpp b/external/eigen3/test/eigen2/eigen2_lu.cpp deleted file mode 100644 index e993b1c..0000000 --- a/external/eigen3/test/eigen2/eigen2_lu.cpp +++ /dev/null @@ -1,122 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include - -template -void doSomeRankPreservingOperations(Eigen::MatrixBase& m) -{ - typedef typename Derived::RealScalar RealScalar; - for(int a = 0; a < 3*(m.rows()+m.cols()); a++) - { - RealScalar d = Eigen::ei_random(-1,1); - int i = Eigen::ei_random(0,m.rows()-1); // i is a random row number - int j; - do { - j = Eigen::ei_random(0,m.rows()-1); - } while (i==j); // j is another one (must be different) - m.row(i) += d * m.row(j); - - i = Eigen::ei_random(0,m.cols()-1); // i is a random column number - do { - j = Eigen::ei_random(0,m.cols()-1); - } while (i==j); // j is another one (must be different) - m.col(i) += d * m.col(j); - } -} - -template void lu_non_invertible() -{ - /* this test covers the following files: - LU.h - */ - // NOTE there seems to be a problem with too small sizes -- could easily lie in the doSomeRankPreservingOperations function - int rows = ei_random(20,200), cols = ei_random(20,200), cols2 = ei_random(20,200); - int rank = ei_random(1, std::min(rows, cols)-1); - - MatrixType m1(rows, cols), m2(cols, cols2), m3(rows, cols2), k(1,1); - m1 = MatrixType::Random(rows,cols); - if(rows <= cols) - for(int i = rank; i < rows; i++) m1.row(i).setZero(); - else - for(int i = rank; i < cols; i++) m1.col(i).setZero(); - doSomeRankPreservingOperations(m1); - - LU lu(m1); - typename LU::KernelResultType m1kernel = lu.kernel(); - typename LU::ImageResultType m1image = lu.image(); - - VERIFY(rank == lu.rank()); - VERIFY(cols - lu.rank() == lu.dimensionOfKernel()); - VERIFY(!lu.isInjective()); - VERIFY(!lu.isInvertible()); - VERIFY(lu.isSurjective() == (lu.rank() == rows)); - VERIFY((m1 * m1kernel).isMuchSmallerThan(m1)); - VERIFY(m1image.lu().rank() == rank); - MatrixType sidebyside(m1.rows(), m1.cols() + m1image.cols()); - sidebyside << m1, m1image; - VERIFY(sidebyside.lu().rank() == rank); - m2 = MatrixType::Random(cols,cols2); - m3 = m1*m2; - m2 = MatrixType::Random(cols,cols2); - lu.solve(m3, &m2); - VERIFY_IS_APPROX(m3, m1*m2); - /* solve now always returns true - m3 = MatrixType::Random(rows,cols2); - VERIFY(!lu.solve(m3, &m2)); - */ -} - -template void lu_invertible() -{ - /* this test covers the following files: - LU.h - */ - typedef typename NumTraits::Real RealScalar; - int size = ei_random(10,200); - - MatrixType m1(size, size), m2(size, size), m3(size, size); - m1 = MatrixType::Random(size,size); - - if (ei_is_same_type::ret) - { - // let's build a matrix more stable to inverse - MatrixType a = MatrixType::Random(size,size*2); - m1 += a * a.adjoint(); - } - - LU lu(m1); - VERIFY(0 == lu.dimensionOfKernel()); - VERIFY(size == lu.rank()); - VERIFY(lu.isInjective()); - VERIFY(lu.isSurjective()); - VERIFY(lu.isInvertible()); - VERIFY(lu.image().lu().isInvertible()); - m3 = MatrixType::Random(size,size); - lu.solve(m3, &m2); - VERIFY_IS_APPROX(m3, m1*m2); - VERIFY_IS_APPROX(m2, lu.inverse()*m3); - m3 = MatrixType::Random(size,size); - VERIFY(lu.solve(m3, &m2)); -} - -void test_eigen2_lu() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( lu_non_invertible() ); - CALL_SUBTEST_2( lu_non_invertible() ); - CALL_SUBTEST_3( lu_non_invertible() ); - CALL_SUBTEST_4( lu_non_invertible() ); - CALL_SUBTEST_1( lu_invertible() ); - CALL_SUBTEST_2( lu_invertible() ); - CALL_SUBTEST_3( lu_invertible() ); - CALL_SUBTEST_4( lu_invertible() ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_map.cpp b/external/eigen3/test/eigen2/eigen2_map.cpp deleted file mode 100644 index 4a1c4e1..0000000 --- a/external/eigen3/test/eigen2/eigen2_map.cpp +++ /dev/null @@ -1,114 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2007-2010 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void map_class_vector(const VectorType& m) -{ - typedef typename VectorType::Scalar Scalar; - - int size = m.size(); - - // test Map.h - Scalar* array1 = ei_aligned_new(size); - Scalar* array2 = ei_aligned_new(size); - Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = std::size_t(array3)%16 == 0 ? array3+1 : array3; - - Map(array1, size) = VectorType::Random(size); - Map(array2, size) = Map(array1, size); - Map(array3unaligned, size) = Map((const Scalar*)array1, size); // test non-const-correctness support in eigen2 - VectorType ma1 = Map(array1, size); - VectorType ma2 = Map(array2, size); - VectorType ma3 = Map(array3unaligned, size); - VERIFY_IS_APPROX(ma1, ma2); - VERIFY_IS_APPROX(ma1, ma3); - - ei_aligned_delete(array1, size); - ei_aligned_delete(array2, size); - delete[] array3; -} - -template void map_class_matrix(const MatrixType& m) -{ - typedef typename MatrixType::Scalar Scalar; - - int rows = m.rows(), cols = m.cols(), size = rows*cols; - - // test Map.h - Scalar* array1 = ei_aligned_new(size); - for(int i = 0; i < size; i++) array1[i] = Scalar(1); - Scalar* array2 = ei_aligned_new(size); - for(int i = 0; i < size; i++) array2[i] = Scalar(1); - Scalar* array3 = new Scalar[size+1]; - for(int i = 0; i < size+1; i++) array3[i] = Scalar(1); - Scalar* array3unaligned = std::size_t(array3)%16 == 0 ? array3+1 : array3; - Map(array1, rows, cols) = MatrixType::Ones(rows,cols); - Map(array2, rows, cols) = Map((const Scalar*)array1, rows, cols); // test non-const-correctness support in eigen2 - Map(array3unaligned, rows, cols) = Map(array1, rows, cols); - MatrixType ma1 = Map(array1, rows, cols); - MatrixType ma2 = Map(array2, rows, cols); - VERIFY_IS_APPROX(ma1, ma2); - MatrixType ma3 = Map(array3unaligned, rows, cols); - VERIFY_IS_APPROX(ma1, ma3); - - ei_aligned_delete(array1, size); - ei_aligned_delete(array2, size); - delete[] array3; -} - -template void map_static_methods(const VectorType& m) -{ - typedef typename VectorType::Scalar Scalar; - - int size = m.size(); - - // test Map.h - Scalar* array1 = ei_aligned_new(size); - Scalar* array2 = ei_aligned_new(size); - Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = std::size_t(array3)%16 == 0 ? array3+1 : array3; - - VectorType::MapAligned(array1, size) = VectorType::Random(size); - VectorType::Map(array2, size) = VectorType::Map(array1, size); - VectorType::Map(array3unaligned, size) = VectorType::Map(array1, size); - VectorType ma1 = VectorType::Map(array1, size); - VectorType ma2 = VectorType::MapAligned(array2, size); - VectorType ma3 = VectorType::Map(array3unaligned, size); - VERIFY_IS_APPROX(ma1, ma2); - VERIFY_IS_APPROX(ma1, ma3); - - ei_aligned_delete(array1, size); - ei_aligned_delete(array2, size); - delete[] array3; -} - - -void test_eigen2_map() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( map_class_vector(Matrix()) ); - CALL_SUBTEST_2( map_class_vector(Vector4d()) ); - CALL_SUBTEST_3( map_class_vector(RowVector4f()) ); - CALL_SUBTEST_4( map_class_vector(VectorXcf(8)) ); - CALL_SUBTEST_5( map_class_vector(VectorXi(12)) ); - - CALL_SUBTEST_1( map_class_matrix(Matrix()) ); - CALL_SUBTEST_2( map_class_matrix(Matrix4d()) ); - CALL_SUBTEST_6( map_class_matrix(Matrix()) ); - CALL_SUBTEST_4( map_class_matrix(MatrixXcf(ei_random(1,10),ei_random(1,10))) ); - CALL_SUBTEST_5( map_class_matrix(MatrixXi(ei_random(1,10),ei_random(1,10))) ); - - CALL_SUBTEST_1( map_static_methods(Matrix()) ); - CALL_SUBTEST_2( map_static_methods(Vector3f()) ); - CALL_SUBTEST_7( map_static_methods(RowVector3d()) ); - CALL_SUBTEST_4( map_static_methods(VectorXcd(8)) ); - CALL_SUBTEST_5( map_static_methods(VectorXf(12)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_meta.cpp b/external/eigen3/test/eigen2/eigen2_meta.cpp deleted file mode 100644 index 1d01bd8..0000000 --- a/external/eigen3/test/eigen2/eigen2_meta.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -void test_eigen2_meta() -{ - typedef float & FloatRef; - typedef const float & ConstFloatRef; - - VERIFY((ei_meta_if<(3<4),ei_meta_true, ei_meta_false>::ret::ret)); - VERIFY(( ei_is_same_type::ret)); - VERIFY((!ei_is_same_type::ret)); - VERIFY((!ei_is_same_type::ret)); - VERIFY((!ei_is_same_type::ret)); - - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - VERIFY(( ei_is_same_type::type >::ret)); - - VERIFY(ei_meta_sqrt<1>::ret == 1); - #define VERIFY_META_SQRT(X) VERIFY(ei_meta_sqrt::ret == int(ei_sqrt(double(X)))) - VERIFY_META_SQRT(2); - VERIFY_META_SQRT(3); - VERIFY_META_SQRT(4); - VERIFY_META_SQRT(5); - VERIFY_META_SQRT(6); - VERIFY_META_SQRT(8); - VERIFY_META_SQRT(9); - VERIFY_META_SQRT(15); - VERIFY_META_SQRT(16); - VERIFY_META_SQRT(17); - VERIFY_META_SQRT(255); - VERIFY_META_SQRT(256); - VERIFY_META_SQRT(257); - VERIFY_META_SQRT(1023); - VERIFY_META_SQRT(1024); - VERIFY_META_SQRT(1025); -} diff --git a/external/eigen3/test/eigen2/eigen2_miscmatrices.cpp b/external/eigen3/test/eigen2/eigen2_miscmatrices.cpp deleted file mode 100644 index 8bbb20c..0000000 --- a/external/eigen3/test/eigen2/eigen2_miscmatrices.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void miscMatrices(const MatrixType& m) -{ - /* this test covers the following files: - DiagonalMatrix.h Ones.h - */ - - typedef typename MatrixType::Scalar Scalar; - typedef Matrix VectorType; - typedef Matrix RowVectorType; - int rows = m.rows(); - int cols = m.cols(); - - int r = ei_random(0, rows-1), r2 = ei_random(0, rows-1), c = ei_random(0, cols-1); - VERIFY_IS_APPROX(MatrixType::Ones(rows,cols)(r,c), static_cast(1)); - MatrixType m1 = MatrixType::Ones(rows,cols); - VERIFY_IS_APPROX(m1(r,c), static_cast(1)); - VectorType v1 = VectorType::Random(rows); - v1[0]; - Matrix - square = v1.asDiagonal(); - if(r==r2) VERIFY_IS_APPROX(square(r,r2), v1[r]); - else VERIFY_IS_MUCH_SMALLER_THAN(square(r,r2), static_cast(1)); - square = MatrixType::Zero(rows, rows); - square.diagonal() = VectorType::Ones(rows); - VERIFY_IS_APPROX(square, MatrixType::Identity(rows, rows)); -} - -void test_eigen2_miscmatrices() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( miscMatrices(Matrix()) ); - CALL_SUBTEST_2( miscMatrices(Matrix4d()) ); - CALL_SUBTEST_3( miscMatrices(MatrixXcf(3, 3)) ); - CALL_SUBTEST_4( miscMatrices(MatrixXi(8, 12)) ); - CALL_SUBTEST_5( miscMatrices(MatrixXcd(20, 20)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_mixingtypes.cpp b/external/eigen3/test/eigen2/eigen2_mixingtypes.cpp deleted file mode 100644 index fb5ac5d..0000000 --- a/external/eigen3/test/eigen2/eigen2_mixingtypes.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_NO_STATIC_ASSERT -#define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them -#endif - -#ifndef EIGEN_DONT_VECTORIZE -#define EIGEN_DONT_VECTORIZE // SSE intrinsics aren't designed to allow mixing types -#endif - -#include "main.h" - - -template void mixingtypes(int size = SizeAtCompileType) -{ - typedef Matrix Mat_f; - typedef Matrix Mat_d; - typedef Matrix, SizeAtCompileType, SizeAtCompileType> Mat_cf; - typedef Matrix, SizeAtCompileType, SizeAtCompileType> Mat_cd; - typedef Matrix Vec_f; - typedef Matrix Vec_d; - typedef Matrix, SizeAtCompileType, 1> Vec_cf; - typedef Matrix, SizeAtCompileType, 1> Vec_cd; - - Mat_f mf(size,size); - Mat_d md(size,size); - Mat_cf mcf(size,size); - Mat_cd mcd(size,size); - Vec_f vf(size,1); - Vec_d vd(size,1); - Vec_cf vcf(size,1); - Vec_cd vcd(size,1); - - mf+mf; - VERIFY_RAISES_ASSERT(mf+md); - VERIFY_RAISES_ASSERT(mf+mcf); - VERIFY_RAISES_ASSERT(vf=vd); - VERIFY_RAISES_ASSERT(vf+=vd); - VERIFY_RAISES_ASSERT(mcd=md); - - mf*mf; - md*mcd; - mcd*md; - mf*vcf; - mcf*vf; - mcf *= mf; - vcd = md*vcd; - vcf = mcf*vf; -#if 0 - // these are know generating hard build errors in eigen3 - VERIFY_RAISES_ASSERT(mf*md); - VERIFY_RAISES_ASSERT(mcf*mcd); - VERIFY_RAISES_ASSERT(mcf*vcd); - VERIFY_RAISES_ASSERT(vcf = mf*vf); - - vf.eigen2_dot(vf); - VERIFY_RAISES_ASSERT(vd.eigen2_dot(vf)); - VERIFY_RAISES_ASSERT(vcf.eigen2_dot(vf)); // yeah eventually we should allow this but i'm too lazy to make that change now in Dot.h - // especially as that might be rewritten as cwise product .sum() which would make that automatic. -#endif -} - -void test_eigen2_mixingtypes() -{ - // check that our operator new is indeed called: - CALL_SUBTEST_1(mixingtypes<3>()); - CALL_SUBTEST_2(mixingtypes<4>()); - CALL_SUBTEST_3(mixingtypes(20)); -} diff --git a/external/eigen3/test/eigen2/eigen2_newstdvector.cpp b/external/eigen3/test/eigen2/eigen2_newstdvector.cpp deleted file mode 100644 index 5f90099..0000000 --- a/external/eigen3/test/eigen2/eigen2_newstdvector.cpp +++ /dev/null @@ -1,149 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_USE_NEW_STDVECTOR -#include "main.h" -#include -#include - -template -void check_stdvector_matrix(const MatrixType& m) -{ - int rows = m.rows(); - int cols = m.cols(); - MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); - std::vector > v(10, MatrixType(rows,cols)), w(20, y); - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.resize(22,y); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((std::size_t)&(v[22]) == (std::size_t)&(v[21]) + sizeof(MatrixType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - MatrixType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(unsigned int i=23; i -void check_stdvector_transform(const TransformType&) -{ - typedef typename TransformType::MatrixType MatrixType; - TransformType x(MatrixType::Random()), y(MatrixType::Random()); - std::vector > v(10), w(20, y); - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.resize(22,y); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((std::size_t)&(v[22]) == (std::size_t)&(v[21]) + sizeof(TransformType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - TransformType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(unsigned int i=23; i -void check_stdvector_quaternion(const QuaternionType&) -{ - typedef typename QuaternionType::Coefficients Coefficients; - QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); - std::vector > v(10), w(20, y); - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.resize(22,y); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((std::size_t)&(v[22]) == (std::size_t)&(v[21]) + sizeof(QuaternionType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - QuaternionType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(unsigned int i=23; i -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// this hack is needed to make this file compiles with -pedantic (gcc) -#ifdef __GNUC__ -#define throw(X) -#endif -// discard stack allocation as that too bypasses malloc -#define EIGEN_STACK_ALLOCATION_LIMIT 0 -// any heap allocation will raise an assert -#define EIGEN_NO_MALLOC - -#include "main.h" - -template void nomalloc(const MatrixType& m) -{ - /* this test check no dynamic memory allocation are issued with fixed-size matrices - */ - - typedef typename MatrixType::Scalar Scalar; - - int rows = m.rows(); - int cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols); - - Scalar s1 = ei_random(); - - int r = ei_random(0, rows-1), - c = ei_random(0, cols-1); - - VERIFY_IS_APPROX((m1+m2)*s1, s1*m1+s1*m2); - VERIFY_IS_APPROX((m1+m2)(r,c), (m1(r,c))+(m2(r,c))); - VERIFY_IS_APPROX(m1.cwise() * m1.block(0,0,rows,cols), m1.cwise() * m1); - VERIFY_IS_APPROX((m1*m1.transpose())*m2, m1*(m1.transpose()*m2)); -} - -void test_eigen2_nomalloc() -{ - // check that our operator new is indeed called: - VERIFY_RAISES_ASSERT(MatrixXd dummy = MatrixXd::Random(3,3)); - CALL_SUBTEST_1( nomalloc(Matrix()) ); - CALL_SUBTEST_2( nomalloc(Matrix4d()) ); - CALL_SUBTEST_3( nomalloc(Matrix()) ); -} diff --git a/external/eigen3/test/eigen2/eigen2_packetmath.cpp b/external/eigen3/test/eigen2/eigen2_packetmath.cpp deleted file mode 100644 index b1f325f..0000000 --- a/external/eigen3/test/eigen2/eigen2_packetmath.cpp +++ /dev/null @@ -1,132 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -// using namespace Eigen; - -template bool areApprox(const Scalar* a, const Scalar* b, int size) -{ - for (int i=0; i const complex& min(const complex& a, const complex& b) -{ return a.real() < b.real() ? a : b; } - -template<> const complex& max(const complex& a, const complex& b) -{ return a.real() < b.real() ? b : a; } - -} - -template void packetmath() -{ - typedef typename ei_packet_traits::type Packet; - const int PacketSize = ei_packet_traits::size; - - const int size = PacketSize*4; - EIGEN_ALIGN_128 Scalar data1[ei_packet_traits::size*4]; - EIGEN_ALIGN_128 Scalar data2[ei_packet_traits::size*4]; - EIGEN_ALIGN_128 Packet packets[PacketSize*2]; - EIGEN_ALIGN_128 Scalar ref[ei_packet_traits::size*4]; - for (int i=0; i(); - data2[i] = ei_random(); - } - - ei_pstore(data2, ei_pload(data1)); - VERIFY(areApprox(data1, data2, PacketSize) && "aligned load/store"); - - for (int offset=0; offset(packets[0], packets[1]); - else if (offset==1) ei_palign<1>(packets[0], packets[1]); - else if (offset==2) ei_palign<2>(packets[0], packets[1]); - else if (offset==3) ei_palign<3>(packets[0], packets[1]); - ei_pstore(data2, packets[0]); - - for (int i=0; i Vector; - VERIFY(areApprox(ref, data2, PacketSize) && "ei_palign"); - } - - CHECK_CWISE(REF_ADD, ei_padd); - CHECK_CWISE(REF_SUB, ei_psub); - CHECK_CWISE(REF_MUL, ei_pmul); - #ifndef EIGEN_VECTORIZE_ALTIVEC - if (!ei_is_same_type::ret) - CHECK_CWISE(REF_DIV, ei_pdiv); - #endif - CHECK_CWISE(std::min, ei_pmin); - CHECK_CWISE(std::max, ei_pmax); - - for (int i=0; i() ); - CALL_SUBTEST_2( packetmath() ); - CALL_SUBTEST_3( packetmath() ); - CALL_SUBTEST_4( packetmath >() ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_parametrizedline.cpp b/external/eigen3/test/eigen2/eigen2_parametrizedline.cpp deleted file mode 100644 index 8147288..0000000 --- a/external/eigen3/test/eigen2/eigen2_parametrizedline.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include -#include -#include - -template void parametrizedline(const LineType& _line) -{ - /* this test covers the following files: - ParametrizedLine.h - */ - - const int dim = _line.dim(); - typedef typename LineType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef Matrix VectorType; - typedef Matrix MatrixType; - - VectorType p0 = VectorType::Random(dim); - VectorType p1 = VectorType::Random(dim); - - VectorType d0 = VectorType::Random(dim).normalized(); - - LineType l0(p0, d0); - - Scalar s0 = ei_random(); - Scalar s1 = ei_abs(ei_random()); - - VERIFY_IS_MUCH_SMALLER_THAN( l0.distance(p0), RealScalar(1) ); - VERIFY_IS_MUCH_SMALLER_THAN( l0.distance(p0+s0*d0), RealScalar(1) ); - VERIFY_IS_APPROX( (l0.projection(p1)-p1).norm(), l0.distance(p1) ); - VERIFY_IS_MUCH_SMALLER_THAN( l0.distance(l0.projection(p1)), RealScalar(1) ); - VERIFY_IS_APPROX( Scalar(l0.distance((p0+s0*d0) + d0.unitOrthogonal() * s1)), s1 ); - - // casting - const int Dim = LineType::AmbientDimAtCompileTime; - typedef typename GetDifferentType::type OtherScalar; - ParametrizedLine hp1f = l0.template cast(); - VERIFY_IS_APPROX(hp1f.template cast(),l0); - ParametrizedLine hp1d = l0.template cast(); - VERIFY_IS_APPROX(hp1d.template cast(),l0); -} - -void test_eigen2_parametrizedline() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( parametrizedline(ParametrizedLine()) ); - CALL_SUBTEST_2( parametrizedline(ParametrizedLine()) ); - CALL_SUBTEST_3( parametrizedline(ParametrizedLine()) ); - CALL_SUBTEST_4( parametrizedline(ParametrizedLine,5>()) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_prec_inverse_4x4.cpp b/external/eigen3/test/eigen2/eigen2_prec_inverse_4x4.cpp deleted file mode 100644 index 8bfa556..0000000 --- a/external/eigen3/test/eigen2/eigen2_prec_inverse_4x4.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include -#include - -template std::string type_name() { return "other"; } -template<> std::string type_name() { return "float"; } -template<> std::string type_name() { return "double"; } -template<> std::string type_name() { return "int"; } -template<> std::string type_name >() { return "complex"; } -template<> std::string type_name >() { return "complex"; } -template<> std::string type_name >() { return "complex"; } - -#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl; - -template inline typename NumTraits::Real epsilon() -{ - return std::numeric_limits::Real>::epsilon(); -} - -template void inverse_permutation_4x4() -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - Vector4i indices(0,1,2,3); - for(int i = 0; i < 24; ++i) - { - MatrixType m = MatrixType::Zero(); - m(indices(0),0) = 1; - m(indices(1),1) = 1; - m(indices(2),2) = 1; - m(indices(3),3) = 1; - MatrixType inv = m.inverse(); - double error = double( (m*inv-MatrixType::Identity()).norm() / epsilon() ); - VERIFY(error == 0.0); - std::next_permutation(indices.data(),indices.data()+4); - } -} - -template void inverse_general_4x4(int repeat) -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - double error_sum = 0., error_max = 0.; - for(int i = 0; i < repeat; ++i) - { - MatrixType m; - RealScalar absdet; - do { - m = MatrixType::Random(); - absdet = ei_abs(m.determinant()); - } while(absdet < 10 * epsilon()); - MatrixType inv = m.inverse(); - double error = double( (m*inv-MatrixType::Identity()).norm() * absdet / epsilon() ); - error_sum += error; - error_max = std::max(error_max, error); - } - std::cerr << "inverse_general_4x4, Scalar = " << type_name() << std::endl; - double error_avg = error_sum / repeat; - EIGEN_DEBUG_VAR(error_avg); - EIGEN_DEBUG_VAR(error_max); - VERIFY(error_avg < (NumTraits::IsComplex ? 8.0 : 1.25)); - VERIFY(error_max < (NumTraits::IsComplex ? 64.0 : 20.0)); -} - -void test_eigen2_prec_inverse_4x4() -{ - CALL_SUBTEST_1((inverse_permutation_4x4())); - CALL_SUBTEST_1(( inverse_general_4x4(200000 * g_repeat) )); - - CALL_SUBTEST_2((inverse_permutation_4x4 >())); - CALL_SUBTEST_2(( inverse_general_4x4 >(200000 * g_repeat) )); - - CALL_SUBTEST_3((inverse_permutation_4x4())); - CALL_SUBTEST_3((inverse_general_4x4(50000 * g_repeat))); -} diff --git a/external/eigen3/test/eigen2/eigen2_product_large.cpp b/external/eigen3/test/eigen2/eigen2_product_large.cpp deleted file mode 100644 index 5149ef7..0000000 --- a/external/eigen3/test/eigen2/eigen2_product_large.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "product.h" - -void test_eigen2_product_large() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( product(MatrixXf(ei_random(1,320), ei_random(1,320))) ); - CALL_SUBTEST_2( product(MatrixXd(ei_random(1,320), ei_random(1,320))) ); - CALL_SUBTEST_3( product(MatrixXi(ei_random(1,320), ei_random(1,320))) ); - CALL_SUBTEST_4( product(MatrixXcf(ei_random(1,50), ei_random(1,50))) ); - CALL_SUBTEST_5( product(Matrix(ei_random(1,320), ei_random(1,320))) ); - } - -#ifdef EIGEN_TEST_PART_6 - { - // test a specific issue in DiagonalProduct - int N = 1000000; - VectorXf v = VectorXf::Ones(N); - MatrixXf m = MatrixXf::Ones(N,3); - m = (v+v).asDiagonal() * m; - VERIFY_IS_APPROX(m, MatrixXf::Constant(N,3,2)); - } - - { - // test deferred resizing in Matrix::operator= - MatrixXf a = MatrixXf::Random(10,4), b = MatrixXf::Random(4,10), c = a; - VERIFY_IS_APPROX((a = a * b), (c * b).eval()); - } - - { - MatrixXf mat1(10,10); mat1.setRandom(); - MatrixXf mat2(32,10); mat2.setRandom(); - MatrixXf result = mat1.row(2)*mat2.transpose(); - VERIFY_IS_APPROX(result, (mat1.row(2)*mat2.transpose()).eval()); - } -#endif -} diff --git a/external/eigen3/test/eigen2/eigen2_product_small.cpp b/external/eigen3/test/eigen2/eigen2_product_small.cpp deleted file mode 100644 index 4cd8c10..0000000 --- a/external/eigen3/test/eigen2/eigen2_product_small.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_NO_STATIC_ASSERT -#include "product.h" - -void test_eigen2_product_small() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( product(Matrix()) ); - CALL_SUBTEST_2( product(Matrix()) ); - CALL_SUBTEST_3( product(Matrix3d()) ); - CALL_SUBTEST_4( product(Matrix4d()) ); - CALL_SUBTEST_5( product(Matrix4f()) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_qr.cpp b/external/eigen3/test/eigen2/eigen2_qr.cpp deleted file mode 100644 index 76977e4..0000000 --- a/external/eigen3/test/eigen2/eigen2_qr.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include - -template void qr(const MatrixType& m) -{ - /* this test covers the following files: - QR.h - */ - int rows = m.rows(); - int cols = m.cols(); - - typedef typename MatrixType::Scalar Scalar; - typedef Matrix SquareMatrixType; - typedef Matrix VectorType; - - MatrixType a = MatrixType::Random(rows,cols); - QR qrOfA(a); - VERIFY_IS_APPROX(a, qrOfA.matrixQ() * qrOfA.matrixR()); - VERIFY_IS_NOT_APPROX(a+MatrixType::Identity(rows, cols), qrOfA.matrixQ() * qrOfA.matrixR()); - - #if 0 // eigenvalues module not yet ready - SquareMatrixType b = a.adjoint() * a; - - // check tridiagonalization - Tridiagonalization tridiag(b); - VERIFY_IS_APPROX(b, tridiag.matrixQ() * tridiag.matrixT() * tridiag.matrixQ().adjoint()); - - // check hessenberg decomposition - HessenbergDecomposition hess(b); - VERIFY_IS_APPROX(b, hess.matrixQ() * hess.matrixH() * hess.matrixQ().adjoint()); - VERIFY_IS_APPROX(tridiag.matrixT(), hess.matrixH()); - b = SquareMatrixType::Random(cols,cols); - hess.compute(b); - VERIFY_IS_APPROX(b, hess.matrixQ() * hess.matrixH() * hess.matrixQ().adjoint()); - #endif -} - -void test_eigen2_qr() -{ - for(int i = 0; i < 1; i++) { - CALL_SUBTEST_1( qr(Matrix2f()) ); - CALL_SUBTEST_2( qr(Matrix4d()) ); - CALL_SUBTEST_3( qr(MatrixXf(12,8)) ); - CALL_SUBTEST_4( qr(MatrixXcd(5,5)) ); - CALL_SUBTEST_4( qr(MatrixXcd(7,3)) ); - } - -#ifdef EIGEN_TEST_PART_5 - // small isFullRank test - { - Matrix3d mat; - mat << 1, 45, 1, 2, 2, 2, 1, 2, 3; - VERIFY(mat.qr().isFullRank()); - mat << 1, 1, 1, 2, 2, 2, 1, 2, 3; - //always returns true in eigen2support - //VERIFY(!mat.qr().isFullRank()); - } - -#endif -} diff --git a/external/eigen3/test/eigen2/eigen2_qtvector.cpp b/external/eigen3/test/eigen2/eigen2_qtvector.cpp deleted file mode 100644 index 6cfb58a..0000000 --- a/external/eigen3/test/eigen2/eigen2_qtvector.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_WORK_AROUND_QT_BUG_CALLING_WRONG_OPERATOR_NEW_FIXED_IN_QT_4_5 - -#include "main.h" - -#include -#include - -#include - -template -void check_qtvector_matrix(const MatrixType& m) -{ - int rows = m.rows(); - int cols = m.cols(); - MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); - QVector v(10, MatrixType(rows,cols)), w(20, y); - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], y); - } - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.fill(y,22); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - MatrixType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(int i=23; i -void check_qtvector_transform(const TransformType&) -{ - typedef typename TransformType::MatrixType MatrixType; - TransformType x(MatrixType::Random()), y(MatrixType::Random()); - QVector v(10), w(20, y); - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.fill(y,22); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(TransformType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - TransformType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(unsigned int i=23; int(i) -void check_qtvector_quaternion(const QuaternionType&) -{ - typedef typename QuaternionType::Coefficients Coefficients; - QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); - QVector v(10), w(20, y); - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.fill(y,22); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(QuaternionType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - QuaternionType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(unsigned int i=23; int(i) -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include - -template -void makeNoisyCohyperplanarPoints(int numPoints, - VectorType **points, - HyperplaneType *hyperplane, - typename VectorType::Scalar noiseAmplitude) -{ - typedef typename VectorType::Scalar Scalar; - const int size = points[0]->size(); - // pick a random hyperplane, store the coefficients of its equation - hyperplane->coeffs().resize(size + 1); - for(int j = 0; j < size + 1; j++) - { - do { - hyperplane->coeffs().coeffRef(j) = ei_random(); - } while(ei_abs(hyperplane->coeffs().coeff(j)) < 0.5); - } - - // now pick numPoints random points on this hyperplane - for(int i = 0; i < numPoints; i++) - { - VectorType& cur_point = *(points[i]); - do - { - cur_point = VectorType::Random(size)/*.normalized()*/; - // project cur_point onto the hyperplane - Scalar x = - (hyperplane->coeffs().start(size).cwise()*cur_point).sum(); - cur_point *= hyperplane->coeffs().coeff(size) / x; - } while( cur_point.norm() < 0.5 - || cur_point.norm() > 2.0 ); - } - - // add some noise to these points - for(int i = 0; i < numPoints; i++ ) - *(points[i]) += noiseAmplitude * VectorType::Random(size); -} - -template -void check_linearRegression(int numPoints, - VectorType **points, - const VectorType& original, - typename VectorType::Scalar tolerance) -{ - int size = points[0]->size(); - assert(size==2); - VectorType result(size); - linearRegression(numPoints, points, &result, 1); - typename VectorType::Scalar error = (result - original).norm() / original.norm(); - VERIFY(ei_abs(error) < ei_abs(tolerance)); -} - -template -void check_fitHyperplane(int numPoints, - VectorType **points, - const HyperplaneType& original, - typename VectorType::Scalar tolerance) -{ - int size = points[0]->size(); - HyperplaneType result(size); - fitHyperplane(numPoints, points, &result); - result.coeffs() *= original.coeffs().coeff(size)/result.coeffs().coeff(size); - typename VectorType::Scalar error = (result.coeffs() - original.coeffs()).norm() / original.coeffs().norm(); - std::cout << ei_abs(error) << " xxx " << ei_abs(tolerance) << std::endl; - VERIFY(ei_abs(error) < ei_abs(tolerance)); -} - -void test_eigen2_regression() -{ - for(int i = 0; i < g_repeat; i++) - { -#ifdef EIGEN_TEST_PART_1 - { - Vector2f points2f [1000]; - Vector2f *points2f_ptrs [1000]; - for(int i = 0; i < 1000; i++) points2f_ptrs[i] = &(points2f[i]); - Vector2f coeffs2f; - Hyperplane coeffs3f; - makeNoisyCohyperplanarPoints(1000, points2f_ptrs, &coeffs3f, 0.01f); - coeffs2f[0] = -coeffs3f.coeffs()[0]/coeffs3f.coeffs()[1]; - coeffs2f[1] = -coeffs3f.coeffs()[2]/coeffs3f.coeffs()[1]; - CALL_SUBTEST(check_linearRegression(10, points2f_ptrs, coeffs2f, 0.05f)); - CALL_SUBTEST(check_linearRegression(100, points2f_ptrs, coeffs2f, 0.01f)); - CALL_SUBTEST(check_linearRegression(1000, points2f_ptrs, coeffs2f, 0.002f)); - } -#endif -#ifdef EIGEN_TEST_PART_2 - { - Vector2f points2f [1000]; - Vector2f *points2f_ptrs [1000]; - for(int i = 0; i < 1000; i++) points2f_ptrs[i] = &(points2f[i]); - Hyperplane coeffs3f; - makeNoisyCohyperplanarPoints(1000, points2f_ptrs, &coeffs3f, 0.01f); - CALL_SUBTEST(check_fitHyperplane(10, points2f_ptrs, coeffs3f, 0.05f)); - CALL_SUBTEST(check_fitHyperplane(100, points2f_ptrs, coeffs3f, 0.01f)); - CALL_SUBTEST(check_fitHyperplane(1000, points2f_ptrs, coeffs3f, 0.002f)); - } -#endif -#ifdef EIGEN_TEST_PART_3 - { - Vector4d points4d [1000]; - Vector4d *points4d_ptrs [1000]; - for(int i = 0; i < 1000; i++) points4d_ptrs[i] = &(points4d[i]); - Hyperplane coeffs5d; - makeNoisyCohyperplanarPoints(1000, points4d_ptrs, &coeffs5d, 0.01); - CALL_SUBTEST(check_fitHyperplane(10, points4d_ptrs, coeffs5d, 0.05)); - CALL_SUBTEST(check_fitHyperplane(100, points4d_ptrs, coeffs5d, 0.01)); - CALL_SUBTEST(check_fitHyperplane(1000, points4d_ptrs, coeffs5d, 0.002)); - } -#endif -#ifdef EIGEN_TEST_PART_4 - { - VectorXcd *points11cd_ptrs[1000]; - for(int i = 0; i < 1000; i++) points11cd_ptrs[i] = new VectorXcd(11); - Hyperplane,Dynamic> *coeffs12cd = new Hyperplane,Dynamic>(11); - makeNoisyCohyperplanarPoints(1000, points11cd_ptrs, coeffs12cd, 0.01); - CALL_SUBTEST(check_fitHyperplane(100, points11cd_ptrs, *coeffs12cd, 0.025)); - CALL_SUBTEST(check_fitHyperplane(1000, points11cd_ptrs, *coeffs12cd, 0.006)); - delete coeffs12cd; - for(int i = 0; i < 1000; i++) delete points11cd_ptrs[i]; - } -#endif - } -} diff --git a/external/eigen3/test/eigen2/eigen2_sizeof.cpp b/external/eigen3/test/eigen2/eigen2_sizeof.cpp deleted file mode 100644 index ec1af5a..0000000 --- a/external/eigen3/test/eigen2/eigen2_sizeof.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void verifySizeOf(const MatrixType&) -{ - typedef typename MatrixType::Scalar Scalar; - if (MatrixType::RowsAtCompileTime!=Dynamic && MatrixType::ColsAtCompileTime!=Dynamic) - VERIFY(sizeof(MatrixType)==sizeof(Scalar)*MatrixType::SizeAtCompileTime); - else - VERIFY(sizeof(MatrixType)==sizeof(Scalar*) + 2 * sizeof(typename MatrixType::Index)); -} - -void test_eigen2_sizeof() -{ - CALL_SUBTEST( verifySizeOf(Matrix()) ); - CALL_SUBTEST( verifySizeOf(Matrix4d()) ); - CALL_SUBTEST( verifySizeOf(Matrix()) ); - CALL_SUBTEST( verifySizeOf(Matrix()) ); - CALL_SUBTEST( verifySizeOf(MatrixXcf(3, 3)) ); - CALL_SUBTEST( verifySizeOf(MatrixXi(8, 12)) ); - CALL_SUBTEST( verifySizeOf(MatrixXcd(20, 20)) ); - CALL_SUBTEST( verifySizeOf(Matrix()) ); -} diff --git a/external/eigen3/test/eigen2/eigen2_smallvectors.cpp b/external/eigen3/test/eigen2/eigen2_smallvectors.cpp deleted file mode 100644 index 03962b1..0000000 --- a/external/eigen3/test/eigen2/eigen2_smallvectors.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void smallVectors() -{ - typedef Matrix V2; - typedef Matrix V3; - typedef Matrix V4; - Scalar x1 = ei_random(), - x2 = ei_random(), - x3 = ei_random(), - x4 = ei_random(); - V2 v2(x1, x2); - V3 v3(x1, x2, x3); - V4 v4(x1, x2, x3, x4); - VERIFY_IS_APPROX(x1, v2.x()); - VERIFY_IS_APPROX(x1, v3.x()); - VERIFY_IS_APPROX(x1, v4.x()); - VERIFY_IS_APPROX(x2, v2.y()); - VERIFY_IS_APPROX(x2, v3.y()); - VERIFY_IS_APPROX(x2, v4.y()); - VERIFY_IS_APPROX(x3, v3.z()); - VERIFY_IS_APPROX(x3, v4.z()); - VERIFY_IS_APPROX(x4, v4.w()); -} - -void test_eigen2_smallvectors() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST( smallVectors() ); - CALL_SUBTEST( smallVectors() ); - CALL_SUBTEST( smallVectors() ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_sparse_basic.cpp b/external/eigen3/test/eigen2/eigen2_sparse_basic.cpp deleted file mode 100644 index 0490776..0000000 --- a/external/eigen3/test/eigen2/eigen2_sparse_basic.cpp +++ /dev/null @@ -1,317 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Daniel Gomez Ferro -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "sparse.h" - -template -bool test_random_setter(SparseMatrix& sm, const DenseType& ref, const std::vector& nonzeroCoords) -{ - typedef SparseMatrix SparseType; - { - sm.setZero(); - SetterType w(sm); - std::vector remaining = nonzeroCoords; - while(!remaining.empty()) - { - int i = ei_random(0,remaining.size()-1); - w(remaining[i].x(),remaining[i].y()) = ref.coeff(remaining[i].x(),remaining[i].y()); - remaining[i] = remaining.back(); - remaining.pop_back(); - } - } - return sm.isApprox(ref); -} - -template -bool test_random_setter(DynamicSparseMatrix& sm, const DenseType& ref, const std::vector& nonzeroCoords) -{ - sm.setZero(); - std::vector remaining = nonzeroCoords; - while(!remaining.empty()) - { - int i = ei_random(0,remaining.size()-1); - sm.coeffRef(remaining[i].x(),remaining[i].y()) = ref.coeff(remaining[i].x(),remaining[i].y()); - remaining[i] = remaining.back(); - remaining.pop_back(); - } - return sm.isApprox(ref); -} - -template void sparse_basic(const SparseMatrixType& ref) -{ - const int rows = ref.rows(); - const int cols = ref.cols(); - typedef typename SparseMatrixType::Scalar Scalar; - enum { Flags = SparseMatrixType::Flags }; - - double density = std::max(8./(rows*cols), 0.01); - typedef Matrix DenseMatrix; - typedef Matrix DenseVector; - Scalar eps = 1e-6; - - SparseMatrixType m(rows, cols); - DenseMatrix refMat = DenseMatrix::Zero(rows, cols); - DenseVector vec1 = DenseVector::Random(rows); - Scalar s1 = ei_random(); - - std::vector zeroCoords; - std::vector nonzeroCoords; - initSparse(density, refMat, m, 0, &zeroCoords, &nonzeroCoords); - - if (zeroCoords.size()==0 || nonzeroCoords.size()==0) - return; - - // test coeff and coeffRef - for (int i=0; i<(int)zeroCoords.size(); ++i) - { - VERIFY_IS_MUCH_SMALLER_THAN( m.coeff(zeroCoords[i].x(),zeroCoords[i].y()), eps ); - if(ei_is_same_type >::ret) - VERIFY_RAISES_ASSERT( m.coeffRef(zeroCoords[0].x(),zeroCoords[0].y()) = 5 ); - } - VERIFY_IS_APPROX(m, refMat); - - m.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5); - refMat.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5); - - VERIFY_IS_APPROX(m, refMat); - /* - // test InnerIterators and Block expressions - for (int t=0; t<10; ++t) - { - int j = ei_random(0,cols-1); - int i = ei_random(0,rows-1); - int w = ei_random(1,cols-j-1); - int h = ei_random(1,rows-i-1); - -// VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); - for(int c=0; c w(m); -// for (int i=0; icoeffRef(nonzeroCoords[i].x(),nonzeroCoords[i].y()) = refMat.coeff(nonzeroCoords[i].x(),nonzeroCoords[i].y()); -// } -// } -// VERIFY_IS_APPROX(m, refMat); - - // random setter -// { -// m.setZero(); -// VERIFY_IS_NOT_APPROX(m, refMat); -// SparseSetter w(m); -// std::vector remaining = nonzeroCoords; -// while(!remaining.empty()) -// { -// int i = ei_random(0,remaining.size()-1); -// w->coeffRef(remaining[i].x(),remaining[i].y()) = refMat.coeff(remaining[i].x(),remaining[i].y()); -// remaining[i] = remaining.back(); -// remaining.pop_back(); -// } -// } -// VERIFY_IS_APPROX(m, refMat); - - VERIFY(( test_random_setter >(m,refMat,nonzeroCoords) )); - #ifdef EIGEN_UNORDERED_MAP_SUPPORT - VERIFY(( test_random_setter >(m,refMat,nonzeroCoords) )); - #endif - #ifdef _DENSE_HASH_MAP_H_ - VERIFY(( test_random_setter >(m,refMat,nonzeroCoords) )); - #endif - #ifdef _SPARSE_HASH_MAP_H_ - VERIFY(( test_random_setter >(m,refMat,nonzeroCoords) )); - #endif - - // test fillrand - { - DenseMatrix m1(rows,cols); - m1.setZero(); - SparseMatrixType m2(rows,cols); - m2.startFill(); - for (int j=0; j(0,rows-1); - if (m1.coeff(i,j)==Scalar(0)) - m2.fillrand(i,j) = m1(i,j) = ei_random(); - } - } - m2.endFill(); - VERIFY_IS_APPROX(m2,m1); - } - - // test RandomSetter - /*{ - SparseMatrixType m1(rows,cols), m2(rows,cols); - DenseMatrix refM1 = DenseMatrix::Zero(rows, rows); - initSparse(density, refM1, m1); - { - Eigen::RandomSetter setter(m2); - for (int j=0; j(density, refM1, m1); - initSparse(density, refM2, m2); - initSparse(density, refM3, m3); - initSparse(density, refM4, m4); - - VERIFY_IS_APPROX(m1+m2, refM1+refM2); - VERIFY_IS_APPROX(m1+m2+m3, refM1+refM2+refM3); - VERIFY_IS_APPROX(m3.cwise()*(m1+m2), refM3.cwise()*(refM1+refM2)); - VERIFY_IS_APPROX(m1*s1-m2, refM1*s1-refM2); - - VERIFY_IS_APPROX(m1*=s1, refM1*=s1); - VERIFY_IS_APPROX(m1/=s1, refM1/=s1); - - VERIFY_IS_APPROX(m1+=m2, refM1+=refM2); - VERIFY_IS_APPROX(m1-=m2, refM1-=refM2); - - VERIFY_IS_APPROX(m1.col(0).eigen2_dot(refM2.row(0)), refM1.col(0).eigen2_dot(refM2.row(0))); - - refM4.setRandom(); - // sparse cwise* dense - VERIFY_IS_APPROX(m3.cwise()*refM4, refM3.cwise()*refM4); -// VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4); - } - - // test innerVector() - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m2(rows, rows); - initSparse(density, refMat2, m2); - int j0 = ei_random(0,rows-1); - int j1 = ei_random(0,rows-1); - VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0)); - VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1)); - //m2.innerVector(j0) = 2*m2.innerVector(j1); - //refMat2.col(j0) = 2*refMat2.col(j1); - //VERIFY_IS_APPROX(m2, refMat2); - } - - // test innerVectors() - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m2(rows, rows); - initSparse(density, refMat2, m2); - int j0 = ei_random(0,rows-2); - int j1 = ei_random(0,rows-2); - int n0 = ei_random(1,rows-std::max(j0,j1)); - VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0)); - VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), - refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); - //m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0); - //refMat2.block(0,j0,rows,n0) = refMat2.block(0,j0,rows,n0) + refMat2.block(0,j1,rows,n0); - } - - // test transpose - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m2(rows, rows); - initSparse(density, refMat2, m2); - VERIFY_IS_APPROX(m2.transpose().eval(), refMat2.transpose().eval()); - VERIFY_IS_APPROX(m2.transpose(), refMat2.transpose()); - } - - // test prune - { - SparseMatrixType m2(rows, rows); - DenseMatrix refM2(rows, rows); - refM2.setZero(); - int countFalseNonZero = 0; - int countTrueNonZero = 0; - m2.startFill(); - for (int j=0; j(0,1); - if (x<0.1) - { - // do nothing - } - else if (x<0.5) - { - countFalseNonZero++; - m2.fill(i,j) = Scalar(0); - } - else - { - countTrueNonZero++; - m2.fill(i,j) = refM2(i,j) = Scalar(1); - } - } - m2.endFill(); - VERIFY(countFalseNonZero+countTrueNonZero == m2.nonZeros()); - VERIFY_IS_APPROX(m2, refM2); - m2.prune(1); - VERIFY(countTrueNonZero==m2.nonZeros()); - VERIFY_IS_APPROX(m2, refM2); - } -} - -void test_eigen2_sparse_basic() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( sparse_basic(SparseMatrix(8, 8)) ); - CALL_SUBTEST_2( sparse_basic(SparseMatrix >(16, 16)) ); - CALL_SUBTEST_1( sparse_basic(SparseMatrix(33, 33)) ); - - CALL_SUBTEST_3( sparse_basic(DynamicSparseMatrix(8, 8)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_sparse_product.cpp b/external/eigen3/test/eigen2/eigen2_sparse_product.cpp deleted file mode 100644 index d28e76d..0000000 --- a/external/eigen3/test/eigen2/eigen2_sparse_product.cpp +++ /dev/null @@ -1,115 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Daniel Gomez Ferro -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "sparse.h" - -template void sparse_product(const SparseMatrixType& ref) -{ - const int rows = ref.rows(); - const int cols = ref.cols(); - typedef typename SparseMatrixType::Scalar Scalar; - enum { Flags = SparseMatrixType::Flags }; - - double density = std::max(8./(rows*cols), 0.01); - typedef Matrix DenseMatrix; - typedef Matrix DenseVector; - - // test matrix-matrix product - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); - DenseMatrix refMat3 = DenseMatrix::Zero(rows, rows); - DenseMatrix refMat4 = DenseMatrix::Zero(rows, rows); - DenseMatrix dm4 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m2(rows, rows); - SparseMatrixType m3(rows, rows); - SparseMatrixType m4(rows, rows); - initSparse(density, refMat2, m2); - initSparse(density, refMat3, m3); - initSparse(density, refMat4, m4); - VERIFY_IS_APPROX(m4=m2*m3, refMat4=refMat2*refMat3); - VERIFY_IS_APPROX(m4=m2.transpose()*m3, refMat4=refMat2.transpose()*refMat3); - VERIFY_IS_APPROX(m4=m2.transpose()*m3.transpose(), refMat4=refMat2.transpose()*refMat3.transpose()); - VERIFY_IS_APPROX(m4=m2*m3.transpose(), refMat4=refMat2*refMat3.transpose()); - - // sparse * dense - VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3); - VERIFY_IS_APPROX(dm4=m2*refMat3.transpose(), refMat4=refMat2*refMat3.transpose()); - VERIFY_IS_APPROX(dm4=m2.transpose()*refMat3, refMat4=refMat2.transpose()*refMat3); - VERIFY_IS_APPROX(dm4=m2.transpose()*refMat3.transpose(), refMat4=refMat2.transpose()*refMat3.transpose()); - - // dense * sparse - VERIFY_IS_APPROX(dm4=refMat2*m3, refMat4=refMat2*refMat3); - VERIFY_IS_APPROX(dm4=refMat2*m3.transpose(), refMat4=refMat2*refMat3.transpose()); - VERIFY_IS_APPROX(dm4=refMat2.transpose()*m3, refMat4=refMat2.transpose()*refMat3); - VERIFY_IS_APPROX(dm4=refMat2.transpose()*m3.transpose(), refMat4=refMat2.transpose()*refMat3.transpose()); - - VERIFY_IS_APPROX(m3=m3*m3, refMat3=refMat3*refMat3); - } - - // test matrix - diagonal product - if(false) // it compiles, but the precision is terrible. probably doesn't matter in this branch.... - { - DenseMatrix refM2 = DenseMatrix::Zero(rows, rows); - DenseMatrix refM3 = DenseMatrix::Zero(rows, rows); - DiagonalMatrix d1(DenseVector::Random(rows)); - SparseMatrixType m2(rows, rows); - SparseMatrixType m3(rows, rows); - initSparse(density, refM2, m2); - initSparse(density, refM3, m3); - VERIFY_IS_APPROX(m3=m2*d1, refM3=refM2*d1); - VERIFY_IS_APPROX(m3=m2.transpose()*d1, refM3=refM2.transpose()*d1); - VERIFY_IS_APPROX(m3=d1*m2, refM3=d1*refM2); - VERIFY_IS_APPROX(m3=d1*m2.transpose(), refM3=d1 * refM2.transpose()); - } - - // test self adjoint products - { - DenseMatrix b = DenseMatrix::Random(rows, rows); - DenseMatrix x = DenseMatrix::Random(rows, rows); - DenseMatrix refX = DenseMatrix::Random(rows, rows); - DenseMatrix refUp = DenseMatrix::Zero(rows, rows); - DenseMatrix refLo = DenseMatrix::Zero(rows, rows); - DenseMatrix refS = DenseMatrix::Zero(rows, rows); - SparseMatrixType mUp(rows, rows); - SparseMatrixType mLo(rows, rows); - SparseMatrixType mS(rows, rows); - do { - initSparse(density, refUp, mUp, ForceRealDiag|/*ForceNonZeroDiag|*/MakeUpperTriangular); - } while (refUp.isZero()); - refLo = refUp.transpose().conjugate(); - mLo = mUp.transpose().conjugate(); - refS = refUp + refLo; - refS.diagonal() *= 0.5; - mS = mUp + mLo; - for (int k=0; k()*b, refX=refS*b); - VERIFY_IS_APPROX(x=mLo.template marked()*b, refX=refS*b); - VERIFY_IS_APPROX(x=mS.template marked()*b, refX=refS*b); - } - -} - -void test_eigen2_sparse_product() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( sparse_product(SparseMatrix(8, 8)) ); - CALL_SUBTEST_2( sparse_product(SparseMatrix >(16, 16)) ); - CALL_SUBTEST_1( sparse_product(SparseMatrix(33, 33)) ); - - CALL_SUBTEST_3( sparse_product(DynamicSparseMatrix(8, 8)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_sparse_solvers.cpp b/external/eigen3/test/eigen2/eigen2_sparse_solvers.cpp deleted file mode 100644 index 3aef27a..0000000 --- a/external/eigen3/test/eigen2/eigen2_sparse_solvers.cpp +++ /dev/null @@ -1,200 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Daniel Gomez Ferro -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "sparse.h" - -template void -initSPD(double density, - Matrix& refMat, - SparseMatrix& sparseMat) -{ - Matrix aux(refMat.rows(),refMat.cols()); - initSparse(density,refMat,sparseMat); - refMat = refMat * refMat.adjoint(); - for (int k=0; k<2; ++k) - { - initSparse(density,aux,sparseMat,ForceNonZeroDiag); - refMat += aux * aux.adjoint(); - } - sparseMat.startFill(); - for (int j=0 ; j void sparse_solvers(int rows, int cols) -{ - double density = std::max(8./(rows*cols), 0.01); - typedef Matrix DenseMatrix; - typedef Matrix DenseVector; - // Scalar eps = 1e-6; - - DenseVector vec1 = DenseVector::Random(rows); - - std::vector zeroCoords; - std::vector nonzeroCoords; - - // test triangular solver - { - DenseVector vec2 = vec1, vec3 = vec1; - SparseMatrix m2(rows, cols); - DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); - - // lower - initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords); - VERIFY_IS_APPROX(refMat2.template marked().solveTriangular(vec2), - m2.template marked().solveTriangular(vec3)); - - // lower - transpose - initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords); - VERIFY_IS_APPROX(refMat2.template marked().transpose().solveTriangular(vec2), - m2.template marked().transpose().solveTriangular(vec3)); - - // upper - initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords); - VERIFY_IS_APPROX(refMat2.template marked().solveTriangular(vec2), - m2.template marked().solveTriangular(vec3)); - - // upper - transpose - initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords); - VERIFY_IS_APPROX(refMat2.template marked().transpose().solveTriangular(vec2), - m2.template marked().transpose().solveTriangular(vec3)); - } - - // test LLT - { - // TODO fix the issue with complex (see SparseLLT::solveInPlace) - SparseMatrix m2(rows, cols); - DenseMatrix refMat2(rows, cols); - - DenseVector b = DenseVector::Random(cols); - DenseVector refX(cols), x(cols); - - initSPD(density, refMat2, m2); - - refMat2.llt().solve(b, &refX); - typedef SparseMatrix SparseSelfAdjointMatrix; - if (!NumTraits::IsComplex) - { - x = b; - SparseLLT (m2).solveInPlace(x); - VERIFY(refX.isApprox(x,test_precision()) && "LLT: default"); - } - #ifdef EIGEN_CHOLMOD_SUPPORT - x = b; - SparseLLT(m2).solveInPlace(x); - VERIFY(refX.isApprox(x,test_precision()) && "LLT: cholmod"); - #endif - if (!NumTraits::IsComplex) - { - #ifdef EIGEN_TAUCS_SUPPORT - x = b; - SparseLLT(m2,IncompleteFactorization).solveInPlace(x); - VERIFY(refX.isApprox(x,test_precision()) && "LLT: taucs (IncompleteFactorization)"); - x = b; - SparseLLT(m2,SupernodalMultifrontal).solveInPlace(x); - VERIFY(refX.isApprox(x,test_precision()) && "LLT: taucs (SupernodalMultifrontal)"); - x = b; - SparseLLT(m2,SupernodalLeftLooking).solveInPlace(x); - VERIFY(refX.isApprox(x,test_precision()) && "LLT: taucs (SupernodalLeftLooking)"); - #endif - } - } - - // test LDLT - if (!NumTraits::IsComplex) - { - // TODO fix the issue with complex (see SparseLDLT::solveInPlace) - SparseMatrix m2(rows, cols); - DenseMatrix refMat2(rows, cols); - - DenseVector b = DenseVector::Random(cols); - DenseVector refX(cols), x(cols); - - //initSPD(density, refMat2, m2); - initSparse(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, 0, 0); - refMat2 += refMat2.adjoint(); - refMat2.diagonal() *= 0.5; - - refMat2.ldlt().solve(b, &refX); - typedef SparseMatrix SparseSelfAdjointMatrix; - x = b; - SparseLDLT ldlt(m2); - if (ldlt.succeeded()) - ldlt.solveInPlace(x); - VERIFY(refX.isApprox(x,test_precision()) && "LDLT: default"); - } - - // test LU - { - static int count = 0; - SparseMatrix m2(rows, cols); - DenseMatrix refMat2(rows, cols); - - DenseVector b = DenseVector::Random(cols); - DenseVector refX(cols), x(cols); - - initSparse(density, refMat2, m2, ForceNonZeroDiag, &zeroCoords, &nonzeroCoords); - - LU refLu(refMat2); - refLu.solve(b, &refX); - #if defined(EIGEN_SUPERLU_SUPPORT) || defined(EIGEN_UMFPACK_SUPPORT) - Scalar refDet = refLu.determinant(); - #endif - x.setZero(); - // // SparseLU > (m2).solve(b,&x); - // // VERIFY(refX.isApprox(x,test_precision()) && "LU: default"); - #ifdef EIGEN_SUPERLU_SUPPORT - { - x.setZero(); - SparseLU,SuperLU> slu(m2); - if (slu.succeeded()) - { - if (slu.solve(b,&x)) { - VERIFY(refX.isApprox(x,test_precision()) && "LU: SuperLU"); - } - // std::cerr << refDet << " == " << slu.determinant() << "\n"; - if (count==0) { - VERIFY_IS_APPROX(refDet,slu.determinant()); // FIXME det is not very stable for complex - } - } - } - #endif - #ifdef EIGEN_UMFPACK_SUPPORT - { - // check solve - x.setZero(); - SparseLU,UmfPack> slu(m2); - if (slu.succeeded()) { - if (slu.solve(b,&x)) { - if (count==0) { - VERIFY(refX.isApprox(x,test_precision()) && "LU: umfpack"); // FIXME solve is not very stable for complex - } - } - VERIFY_IS_APPROX(refDet,slu.determinant()); - // TODO check the extracted data - //std::cerr << slu.matrixL() << "\n"; - } - } - #endif - count++; - } - -} - -void test_eigen2_sparse_solvers() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( sparse_solvers(8, 8) ); - CALL_SUBTEST_2( sparse_solvers >(16, 16) ); - CALL_SUBTEST_1( sparse_solvers(101, 101) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_sparse_vector.cpp b/external/eigen3/test/eigen2/eigen2_sparse_vector.cpp deleted file mode 100644 index e6d2d77..0000000 --- a/external/eigen3/test/eigen2/eigen2_sparse_vector.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Daniel Gomez Ferro -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "sparse.h" - -template void sparse_vector(int rows, int cols) -{ - double densityMat = std::max(8./(rows*cols), 0.01); - double densityVec = std::max(8./float(rows), 0.1); - typedef Matrix DenseMatrix; - typedef Matrix DenseVector; - typedef SparseVector SparseVectorType; - typedef SparseMatrix SparseMatrixType; - Scalar eps = 1e-6; - - SparseMatrixType m1(rows,cols); - SparseVectorType v1(rows), v2(rows), v3(rows); - DenseMatrix refM1 = DenseMatrix::Zero(rows, cols); - DenseVector refV1 = DenseVector::Random(rows), - refV2 = DenseVector::Random(rows), - refV3 = DenseVector::Random(rows); - - std::vector zerocoords, nonzerocoords; - initSparse(densityVec, refV1, v1, &zerocoords, &nonzerocoords); - initSparse(densityMat, refM1, m1); - - initSparse(densityVec, refV2, v2); - initSparse(densityVec, refV3, v3); - - Scalar s1 = ei_random(); - - // test coeff and coeffRef - for (unsigned int i=0; i(8, 8) ); - CALL_SUBTEST_2( sparse_vector >(16, 16) ); - CALL_SUBTEST_1( sparse_vector(299, 535) ); - } -} - diff --git a/external/eigen3/test/eigen2/eigen2_stdvector.cpp b/external/eigen3/test/eigen2/eigen2_stdvector.cpp deleted file mode 100644 index 6ab05c2..0000000 --- a/external/eigen3/test/eigen2/eigen2_stdvector.cpp +++ /dev/null @@ -1,148 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include -#include "main.h" -#include - -template -void check_stdvector_matrix(const MatrixType& m) -{ - int rows = m.rows(); - int cols = m.cols(); - MatrixType x = MatrixType::Random(rows,cols), y = MatrixType::Random(rows,cols); - std::vector > v(10, MatrixType(rows,cols)), w(20, y); - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.resize(22,y); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((std::size_t)&(v[22]) == (std::size_t)&(v[21]) + sizeof(MatrixType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - MatrixType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(unsigned int i=23; i -void check_stdvector_transform(const TransformType&) -{ - typedef typename TransformType::MatrixType MatrixType; - TransformType x(MatrixType::Random()), y(MatrixType::Random()); - std::vector > v(10), w(20, y); - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.resize(22,y); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((std::size_t)&(v[22]) == (std::size_t)&(v[21]) + sizeof(TransformType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - TransformType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(unsigned int i=23; i -void check_stdvector_quaternion(const QuaternionType&) -{ - typedef typename QuaternionType::Coefficients Coefficients; - QuaternionType x(Coefficients::Random()), y(Coefficients::Random()); - std::vector > v(10), w(20, y); - v[5] = x; - w[6] = v[5]; - VERIFY_IS_APPROX(w[6], v[5]); - v = w; - for(int i = 0; i < 20; i++) - { - VERIFY_IS_APPROX(w[i], v[i]); - } - - v.resize(21); - v[20] = x; - VERIFY_IS_APPROX(v[20], x); - v.resize(22,y); - VERIFY_IS_APPROX(v[21], y); - v.push_back(x); - VERIFY_IS_APPROX(v[22], x); - VERIFY((std::size_t)&(v[22]) == (std::size_t)&(v[21]) + sizeof(QuaternionType)); - - // do a lot of push_back such that the vector gets internally resized - // (with memory reallocation) - QuaternionType* ref = &w[0]; - for(int i=0; i<30 || ((ref==&w[0]) && i<300); ++i) - v.push_back(w[i%w.size()]); - for(unsigned int i=23; i -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -// check minor separately in order to avoid the possible creation of a zero-sized -// array. Comes from a compilation error with gcc-3.4 or gcc-4 with -ansi -pedantic. -// Another solution would be to declare the array like this: T m_data[Size==0?1:Size]; in ei_matrix_storage -// but this is probably not bad to raise such an error at compile time... -template struct CheckMinor -{ - typedef Matrix MatrixType; - CheckMinor(MatrixType& m1, int r1, int c1) - { - int rows = m1.rows(); - int cols = m1.cols(); - - Matrix mi = m1.minor(0,0).eval(); - VERIFY_IS_APPROX(mi, m1.block(1,1,rows-1,cols-1)); - mi = m1.minor(r1,c1); - VERIFY_IS_APPROX(mi.transpose(), m1.transpose().minor(c1,r1)); - //check operator(), both constant and non-constant, on minor() - m1.minor(r1,c1)(0,0) = m1.minor(0,0)(0,0); - } -}; - -template struct CheckMinor -{ - typedef Matrix MatrixType; - CheckMinor(MatrixType&, int, int) {} -}; - -template void submatrices(const MatrixType& m) -{ - /* this test covers the following files: - Row.h Column.h Block.h Minor.h DiagonalCoeffs.h - */ - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef Matrix VectorType; - typedef Matrix RowVectorType; - int rows = m.rows(); - int cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols), - ones = MatrixType::Ones(rows, cols), - square = Matrix - ::Random(rows, rows); - VectorType v1 = VectorType::Random(rows); - - Scalar s1 = ei_random(); - - int r1 = ei_random(0,rows-1); - int r2 = ei_random(r1,rows-1); - int c1 = ei_random(0,cols-1); - int c2 = ei_random(c1,cols-1); - - //check row() and col() - VERIFY_IS_APPROX(m1.col(c1).transpose(), m1.transpose().row(c1)); - VERIFY_IS_APPROX(square.row(r1).eigen2_dot(m1.col(c1)), (square.lazy() * m1.conjugate())(r1,c1)); - //check operator(), both constant and non-constant, on row() and col() - m1.row(r1) += s1 * m1.row(r2); - m1.col(c1) += s1 * m1.col(c2); - - //check block() - Matrix b1(1,1); b1(0,0) = m1(r1,c1); - RowVectorType br1(m1.block(r1,0,1,cols)); - VectorType bc1(m1.block(0,c1,rows,1)); - VERIFY_IS_APPROX(b1, m1.block(r1,c1,1,1)); - VERIFY_IS_APPROX(m1.row(r1), br1); - VERIFY_IS_APPROX(m1.col(c1), bc1); - //check operator(), both constant and non-constant, on block() - m1.block(r1,c1,r2-r1+1,c2-c1+1) = s1 * m2.block(0, 0, r2-r1+1,c2-c1+1); - m1.block(r1,c1,r2-r1+1,c2-c1+1)(r2-r1,c2-c1) = m2.block(0, 0, r2-r1+1,c2-c1+1)(0,0); - - //check minor() - CheckMinor checkminor(m1,r1,c1); - - //check diagonal() - VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal()); - m2.diagonal() = 2 * m1.diagonal(); - m2.diagonal()[0] *= 3; - VERIFY_IS_APPROX(m2.diagonal()[0], static_cast(6) * m1.diagonal()[0]); - - enum { - BlockRows = EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,2), - BlockCols = EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::ColsAtCompileTime,5) - }; - if (rows>=5 && cols>=8) - { - // test fixed block() as lvalue - m1.template block(1,1) *= s1; - // test operator() on fixed block() both as constant and non-constant - m1.template block(1,1)(0, 3) = m1.template block<2,5>(1,1)(1,2); - // check that fixed block() and block() agree - Matrix b = m1.template block(3,3); - VERIFY_IS_APPROX(b, m1.block(3,3,BlockRows,BlockCols)); - } - - if (rows>2) - { - // test sub vectors - VERIFY_IS_APPROX(v1.template start<2>(), v1.block(0,0,2,1)); - VERIFY_IS_APPROX(v1.template start<2>(), v1.start(2)); - VERIFY_IS_APPROX(v1.template start<2>(), v1.segment(0,2)); - VERIFY_IS_APPROX(v1.template start<2>(), v1.template segment<2>(0)); - int i = rows-2; - VERIFY_IS_APPROX(v1.template end<2>(), v1.block(i,0,2,1)); - VERIFY_IS_APPROX(v1.template end<2>(), v1.end(2)); - VERIFY_IS_APPROX(v1.template end<2>(), v1.segment(i,2)); - VERIFY_IS_APPROX(v1.template end<2>(), v1.template segment<2>(i)); - i = ei_random(0,rows-2); - VERIFY_IS_APPROX(v1.segment(i,2), v1.template segment<2>(i)); - } - - // stress some basic stuffs with block matrices - VERIFY(ei_real(ones.col(c1).sum()) == RealScalar(rows)); - VERIFY(ei_real(ones.row(r1).sum()) == RealScalar(cols)); - - VERIFY(ei_real(ones.col(c1).eigen2_dot(ones.col(c2))) == RealScalar(rows)); - VERIFY(ei_real(ones.row(r1).eigen2_dot(ones.row(r2))) == RealScalar(cols)); -} - -void test_eigen2_submatrices() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( submatrices(Matrix()) ); - CALL_SUBTEST_2( submatrices(Matrix4d()) ); - CALL_SUBTEST_3( submatrices(MatrixXcf(3, 3)) ); - CALL_SUBTEST_4( submatrices(MatrixXi(8, 12)) ); - CALL_SUBTEST_5( submatrices(MatrixXcd(20, 20)) ); - CALL_SUBTEST_6( submatrices(MatrixXf(20, 20)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_sum.cpp b/external/eigen3/test/eigen2/eigen2_sum.cpp deleted file mode 100644 index b47057c..0000000 --- a/external/eigen3/test/eigen2/eigen2_sum.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void matrixSum(const MatrixType& m) -{ - typedef typename MatrixType::Scalar Scalar; - - int rows = m.rows(); - int cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols); - - VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); - VERIFY_IS_APPROX(MatrixType::Ones(rows, cols).sum(), Scalar(float(rows*cols))); // the float() here to shut up excessive MSVC warning about int->complex conversion being lossy - Scalar x = Scalar(0); - for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) x += m1(i,j); - VERIFY_IS_APPROX(m1.sum(), x); -} - -template void vectorSum(const VectorType& w) -{ - typedef typename VectorType::Scalar Scalar; - int size = w.size(); - - VectorType v = VectorType::Random(size); - for(int i = 1; i < size; i++) - { - Scalar s = Scalar(0); - for(int j = 0; j < i; j++) s += v[j]; - VERIFY_IS_APPROX(s, v.start(i).sum()); - } - - for(int i = 0; i < size-1; i++) - { - Scalar s = Scalar(0); - for(int j = i; j < size; j++) s += v[j]; - VERIFY_IS_APPROX(s, v.end(size-i).sum()); - } - - for(int i = 0; i < size/2; i++) - { - Scalar s = Scalar(0); - for(int j = i; j < size-i; j++) s += v[j]; - VERIFY_IS_APPROX(s, v.segment(i, size-2*i).sum()); - } -} - -void test_eigen2_sum() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( matrixSum(Matrix()) ); - CALL_SUBTEST_2( matrixSum(Matrix2f()) ); - CALL_SUBTEST_3( matrixSum(Matrix4d()) ); - CALL_SUBTEST_4( matrixSum(MatrixXcf(3, 3)) ); - CALL_SUBTEST_5( matrixSum(MatrixXf(8, 12)) ); - CALL_SUBTEST_6( matrixSum(MatrixXi(8, 12)) ); - } - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_5( vectorSum(VectorXf(5)) ); - CALL_SUBTEST_7( vectorSum(VectorXd(10)) ); - CALL_SUBTEST_5( vectorSum(VectorXf(33)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_svd.cpp b/external/eigen3/test/eigen2/eigen2_svd.cpp deleted file mode 100644 index d4689a5..0000000 --- a/external/eigen3/test/eigen2/eigen2_svd.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include - -template void svd(const MatrixType& m) -{ - /* this test covers the following files: - SVD.h - */ - int rows = m.rows(); - int cols = m.cols(); - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - MatrixType a = MatrixType::Random(rows,cols); - Matrix b = - Matrix::Random(rows,1); - Matrix x(cols,1), x2(cols,1); - - RealScalar largerEps = test_precision(); - if (ei_is_same_type::ret) - largerEps = 1e-3f; - - { - SVD svd(a); - MatrixType sigma = MatrixType::Zero(rows,cols); - MatrixType matU = MatrixType::Zero(rows,rows); - sigma.block(0,0,cols,cols) = svd.singularValues().asDiagonal(); - matU.block(0,0,rows,cols) = svd.matrixU(); - VERIFY_IS_APPROX(a, matU * sigma * svd.matrixV().transpose()); - } - - - if (rows==cols) - { - if (ei_is_same_type::ret) - { - MatrixType a1 = MatrixType::Random(rows,cols); - a += a * a.adjoint() + a1 * a1.adjoint(); - } - SVD svd(a); - svd.solve(b, &x); - VERIFY_IS_APPROX(a * x,b); - } - - - if(rows==cols) - { - SVD svd(a); - MatrixType unitary, positive; - svd.computeUnitaryPositive(&unitary, &positive); - VERIFY_IS_APPROX(unitary * unitary.adjoint(), MatrixType::Identity(unitary.rows(),unitary.rows())); - VERIFY_IS_APPROX(positive, positive.adjoint()); - for(int i = 0; i < rows; i++) VERIFY(positive.diagonal()[i] >= 0); // cheap necessary (not sufficient) condition for positivity - VERIFY_IS_APPROX(unitary*positive, a); - - svd.computePositiveUnitary(&positive, &unitary); - VERIFY_IS_APPROX(unitary * unitary.adjoint(), MatrixType::Identity(unitary.rows(),unitary.rows())); - VERIFY_IS_APPROX(positive, positive.adjoint()); - for(int i = 0; i < rows; i++) VERIFY(positive.diagonal()[i] >= 0); // cheap necessary (not sufficient) condition for positivity - VERIFY_IS_APPROX(positive*unitary, a); - } -} - -void test_eigen2_svd() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( svd(Matrix3f()) ); - CALL_SUBTEST_2( svd(Matrix4d()) ); - CALL_SUBTEST_3( svd(MatrixXf(7,7)) ); - CALL_SUBTEST_4( svd(MatrixXd(14,7)) ); - // complex are not implemented yet -// CALL_SUBTEST( svd(MatrixXcd(6,6)) ); -// CALL_SUBTEST( svd(MatrixXcf(3,3)) ); - SVD s; - MatrixXf m = MatrixXf::Random(10,1); - s.compute(m); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_swap.cpp b/external/eigen3/test/eigen2/eigen2_swap.cpp deleted file mode 100644 index f3a8846..0000000 --- a/external/eigen3/test/eigen2/eigen2_swap.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2009 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_NO_STATIC_ASSERT -#include "main.h" - -template -struct other_matrix_type -{ - typedef int type; -}; - -template -struct other_matrix_type > -{ - typedef Matrix<_Scalar, _Rows, _Cols, _Options^RowMajor, _MaxRows, _MaxCols> type; -}; - -template void swap(const MatrixType& m) -{ - typedef typename other_matrix_type::type OtherMatrixType; - typedef typename MatrixType::Scalar Scalar; - - ei_assert((!ei_is_same_type::ret)); - int rows = m.rows(); - int cols = m.cols(); - - // construct 3 matrix guaranteed to be distinct - MatrixType m1 = MatrixType::Random(rows,cols); - MatrixType m2 = MatrixType::Random(rows,cols) + Scalar(100) * MatrixType::Identity(rows,cols); - OtherMatrixType m3 = OtherMatrixType::Random(rows,cols) + Scalar(200) * OtherMatrixType::Identity(rows,cols); - - MatrixType m1_copy = m1; - MatrixType m2_copy = m2; - OtherMatrixType m3_copy = m3; - - // test swapping 2 matrices of same type - m1.swap(m2); - VERIFY_IS_APPROX(m1,m2_copy); - VERIFY_IS_APPROX(m2,m1_copy); - m1 = m1_copy; - m2 = m2_copy; - - // test swapping 2 matrices of different types - m1.swap(m3); - VERIFY_IS_APPROX(m1,m3_copy); - VERIFY_IS_APPROX(m3,m1_copy); - m1 = m1_copy; - m3 = m3_copy; - - // test swapping matrix with expression - m1.swap(m2.block(0,0,rows,cols)); - VERIFY_IS_APPROX(m1,m2_copy); - VERIFY_IS_APPROX(m2,m1_copy); - m1 = m1_copy; - m2 = m2_copy; - - // test swapping two expressions of different types - m1.transpose().swap(m3.transpose()); - VERIFY_IS_APPROX(m1,m3_copy); - VERIFY_IS_APPROX(m3,m1_copy); - m1 = m1_copy; - m3 = m3_copy; - - // test assertion on mismatching size -- matrix case - VERIFY_RAISES_ASSERT(m1.swap(m1.row(0))); - // test assertion on mismatching size -- xpr case - VERIFY_RAISES_ASSERT(m1.row(0).swap(m1)); -} - -void test_eigen2_swap() -{ - CALL_SUBTEST_1( swap(Matrix3f()) ); // fixed size, no vectorization - CALL_SUBTEST_1( swap(Matrix4d()) ); // fixed size, possible vectorization - CALL_SUBTEST_1( swap(MatrixXd(3,3)) ); // dyn size, no vectorization - CALL_SUBTEST_1( swap(MatrixXf(30,30)) ); // dyn size, possible vectorization -} diff --git a/external/eigen3/test/eigen2/eigen2_triangular.cpp b/external/eigen3/test/eigen2/eigen2_triangular.cpp deleted file mode 100644 index 6f17b7d..0000000 --- a/external/eigen3/test/eigen2/eigen2_triangular.cpp +++ /dev/null @@ -1,148 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void triangular(const MatrixType& m) -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - - RealScalar largerEps = 10*test_precision(); - - int rows = m.rows(); - int cols = m.cols(); - - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols), - m4(rows, cols), - r1(rows, cols), - r2(rows, cols); - - MatrixType m1up = m1.template part(); - MatrixType m2up = m2.template part(); - - if (rows*cols>1) - { - VERIFY(m1up.isUpperTriangular()); - VERIFY(m2up.transpose().isLowerTriangular()); - VERIFY(!m2.isLowerTriangular()); - } - -// VERIFY_IS_APPROX(m1up.transpose() * m2, m1.upper().transpose().lower() * m2); - - // test overloaded operator+= - r1.setZero(); - r2.setZero(); - r1.template part() += m1; - r2 += m1up; - VERIFY_IS_APPROX(r1,r2); - - // test overloaded operator= - m1.setZero(); - m1.template part() = (m2.transpose() * m2).lazy(); - m3 = m2.transpose() * m2; - VERIFY_IS_APPROX(m3.template part().transpose(), m1); - - // test overloaded operator= - m1.setZero(); - m1.template part() = (m2.transpose() * m2).lazy(); - VERIFY_IS_APPROX(m3.template part(), m1); - - VERIFY_IS_APPROX(m3.template part(), m3.diagonal().asDiagonal()); - - m1 = MatrixType::Random(rows, cols); - for (int i=0; i(); - - Transpose trm4(m4); - // test back and forward subsitution - m3 = m1.template part(); - VERIFY(m3.template marked().solveTriangular(m3).cwise().abs().isIdentity(test_precision())); - VERIFY(m3.transpose().template marked() - .solveTriangular(m3.transpose()).cwise().abs().isIdentity(test_precision())); - // check M * inv(L) using in place API - m4 = m3; - m3.transpose().template marked().solveTriangularInPlace(trm4); - VERIFY(m4.cwise().abs().isIdentity(test_precision())); - - m3 = m1.template part(); - VERIFY(m3.template marked().solveTriangular(m3).cwise().abs().isIdentity(test_precision())); - VERIFY(m3.transpose().template marked() - .solveTriangular(m3.transpose()).cwise().abs().isIdentity(test_precision())); - // check M * inv(U) using in place API - m4 = m3; - m3.transpose().template marked().solveTriangularInPlace(trm4); - VERIFY(m4.cwise().abs().isIdentity(test_precision())); - - m3 = m1.template part(); - VERIFY(m2.isApprox(m3 * (m3.template marked().solveTriangular(m2)), largerEps)); - m3 = m1.template part(); - VERIFY(m2.isApprox(m3 * (m3.template marked().solveTriangular(m2)), largerEps)); - - VERIFY((m1.template part() * m2.template part()).isUpperTriangular()); - - // test swap - m1.setOnes(); - m2.setZero(); - m2.template part().swap(m1); - m3.setZero(); - m3.template part().setOnes(); - VERIFY_IS_APPROX(m2,m3); - -} - -void selfadjoint() -{ - Matrix2i m; - m << 1, 2, - 3, 4; - - Matrix2i m1 = Matrix2i::Zero(); - m1.part() = m; - Matrix2i ref1; - ref1 << 1, 2, - 2, 4; - VERIFY(m1 == ref1); - - Matrix2i m2 = Matrix2i::Zero(); - m2.part() = m.part(); - Matrix2i ref2; - ref2 << 1, 2, - 2, 4; - VERIFY(m2 == ref2); - - Matrix2i m3 = Matrix2i::Zero(); - m3.part() = m.part(); - Matrix2i ref3; - ref3 << 1, 0, - 0, 4; - VERIFY(m3 == ref3); - - // example inspired from bug 159 - int array[] = {1, 2, 3, 4}; - Matrix2i::Map(array).part() = Matrix2i::Random().part(); - - std::cout << "hello\n" << array << std::endl; -} - -void test_eigen2_triangular() -{ - CALL_SUBTEST_8( selfadjoint() ); - for(int i = 0; i < g_repeat ; i++) { - CALL_SUBTEST_1( triangular(Matrix()) ); - CALL_SUBTEST_2( triangular(Matrix()) ); - CALL_SUBTEST_3( triangular(Matrix3d()) ); - CALL_SUBTEST_4( triangular(MatrixXcf(4, 4)) ); - CALL_SUBTEST_5( triangular(Matrix,8, 8>()) ); - CALL_SUBTEST_6( triangular(MatrixXd(17,17)) ); - CALL_SUBTEST_7( triangular(Matrix(5, 5)) ); - } -} diff --git a/external/eigen3/test/eigen2/eigen2_unalignedassert.cpp b/external/eigen3/test/eigen2/eigen2_unalignedassert.cpp deleted file mode 100644 index d10b6f5..0000000 --- a/external/eigen3/test/eigen2/eigen2_unalignedassert.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -struct Good1 -{ - MatrixXd m; // good: m will allocate its own array, taking care of alignment. - Good1() : m(20,20) {} -}; - -struct Good2 -{ - Matrix3d m; // good: m's size isn't a multiple of 16 bytes, so m doesn't have to be aligned -}; - -struct Good3 -{ - Vector2f m; // good: same reason -}; - -struct Bad4 -{ - Vector2d m; // bad: sizeof(m)%16==0 so alignment is required -}; - -struct Bad5 -{ - Matrix m; // bad: same reason -}; - -struct Bad6 -{ - Matrix m; // bad: same reason -}; - -struct Good7 -{ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW - Vector2d m; - float f; // make the struct have sizeof%16!=0 to make it a little more tricky when we allow an array of 2 such objects -}; - -struct Good8 -{ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW - float f; // try the f at first -- the EIGEN_ALIGN_128 attribute of m should make that still work - Matrix4f m; -}; - -struct Good9 -{ - Matrix m; // good: no alignment requested - float f; -}; - -template struct Depends -{ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(Align) - Vector2d m; - float f; -}; - -template -void check_unalignedassert_good() -{ - T *x, *y; - x = new T; - delete x; - y = new T[2]; - delete[] y; -} - -#if EIGEN_ARCH_WANTS_ALIGNMENT -template -void check_unalignedassert_bad() -{ - float buf[sizeof(T)+16]; - float *unaligned = buf; - while((reinterpret_cast(unaligned)&0xf)==0) ++unaligned; // make sure unaligned is really unaligned - T *x = ::new(static_cast(unaligned)) T; - x->~T(); -} -#endif - -void unalignedassert() -{ - check_unalignedassert_good(); - check_unalignedassert_good(); - check_unalignedassert_good(); -#if EIGEN_ARCH_WANTS_ALIGNMENT - VERIFY_RAISES_ASSERT(check_unalignedassert_bad()); - VERIFY_RAISES_ASSERT(check_unalignedassert_bad()); - VERIFY_RAISES_ASSERT(check_unalignedassert_bad()); -#endif - - check_unalignedassert_good(); - check_unalignedassert_good(); - check_unalignedassert_good(); - check_unalignedassert_good >(); - -#if EIGEN_ARCH_WANTS_ALIGNMENT - VERIFY_RAISES_ASSERT(check_unalignedassert_bad >()); -#endif -} - -void test_eigen2_unalignedassert() -{ - CALL_SUBTEST(unalignedassert()); -} diff --git a/external/eigen3/test/eigen2/eigen2_visitor.cpp b/external/eigen3/test/eigen2/eigen2_visitor.cpp deleted file mode 100644 index 4781991..0000000 --- a/external/eigen3/test/eigen2/eigen2_visitor.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" - -template void matrixVisitor(const MatrixType& p) -{ - typedef typename MatrixType::Scalar Scalar; - - int rows = p.rows(); - int cols = p.cols(); - - // construct a random matrix where all coefficients are different - MatrixType m; - m = MatrixType::Random(rows, cols); - for(int i = 0; i < m.size(); i++) - for(int i2 = 0; i2 < i; i2++) - while(m(i) == m(i2)) // yes, == - m(i) = ei_random(); - - Scalar minc = Scalar(1000), maxc = Scalar(-1000); - int minrow=0,mincol=0,maxrow=0,maxcol=0; - for(int j = 0; j < cols; j++) - for(int i = 0; i < rows; i++) - { - if(m(i,j) < minc) - { - minc = m(i,j); - minrow = i; - mincol = j; - } - if(m(i,j) > maxc) - { - maxc = m(i,j); - maxrow = i; - maxcol = j; - } - } - int eigen_minrow, eigen_mincol, eigen_maxrow, eigen_maxcol; - Scalar eigen_minc, eigen_maxc; - eigen_minc = m.minCoeff(&eigen_minrow,&eigen_mincol); - eigen_maxc = m.maxCoeff(&eigen_maxrow,&eigen_maxcol); - VERIFY(minrow == eigen_minrow); - VERIFY(maxrow == eigen_maxrow); - VERIFY(mincol == eigen_mincol); - VERIFY(maxcol == eigen_maxcol); - VERIFY_IS_APPROX(minc, eigen_minc); - VERIFY_IS_APPROX(maxc, eigen_maxc); - VERIFY_IS_APPROX(minc, m.minCoeff()); - VERIFY_IS_APPROX(maxc, m.maxCoeff()); -} - -template void vectorVisitor(const VectorType& w) -{ - typedef typename VectorType::Scalar Scalar; - - int size = w.size(); - - // construct a random vector where all coefficients are different - VectorType v; - v = VectorType::Random(size); - for(int i = 0; i < size; i++) - for(int i2 = 0; i2 < i; i2++) - while(v(i) == v(i2)) // yes, == - v(i) = ei_random(); - - Scalar minc = Scalar(1000), maxc = Scalar(-1000); - int minidx=0,maxidx=0; - for(int i = 0; i < size; i++) - { - if(v(i) < minc) - { - minc = v(i); - minidx = i; - } - if(v(i) > maxc) - { - maxc = v(i); - maxidx = i; - } - } - int eigen_minidx, eigen_maxidx; - Scalar eigen_minc, eigen_maxc; - eigen_minc = v.minCoeff(&eigen_minidx); - eigen_maxc = v.maxCoeff(&eigen_maxidx); - VERIFY(minidx == eigen_minidx); - VERIFY(maxidx == eigen_maxidx); - VERIFY_IS_APPROX(minc, eigen_minc); - VERIFY_IS_APPROX(maxc, eigen_maxc); - VERIFY_IS_APPROX(minc, v.minCoeff()); - VERIFY_IS_APPROX(maxc, v.maxCoeff()); -} - -void test_eigen2_visitor() -{ - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( matrixVisitor(Matrix()) ); - CALL_SUBTEST_2( matrixVisitor(Matrix2f()) ); - CALL_SUBTEST_3( matrixVisitor(Matrix4d()) ); - CALL_SUBTEST_4( matrixVisitor(MatrixXd(8, 12)) ); - CALL_SUBTEST_5( matrixVisitor(Matrix(20, 20)) ); - CALL_SUBTEST_6( matrixVisitor(MatrixXi(8, 12)) ); - } - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_7( vectorVisitor(Vector4f()) ); - CALL_SUBTEST_4( vectorVisitor(VectorXd(10)) ); - CALL_SUBTEST_4( vectorVisitor(RowVectorXd(10)) ); - CALL_SUBTEST_8( vectorVisitor(VectorXf(33)) ); - } -} diff --git a/external/eigen3/test/eigen2/gsl_helper.h b/external/eigen3/test/eigen2/gsl_helper.h deleted file mode 100644 index d1d8545..0000000 --- a/external/eigen3/test/eigen2/gsl_helper.h +++ /dev/null @@ -1,175 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_GSL_HELPER -#define EIGEN_GSL_HELPER - -#include - -#include -#include -#include -#include -#include -#include - -namespace Eigen { - -template::IsComplex> struct GslTraits -{ - typedef gsl_matrix* Matrix; - typedef gsl_vector* Vector; - static Matrix createMatrix(int rows, int cols) { return gsl_matrix_alloc(rows,cols); } - static Vector createVector(int size) { return gsl_vector_alloc(size); } - static void free(Matrix& m) { gsl_matrix_free(m); m=0; } - static void free(Vector& m) { gsl_vector_free(m); m=0; } - static void prod(const Matrix& m, const Vector& v, Vector& x) { gsl_blas_dgemv(CblasNoTrans,1,m,v,0,x); } - static void cholesky(Matrix& m) { gsl_linalg_cholesky_decomp(m); } - static void cholesky_solve(const Matrix& m, const Vector& b, Vector& x) { gsl_linalg_cholesky_solve(m,b,x); } - static void eigen_symm(const Matrix& m, Vector& eval, Matrix& evec) - { - gsl_eigen_symmv_workspace * w = gsl_eigen_symmv_alloc(m->size1); - Matrix a = createMatrix(m->size1, m->size2); - gsl_matrix_memcpy(a, m); - gsl_eigen_symmv(a,eval,evec,w); - gsl_eigen_symmv_sort(eval, evec, GSL_EIGEN_SORT_VAL_ASC); - gsl_eigen_symmv_free(w); - free(a); - } - static void eigen_symm_gen(const Matrix& m, const Matrix& _b, Vector& eval, Matrix& evec) - { - gsl_eigen_gensymmv_workspace * w = gsl_eigen_gensymmv_alloc(m->size1); - Matrix a = createMatrix(m->size1, m->size2); - Matrix b = createMatrix(_b->size1, _b->size2); - gsl_matrix_memcpy(a, m); - gsl_matrix_memcpy(b, _b); - gsl_eigen_gensymmv(a,b,eval,evec,w); - gsl_eigen_symmv_sort(eval, evec, GSL_EIGEN_SORT_VAL_ASC); - gsl_eigen_gensymmv_free(w); - free(a); - } -}; - -template struct GslTraits -{ - typedef gsl_matrix_complex* Matrix; - typedef gsl_vector_complex* Vector; - static Matrix createMatrix(int rows, int cols) { return gsl_matrix_complex_alloc(rows,cols); } - static Vector createVector(int size) { return gsl_vector_complex_alloc(size); } - static void free(Matrix& m) { gsl_matrix_complex_free(m); m=0; } - static void free(Vector& m) { gsl_vector_complex_free(m); m=0; } - static void cholesky(Matrix& m) { gsl_linalg_complex_cholesky_decomp(m); } - static void cholesky_solve(const Matrix& m, const Vector& b, Vector& x) { gsl_linalg_complex_cholesky_solve(m,b,x); } - static void prod(const Matrix& m, const Vector& v, Vector& x) - { gsl_blas_zgemv(CblasNoTrans,gsl_complex_rect(1,0),m,v,gsl_complex_rect(0,0),x); } - static void eigen_symm(const Matrix& m, gsl_vector* &eval, Matrix& evec) - { - gsl_eigen_hermv_workspace * w = gsl_eigen_hermv_alloc(m->size1); - Matrix a = createMatrix(m->size1, m->size2); - gsl_matrix_complex_memcpy(a, m); - gsl_eigen_hermv(a,eval,evec,w); - gsl_eigen_hermv_sort(eval, evec, GSL_EIGEN_SORT_VAL_ASC); - gsl_eigen_hermv_free(w); - free(a); - } - static void eigen_symm_gen(const Matrix& m, const Matrix& _b, gsl_vector* &eval, Matrix& evec) - { - gsl_eigen_genhermv_workspace * w = gsl_eigen_genhermv_alloc(m->size1); - Matrix a = createMatrix(m->size1, m->size2); - Matrix b = createMatrix(_b->size1, _b->size2); - gsl_matrix_complex_memcpy(a, m); - gsl_matrix_complex_memcpy(b, _b); - gsl_eigen_genhermv(a,b,eval,evec,w); - gsl_eigen_hermv_sort(eval, evec, GSL_EIGEN_SORT_VAL_ASC); - gsl_eigen_genhermv_free(w); - free(a); - } -}; - -template -void convert(const MatrixType& m, gsl_matrix* &res) -{ -// if (res) -// gsl_matrix_free(res); - res = gsl_matrix_alloc(m.rows(), m.cols()); - for (int i=0 ; i -void convert(const gsl_matrix* m, MatrixType& res) -{ - res.resize(int(m->size1), int(m->size2)); - for (int i=0 ; i -void convert(const VectorType& m, gsl_vector* &res) -{ - if (res) gsl_vector_free(res); - res = gsl_vector_alloc(m.size()); - for (int i=0 ; i -void convert(const gsl_vector* m, VectorType& res) -{ - res.resize (m->size); - for (int i=0 ; i -void convert(const MatrixType& m, gsl_matrix_complex* &res) -{ - res = gsl_matrix_complex_alloc(m.rows(), m.cols()); - for (int i=0 ; i -void convert(const gsl_matrix_complex* m, MatrixType& res) -{ - res.resize(int(m->size1), int(m->size2)); - for (int i=0 ; i -void convert(const VectorType& m, gsl_vector_complex* &res) -{ - res = gsl_vector_complex_alloc(m.size()); - for (int i=0 ; i -void convert(const gsl_vector_complex* m, VectorType& res) -{ - res.resize(m->size); - for (int i=0 ; i -// Copyright (C) 2008 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include -#include -#include -#include -#include - -#ifndef EIGEN_TEST_FUNC -#error EIGEN_TEST_FUNC must be defined -#endif - -#define DEFAULT_REPEAT 10 - -namespace Eigen -{ - static std::vector g_test_stack; - static int g_repeat; -} - -#define EI_PP_MAKE_STRING2(S) #S -#define EI_PP_MAKE_STRING(S) EI_PP_MAKE_STRING2(S) - -#define EI_PP_CAT2(a,b) a ## b -#define EI_PP_CAT(a,b) EI_PP_CAT2(a,b) - -#ifndef EIGEN_NO_ASSERTION_CHECKING - - namespace Eigen - { - static const bool should_raise_an_assert = false; - - // Used to avoid to raise two exceptions at a time in which - // case the exception is not properly caught. - // This may happen when a second exceptions is raise in a destructor. - static bool no_more_assert = false; - - struct eigen_assert_exception - { - eigen_assert_exception(void) {} - ~eigen_assert_exception() { Eigen::no_more_assert = false; } - }; - } - - // If EIGEN_DEBUG_ASSERTS is defined and if no assertion is raised while - // one should have been, then the list of excecuted assertions is printed out. - // - // EIGEN_DEBUG_ASSERTS is not enabled by default as it - // significantly increases the compilation time - // and might even introduce side effects that would hide - // some memory errors. - #ifdef EIGEN_DEBUG_ASSERTS - - namespace Eigen - { - static bool ei_push_assert = false; - static std::vector eigen_assert_list; - } - - #define eigen_assert(a) \ - if( (!(a)) && (!no_more_assert) ) \ - { \ - Eigen::no_more_assert = true; \ - throw Eigen::eigen_assert_exception(); \ - } \ - else if (Eigen::ei_push_assert) \ - { \ - eigen_assert_list.push_back(std::string(EI_PP_MAKE_STRING(__FILE__)" ("EI_PP_MAKE_STRING(__LINE__)") : "#a) ); \ - } - - #define VERIFY_RAISES_ASSERT(a) \ - { \ - Eigen::no_more_assert = false; \ - try { \ - Eigen::eigen_assert_list.clear(); \ - Eigen::ei_push_assert = true; \ - a; \ - Eigen::ei_push_assert = false; \ - std::cerr << "One of the following asserts should have been raised:\n"; \ - for (uint ai=0 ; ai - - -#define VERIFY(a) do { if (!(a)) { \ - std::cerr << "Test " << g_test_stack.back() << " failed in "EI_PP_MAKE_STRING(__FILE__) << " (" << EI_PP_MAKE_STRING(__LINE__) << ")" \ - << std::endl << " " << EI_PP_MAKE_STRING(a) << std::endl << std::endl; \ - abort(); \ - } } while (0) - -#define VERIFY_IS_APPROX(a, b) VERIFY(test_ei_isApprox(a, b)) -#define VERIFY_IS_NOT_APPROX(a, b) VERIFY(!test_ei_isApprox(a, b)) -#define VERIFY_IS_MUCH_SMALLER_THAN(a, b) VERIFY(test_ei_isMuchSmallerThan(a, b)) -#define VERIFY_IS_NOT_MUCH_SMALLER_THAN(a, b) VERIFY(!test_ei_isMuchSmallerThan(a, b)) -#define VERIFY_IS_APPROX_OR_LESS_THAN(a, b) VERIFY(test_ei_isApproxOrLessThan(a, b)) -#define VERIFY_IS_NOT_APPROX_OR_LESS_THAN(a, b) VERIFY(!test_ei_isApproxOrLessThan(a, b)) - -#define CALL_SUBTEST(FUNC) do { \ - g_test_stack.push_back(EI_PP_MAKE_STRING(FUNC)); \ - FUNC; \ - g_test_stack.pop_back(); \ - } while (0) - -namespace Eigen { - -template inline typename NumTraits::Real test_precision(); -template<> inline int test_precision() { return 0; } -template<> inline float test_precision() { return 1e-3f; } -template<> inline double test_precision() { return 1e-6; } -template<> inline float test_precision >() { return test_precision(); } -template<> inline double test_precision >() { return test_precision(); } -template<> inline long double test_precision() { return 1e-6; } - -inline bool test_ei_isApprox(const int& a, const int& b) -{ return ei_isApprox(a, b, test_precision()); } -inline bool test_ei_isMuchSmallerThan(const int& a, const int& b) -{ return ei_isMuchSmallerThan(a, b, test_precision()); } -inline bool test_ei_isApproxOrLessThan(const int& a, const int& b) -{ return ei_isApproxOrLessThan(a, b, test_precision()); } - -inline bool test_ei_isApprox(const float& a, const float& b) -{ return ei_isApprox(a, b, test_precision()); } -inline bool test_ei_isMuchSmallerThan(const float& a, const float& b) -{ return ei_isMuchSmallerThan(a, b, test_precision()); } -inline bool test_ei_isApproxOrLessThan(const float& a, const float& b) -{ return ei_isApproxOrLessThan(a, b, test_precision()); } - -inline bool test_ei_isApprox(const double& a, const double& b) -{ return ei_isApprox(a, b, test_precision()); } -inline bool test_ei_isMuchSmallerThan(const double& a, const double& b) -{ return ei_isMuchSmallerThan(a, b, test_precision()); } -inline bool test_ei_isApproxOrLessThan(const double& a, const double& b) -{ return ei_isApproxOrLessThan(a, b, test_precision()); } - -inline bool test_ei_isApprox(const std::complex& a, const std::complex& b) -{ return ei_isApprox(a, b, test_precision >()); } -inline bool test_ei_isMuchSmallerThan(const std::complex& a, const std::complex& b) -{ return ei_isMuchSmallerThan(a, b, test_precision >()); } - -inline bool test_ei_isApprox(const std::complex& a, const std::complex& b) -{ return ei_isApprox(a, b, test_precision >()); } -inline bool test_ei_isMuchSmallerThan(const std::complex& a, const std::complex& b) -{ return ei_isMuchSmallerThan(a, b, test_precision >()); } - -inline bool test_ei_isApprox(const long double& a, const long double& b) -{ return ei_isApprox(a, b, test_precision()); } -inline bool test_ei_isMuchSmallerThan(const long double& a, const long double& b) -{ return ei_isMuchSmallerThan(a, b, test_precision()); } -inline bool test_ei_isApproxOrLessThan(const long double& a, const long double& b) -{ return ei_isApproxOrLessThan(a, b, test_precision()); } - -template -inline bool test_ei_isApprox(const Type1& a, const Type2& b) -{ - return a.isApprox(b, test_precision()); -} - -template -inline bool test_ei_isMuchSmallerThan(const MatrixBase& m1, - const MatrixBase& m2) -{ - return m1.isMuchSmallerThan(m2, test_precision::Scalar>()); -} - -template -inline bool test_ei_isMuchSmallerThan(const MatrixBase& m, - const typename NumTraits::Scalar>::Real& s) -{ - return m.isMuchSmallerThan(s, test_precision::Scalar>()); -} - -} // end namespace Eigen - -template struct GetDifferentType; - -template<> struct GetDifferentType { typedef double type; }; -template<> struct GetDifferentType { typedef float type; }; -template struct GetDifferentType > -{ typedef std::complex::type> type; }; - -// forward declaration of the main test function -void EI_PP_CAT(test_,EIGEN_TEST_FUNC)(); - -using namespace Eigen; - -#ifdef EIGEN_TEST_PART_1 -#define CALL_SUBTEST_1(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_1(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_2 -#define CALL_SUBTEST_2(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_2(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_3 -#define CALL_SUBTEST_3(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_3(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_4 -#define CALL_SUBTEST_4(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_4(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_5 -#define CALL_SUBTEST_5(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_5(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_6 -#define CALL_SUBTEST_6(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_6(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_7 -#define CALL_SUBTEST_7(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_7(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_8 -#define CALL_SUBTEST_8(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_8(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_9 -#define CALL_SUBTEST_9(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_9(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_10 -#define CALL_SUBTEST_10(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_10(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_11 -#define CALL_SUBTEST_11(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_11(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_12 -#define CALL_SUBTEST_12(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_12(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_13 -#define CALL_SUBTEST_13(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_13(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_14 -#define CALL_SUBTEST_14(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_14(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_15 -#define CALL_SUBTEST_15(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_15(FUNC) -#endif - -#ifdef EIGEN_TEST_PART_16 -#define CALL_SUBTEST_16(FUNC) CALL_SUBTEST(FUNC) -#else -#define CALL_SUBTEST_16(FUNC) -#endif - - - -int main(int argc, char *argv[]) -{ - bool has_set_repeat = false; - bool has_set_seed = false; - bool need_help = false; - unsigned int seed = 0; - int repeat = DEFAULT_REPEAT; - - for(int i = 1; i < argc; i++) - { - if(argv[i][0] == 'r') - { - if(has_set_repeat) - { - std::cout << "Argument " << argv[i] << " conflicting with a former argument" << std::endl; - return 1; - } - repeat = std::atoi(argv[i]+1); - has_set_repeat = true; - if(repeat <= 0) - { - std::cout << "Invalid \'repeat\' value " << argv[i]+1 << std::endl; - return 1; - } - } - else if(argv[i][0] == 's') - { - if(has_set_seed) - { - std::cout << "Argument " << argv[i] << " conflicting with a former argument" << std::endl; - return 1; - } - seed = int(std::strtoul(argv[i]+1, 0, 10)); - has_set_seed = true; - bool ok = seed!=0; - if(!ok) - { - std::cout << "Invalid \'seed\' value " << argv[i]+1 << std::endl; - return 1; - } - } - else - { - need_help = true; - } - } - - if(need_help) - { - std::cout << "This test application takes the following optional arguments:" << std::endl; - std::cout << " rN Repeat each test N times (default: " << DEFAULT_REPEAT << ")" << std::endl; - std::cout << " sN Use N as seed for random numbers (default: based on current time)" << std::endl; - return 1; - } - - if(!has_set_seed) seed = (unsigned int) std::time(NULL); - if(!has_set_repeat) repeat = DEFAULT_REPEAT; - - std::cout << "Initializing random number generator with seed " << seed << std::endl; - std::srand(seed); - std::cout << "Repeating each test " << repeat << " times" << std::endl; - - Eigen::g_repeat = repeat; - Eigen::g_test_stack.push_back(EI_PP_MAKE_STRING(EIGEN_TEST_FUNC)); - - EI_PP_CAT(test_,EIGEN_TEST_FUNC)(); - return 0; -} - - - diff --git a/external/eigen3/test/eigen2/product.h b/external/eigen3/test/eigen2/product.h deleted file mode 100644 index ae1b4ba..0000000 --- a/external/eigen3/test/eigen2/product.h +++ /dev/null @@ -1,129 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2006-2008 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "main.h" -#include -#include - -template -bool areNotApprox(const MatrixBase& m1, const MatrixBase& m2, typename Derived1::RealScalar epsilon = precision()) -{ - return !((m1-m2).cwise().abs2().maxCoeff() < epsilon * epsilon - * std::max(m1.cwise().abs2().maxCoeff(), m2.cwise().abs2().maxCoeff())); -} - -template void product(const MatrixType& m) -{ - /* this test covers the following files: - Identity.h Product.h - */ - - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::FloatingPoint FloatingPoint; - typedef Matrix RowVectorType; - typedef Matrix ColVectorType; - typedef Matrix RowSquareMatrixType; - typedef Matrix ColSquareMatrixType; - typedef Matrix OtherMajorMatrixType; - - int rows = m.rows(); - int cols = m.cols(); - - // this test relies a lot on Random.h, and there's not much more that we can do - // to test it, hence I consider that we will have tested Random.h - MatrixType m1 = MatrixType::Random(rows, cols), - m2 = MatrixType::Random(rows, cols), - m3(rows, cols); - RowSquareMatrixType - identity = RowSquareMatrixType::Identity(rows, rows), - square = RowSquareMatrixType::Random(rows, rows), - res = RowSquareMatrixType::Random(rows, rows); - ColSquareMatrixType - square2 = ColSquareMatrixType::Random(cols, cols), - res2 = ColSquareMatrixType::Random(cols, cols); - RowVectorType v1 = RowVectorType::Random(rows); - ColVectorType vc2 = ColVectorType::Random(cols), vcres(cols); - OtherMajorMatrixType tm1 = m1; - - Scalar s1 = ei_random(); - - int r = ei_random(0, rows-1), - c = ei_random(0, cols-1); - - // begin testing Product.h: only associativity for now - // (we use Transpose.h but this doesn't count as a test for it) - - VERIFY_IS_APPROX((m1*m1.transpose())*m2, m1*(m1.transpose()*m2)); - m3 = m1; - m3 *= m1.transpose() * m2; - VERIFY_IS_APPROX(m3, m1 * (m1.transpose()*m2)); - VERIFY_IS_APPROX(m3, m1.lazy() * (m1.transpose()*m2)); - - // continue testing Product.h: distributivity - VERIFY_IS_APPROX(square*(m1 + m2), square*m1+square*m2); - VERIFY_IS_APPROX(square*(m1 - m2), square*m1-square*m2); - - // continue testing Product.h: compatibility with ScalarMultiple.h - VERIFY_IS_APPROX(s1*(square*m1), (s1*square)*m1); - VERIFY_IS_APPROX(s1*(square*m1), square*(m1*s1)); - - // again, test operator() to check const-qualification - s1 += (square.lazy() * m1)(r,c); - - // test Product.h together with Identity.h - VERIFY_IS_APPROX(v1, identity*v1); - VERIFY_IS_APPROX(v1.transpose(), v1.transpose() * identity); - // again, test operator() to check const-qualification - VERIFY_IS_APPROX(MatrixType::Identity(rows, cols)(r,c), static_cast(r==c)); - - if (rows!=cols) - VERIFY_RAISES_ASSERT(m3 = m1*m1); - - // test the previous tests were not screwed up because operator* returns 0 - // (we use the more accurate default epsilon) - if (NumTraits::HasFloatingPoint && std::min(rows,cols)>1) - { - VERIFY(areNotApprox(m1.transpose()*m2,m2.transpose()*m1)); - } - - // test optimized operator+= path - res = square; - res += (m1 * m2.transpose()).lazy(); - VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); - if (NumTraits::HasFloatingPoint && std::min(rows,cols)>1) - { - VERIFY(areNotApprox(res,square + m2 * m1.transpose())); - } - vcres = vc2; - vcres += (m1.transpose() * v1).lazy(); - VERIFY_IS_APPROX(vcres, vc2 + m1.transpose() * v1); - tm1 = m1; - VERIFY_IS_APPROX(tm1.transpose() * v1, m1.transpose() * v1); - VERIFY_IS_APPROX(v1.transpose() * tm1, v1.transpose() * m1); - - // test submatrix and matrix/vector product - for (int i=0; i::HasFloatingPoint && std::min(rows,cols)>1) - { - VERIFY(areNotApprox(res2,square2 + m2.transpose() * m1)); - } -} - diff --git a/external/eigen3/test/eigen2/runtest.sh b/external/eigen3/test/eigen2/runtest.sh deleted file mode 100755 index bc693af..0000000 --- a/external/eigen3/test/eigen2/runtest.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -black='\E[30m' -red='\E[31m' -green='\E[32m' -yellow='\E[33m' -blue='\E[34m' -magenta='\E[35m' -cyan='\E[36m' -white='\E[37m' - -if make test_$1 > /dev/null 2> .runtest.log ; then - if ! ./test_$1 r20 > /dev/null 2> .runtest.log ; then - echo -e $red Test $1 failed: $black - echo -e $blue - cat .runtest.log - echo -e $black - exit 1 - else - echo -e $green Test $1 passed$black - fi -else - echo -e $red Build of target $1 failed: $black - echo -e $blue - cat .runtest.log - echo -e $black - exit 1 -fi diff --git a/external/eigen3/test/eigen2/sparse.h b/external/eigen3/test/eigen2/sparse.h deleted file mode 100644 index e12f899..0000000 --- a/external/eigen3/test/eigen2/sparse.h +++ /dev/null @@ -1,154 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2008 Daniel Gomez Ferro -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_TESTSPARSE_H - -#include "main.h" - -#if EIGEN_GNUC_AT_LEAST(4,0) && !defined __ICC -#include -#define EIGEN_UNORDERED_MAP_SUPPORT -namespace std { - using std::tr1::unordered_map; -} -#endif - -#ifdef EIGEN_GOOGLEHASH_SUPPORT - #include -#endif - -#include -#include -#include - -enum { - ForceNonZeroDiag = 1, - MakeLowerTriangular = 2, - MakeUpperTriangular = 4, - ForceRealDiag = 8 -}; - -/* Initializes both a sparse and dense matrix with same random values, - * and a ratio of \a density non zero entries. - * \param flags is a union of ForceNonZeroDiag, MakeLowerTriangular and MakeUpperTriangular - * allowing to control the shape of the matrix. - * \param zeroCoords and nonzeroCoords allows to get the coordinate lists of the non zero, - * and zero coefficients respectively. - */ -template void -initSparse(double density, - Matrix& refMat, - SparseMatrix& sparseMat, - int flags = 0, - std::vector* zeroCoords = 0, - std::vector* nonzeroCoords = 0) -{ - sparseMat.startFill(int(refMat.rows()*refMat.cols()*density)); - for(int j=0; j(0,1) < density) ? ei_random() : Scalar(0); - if ((flags&ForceNonZeroDiag) && (i==j)) - { - v = ei_random()*Scalar(3.); - v = v*v + Scalar(5.); - } - if ((flags & MakeLowerTriangular) && j>i) - v = Scalar(0); - else if ((flags & MakeUpperTriangular) && jpush_back(Vector2i(i,j)); - } - else if (zeroCoords) - { - zeroCoords->push_back(Vector2i(i,j)); - } - refMat(i,j) = v; - } - } - sparseMat.endFill(); -} - -template void -initSparse(double density, - Matrix& refMat, - DynamicSparseMatrix& sparseMat, - int flags = 0, - std::vector* zeroCoords = 0, - std::vector* nonzeroCoords = 0) -{ - sparseMat.startFill(int(refMat.rows()*refMat.cols()*density)); - for(int j=0; j(0,1) < density) ? ei_random() : Scalar(0); - if ((flags&ForceNonZeroDiag) && (i==j)) - { - v = ei_random()*Scalar(3.); - v = v*v + Scalar(5.); - } - if ((flags & MakeLowerTriangular) && j>i) - v = Scalar(0); - else if ((flags & MakeUpperTriangular) && jpush_back(Vector2i(i,j)); - } - else if (zeroCoords) - { - zeroCoords->push_back(Vector2i(i,j)); - } - refMat(i,j) = v; - } - } - sparseMat.endFill(); -} - -template void -initSparse(double density, - Matrix& refVec, - SparseVector& sparseVec, - std::vector* zeroCoords = 0, - std::vector* nonzeroCoords = 0) -{ - sparseVec.reserve(int(refVec.size()*density)); - sparseVec.setZero(); - for(int i=0; i(0,1) < density) ? ei_random() : Scalar(0); - if (v!=Scalar(0)) - { - sparseVec.fill(i) = v; - if (nonzeroCoords) - nonzeroCoords->push_back(i); - } - else if (zeroCoords) - zeroCoords->push_back(i); - refVec[i] = v; - } -} - -#endif // EIGEN_TESTSPARSE_H diff --git a/external/eigen3/test/eigen2/testsuite.cmake b/external/eigen3/test/eigen2/testsuite.cmake deleted file mode 100644 index 12b6bfa..0000000 --- a/external/eigen3/test/eigen2/testsuite.cmake +++ /dev/null @@ -1,197 +0,0 @@ - -#################################################################### -# -# Usage: -# - create a new folder, let's call it cdash -# - in that folder, do: -# ctest -S path/to/eigen2/test/testsuite.cmake[,option1=value1[,option2=value2]] -# -# Options: -# - EIGEN_CXX: compiler, eg.: g++-4.2 -# default: default c++ compiler -# - EIGEN_SITE: eg, INRIA-Bdx_pc-gael, or the name of the contributor, etc. -# default: hostname -# - EIGEN_BUILD_STRING: a string which identify the system/compiler. It should be formed like that: -# --- -# with: -# = opensuse, debian, osx, windows, cygwin, freebsd, solaris, etc. -# = 11.1, XP, vista, leopard, etc. -# = i386, x86_64, ia64, powerpc, etc. -# = gcc-4.3.2, icc-11.0, MSVC-2008, etc. -# - EIGEN_EXPLICIT_VECTORIZATION: novec, SSE2, Altivec -# default: SSE2 for x86_64 systems, novec otherwise -# Its value is automatically appended to EIGEN_BUILD_STRING -# - EIGEN_CMAKE_DIR: path to cmake executable -# - EIGEN_MODE: dashboard model, can be Experimental, Nightly, or Continuous -# default: Nightly -# - EIGEN_WORK_DIR: directory used to download the source files and make the builds -# default: folder which contains this script -# - EIGEN_CMAKE_ARGS: additional arguments passed to cmake -# - CTEST_SOURCE_DIRECTORY: path to eigen's src (use a new and empty folder, not the one you are working on) -# default: /src -# - CTEST_BINARY_DIRECTORY: build directory -# default: /nightly- -# -# Here is an example running several compilers on a linux system: -# #!/bin/bash -# ARCH=`uname -m` -# SITE=`hostname` -# VERSION=opensuse-11.1 -# WORK_DIR=/home/gael/Coding/eigen2/cdash -# # get the last version of the script -# wget http://bitbucket.org/eigen/eigen/raw/tip/test/testsuite.cmake -o $WORK_DIR/testsuite.cmake -# COMMON="ctest -S $WORK_DIR/testsuite.cmake,EIGEN_WORK_DIR=$WORK_DIR,EIGEN_SITE=$SITE,EIGEN_MODE=$1,EIGEN_BUILD_STRING=$OS_VERSION-$ARCH" -# $COMMON-gcc-3.4.6,EIGEN_CXX=g++-3.4 -# $COMMON-gcc-4.0.1,EIGEN_CXX=g++-4.0.1 -# $COMMON-gcc-4.3.2,EIGEN_CXX=g++-4.3,EIGEN_EXPLICIT_VECTORIZATION=novec -# $COMMON-gcc-4.3.2,EIGEN_CXX=g++-4.3,EIGEN_EXPLICIT_VECTORIZATION=SSE2 -# $COMMON-icc-11.0,EIGEN_CXX=icpc -# -#################################################################### - -# process the arguments - -set(ARGLIST ${CTEST_SCRIPT_ARG}) -while(${ARGLIST} MATCHES ".+.*") - - # pick first - string(REGEX MATCH "([^,]*)(,.*)?" DUMMY ${ARGLIST}) - SET(TOP ${CMAKE_MATCH_1}) - - # remove first - string(REGEX MATCHALL "[^,]*,(.*)" DUMMY ${ARGLIST}) - SET(ARGLIST ${CMAKE_MATCH_1}) - - # decompose as a pair key=value - string(REGEX MATCH "([^=]*)(=.*)?" DUMMY ${TOP}) - SET(KEY ${CMAKE_MATCH_1}) - - string(REGEX MATCH "[^=]*=(.*)" DUMMY ${TOP}) - SET(VALUE ${CMAKE_MATCH_1}) - - # set the variable to the specified value - if(VALUE) - SET(${KEY} ${VALUE}) - else(VALUE) - SET(${KEY} ON) - endif(VALUE) - -endwhile(${ARGLIST} MATCHES ".+.*") - -#################################################################### -# Automatically set some user variables if they have not been defined manually -#################################################################### -cmake_minimum_required(VERSION 2.6 FATAL_ERROR) - -if(NOT EIGEN_SITE) - site_name(EIGEN_SITE) -endif(NOT EIGEN_SITE) - -if(NOT EIGEN_CMAKE_DIR) - SET(EIGEN_CMAKE_DIR "") -endif(NOT EIGEN_CMAKE_DIR) - -if(NOT EIGEN_BUILD_STRING) - - # let's try to find all information we need to make the build string ourself - - # OS - build_name(EIGEN_OS_VERSION) - - # arch - set(EIGEN_ARCH ${CMAKE_SYSTEM_PROCESSOR}) - if(WIN32) - set(EIGEN_ARCH $ENV{PROCESSOR_ARCHITECTURE}) - else(WIN32) - execute_process(COMMAND uname -m OUTPUT_VARIABLE EIGEN_ARCH OUTPUT_STRIP_TRAILING_WHITESPACE) - endif(WIN32) - - set(EIGEN_BUILD_STRING ${EIGEN_OS_VERSION}${EIGEN_ARCH}-${EIGEN_CXX}) - -endif(NOT EIGEN_BUILD_STRING) - -if(DEFINED EIGEN_EXPLICIT_VECTORIZATION) - set(EIGEN_BUILD_STRING ${EIGEN_BUILD_STRING}-${EIGEN_EXPLICIT_VECTORIZATION}) -endif(DEFINED EIGEN_EXPLICIT_VECTORIZATION) - -if(NOT EIGEN_WORK_DIR) - set(EIGEN_WORK_DIR ${CTEST_SCRIPT_DIRECTORY}) -endif(NOT EIGEN_WORK_DIR) - -if(NOT CTEST_SOURCE_DIRECTORY) - SET (CTEST_SOURCE_DIRECTORY "${EIGEN_WORK_DIR}/src") -endif(NOT CTEST_SOURCE_DIRECTORY) - -if(NOT CTEST_BINARY_DIRECTORY) - SET (CTEST_BINARY_DIRECTORY "${EIGEN_WORK_DIR}/nightly_${EIGEN_CXX}") -endif(NOT CTEST_BINARY_DIRECTORY) - -if(NOT EIGEN_MODE) - set(EIGEN_MODE Nightly) -endif(NOT EIGEN_MODE) - -## mandatory variables (the default should be ok in most cases): - -SET (CTEST_CVS_COMMAND "hg") -SET (CTEST_CVS_CHECKOUT "${CTEST_CVS_COMMAND} clone -r 2.0 http://bitbucket.org/eigen/eigen \"${CTEST_SOURCE_DIRECTORY}\"") - -# which ctest command to use for running the dashboard -SET (CTEST_COMMAND "${EIGEN_CMAKE_DIR}ctest -D ${EIGEN_MODE}") - -# what cmake command to use for configuring this dashboard -SET (CTEST_CMAKE_COMMAND "${EIGEN_CMAKE_DIR}cmake -DEIGEN_BUILD_TESTS=on ") - -#################################################################### -# The values in this section are optional you can either -# have them or leave them commented out -#################################################################### - -# this make sure we get consistent outputs -SET($ENV{LC_MESSAGES} "en_EN") - -# should ctest wipe the binary tree before running -SET(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY TRUE) -SET(CTEST_BACKUP_AND_RESTORE TRUE) - -# this is the initial cache to use for the binary tree, be careful to escape -# any quotes inside of this string if you use it -if(WIN32 AND NOT UNIX) - #message(SEND_ERROR "win32") - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -G \"NMake Makefiles\" -DCMAKE_MAKE_PROGRAM=nmake") - SET (CTEST_INITIAL_CACHE " - MAKECOMMAND:STRING=nmake -i - CMAKE_MAKE_PROGRAM:FILEPATH=nmake - CMAKE_GENERATOR:INTERNAL=NMake Makefiles - BUILDNAME:STRING=${EIGEN_BUILD_STRING} - SITE:STRING=${EIGEN_SITE} - ") -else(WIN32 AND NOT UNIX) - SET (CTEST_INITIAL_CACHE " - BUILDNAME:STRING=${EIGEN_BUILD_STRING} - SITE:STRING=${EIGEN_SITE} - ") -endif(WIN32 AND NOT UNIX) - -# set any extra environment variables to use during the execution of the script here: - -if(EIGEN_CXX) - set(CTEST_ENVIRONMENT "CXX=${EIGEN_CXX}") -endif(EIGEN_CXX) - -if(DEFINED EIGEN_EXPLICIT_VECTORIZATION) - if(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE2) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_SSE2=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE3) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_SSE2=ON -DEIGEN_TEST_SSE3=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES Altivec) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_ALTIVEC=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES novec) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_NO_EXPLICIT_VECTORIZATION=ON") - else(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE2) - message(FATAL_ERROR "Invalid value for EIGEN_EXPLICIT_VECTORIZATION (${EIGEN_EXPLICIT_VECTORIZATION}), must be: novec, SSE2, SSE3, Altivec") - endif(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE2) -endif(DEFINED EIGEN_EXPLICIT_VECTORIZATION) - -if(DEFINED EIGEN_CMAKE_ARGS) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} ${EIGEN_CMAKE_ARGS}") -endif(DEFINED EIGEN_CMAKE_ARGS) diff --git a/external/eigen3/test/eigen2support.cpp b/external/eigen3/test/eigen2support.cpp index 1fa49a8..ad1d980 100644 --- a/external/eigen3/test/eigen2support.cpp +++ b/external/eigen3/test/eigen2support.cpp @@ -8,7 +8,6 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #define EIGEN2_SUPPORT -#define EIGEN_NO_EIGEN2_DEPRECATED_WARNING #include "main.h" diff --git a/external/eigen3/test/eigensolver_complex.cpp b/external/eigen3/test/eigensolver_complex.cpp index c9d8c08..293b1b2 100644 --- a/external/eigen3/test/eigensolver_complex.cpp +++ b/external/eigen3/test/eigensolver_complex.cpp @@ -13,20 +13,59 @@ #include #include -/* Check that two column vectors are approximately equal upto permutations, - by checking that the k-th power sums are equal for k = 1, ..., vec1.rows() */ +template bool find_pivot(typename MatrixType::Scalar tol, MatrixType &diffs, Index col=0) +{ + bool match = diffs.diagonal().sum() <= tol; + if(match || col==diffs.cols()) + { + return match; + } + else + { + Index n = diffs.cols(); + std::vector > transpositions; + for(Index i=col; i tol) + break; + + best_index += col; + + diffs.row(col).swap(diffs.row(best_index)); + if(find_pivot(tol,diffs,col+1)) return true; + diffs.row(col).swap(diffs.row(best_index)); + + // move current pivot to the end + diffs.row(n-(i-col)-1).swap(diffs.row(best_index)); + transpositions.push_back(std::pair(n-(i-col)-1,best_index)); + } + // restore + for(Index k=transpositions.size()-1; k>=0; --k) + diffs.row(transpositions[k].first).swap(diffs.row(transpositions[k].second)); + } + return false; +} + +/* Check that two column vectors are approximately equal upto permutations. + * Initially, this method checked that the k-th power sums are equal for all k = 1, ..., vec1.rows(), + * however this strategy is numerically inacurate because of numerical cancellation issues. + */ template void verify_is_approx_upto_permutation(const VectorType& vec1, const VectorType& vec2) { - typedef typename NumTraits::Real RealScalar; + typedef typename VectorType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; VERIFY(vec1.cols() == 1); VERIFY(vec2.cols() == 1); VERIFY(vec1.rows() == vec2.rows()); - for (int k = 1; k <= vec1.rows(); ++k) - { - VERIFY_IS_APPROX(vec1.array().pow(RealScalar(k)).sum(), vec2.array().pow(RealScalar(k)).sum()); - } + + Index n = vec1.rows(); + RealScalar tol = test_precision()*test_precision()*numext::maxi(vec1.squaredNorm(),vec2.squaredNorm()); + Matrix diffs = (vec1.rowwise().replicate(n) - vec2.rowwise().replicate(n).transpose()).cwiseAbs2(); + + VERIFY( find_pivot(tol, diffs) ); } @@ -79,13 +118,28 @@ template void eigensolver(const MatrixType& m) MatrixType id = MatrixType::Identity(rows, cols); VERIFY_IS_APPROX(id.operatorNorm(), RealScalar(1)); - if (rows > 1) + if (rows > 1 && rows < 20) { // Test matrix with NaN a(0,0) = std::numeric_limits::quiet_NaN(); ComplexEigenSolver eiNaN(a); VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + ComplexEigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } + + // regression test for bug 478 + { + a.setZero(); + ComplexEigenSolver ei3(a); + VERIFY_IS_EQUAL(ei3.info(), Success); + VERIFY_IS_MUCH_SMALLER_THAN(ei3.eigenvalues().norm(),RealScalar(1)); + VERIFY((ei3.eigenvectors().transpose()*ei3.eigenvectors().transpose()).eval().isIdentity()); + } } template void eigensolver_verify_assert(const MatrixType& m) @@ -108,6 +162,7 @@ void test_eigensolver_complex() CALL_SUBTEST_2( eigensolver(MatrixXcd(s,s)) ); CALL_SUBTEST_3( eigensolver(Matrix, 1, 1>()) ); CALL_SUBTEST_4( eigensolver(Matrix3f()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } CALL_SUBTEST_1( eigensolver_verify_assert(Matrix4cf()) ); s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); diff --git a/external/eigen3/test/eigensolver_generalized_real.cpp b/external/eigen3/test/eigensolver_generalized_real.cpp index 566a4bd..9c0838b 100644 --- a/external/eigen3/test/eigensolver_generalized_real.cpp +++ b/external/eigen3/test/eigensolver_generalized_real.cpp @@ -1,15 +1,17 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2012 Gael Guennebaud +// Copyright (C) 2012-2016 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define EIGEN_RUNTIME_NO_MALLOC #include "main.h" #include #include +#include template void generalized_eigensolver_real(const MatrixType& m) { @@ -21,6 +23,7 @@ template void generalized_eigensolver_real(const MatrixType Index cols = m.cols(); typedef typename MatrixType::Scalar Scalar; + typedef std::complex ComplexScalar; typedef Matrix VectorType; MatrixType a = MatrixType::Random(rows,cols); @@ -31,14 +34,49 @@ template void generalized_eigensolver_real(const MatrixType MatrixType spdB = b.adjoint() * b + b1.adjoint() * b1; // lets compare to GeneralizedSelfAdjointEigenSolver - GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); - GeneralizedEigenSolver eig(spdA, spdB); + { + GeneralizedSelfAdjointEigenSolver symmEig(spdA, spdB); + GeneralizedEigenSolver eig(spdA, spdB); - VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); + VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0); - VectorType realEigenvalues = eig.eigenvalues().real(); - std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); - VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + VectorType realEigenvalues = eig.eigenvalues().real(); + std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size()); + VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues()); + + // check eigenvectors + typename GeneralizedEigenSolver::EigenvectorsType D = eig.eigenvalues().asDiagonal(); + typename GeneralizedEigenSolver::EigenvectorsType V = eig.eigenvectors(); + VERIFY_IS_APPROX(spdA*V, spdB*V*D); + } + + // non symmetric case: + { + GeneralizedEigenSolver eig(rows); + // TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition + //Eigen::internal::set_is_malloc_allowed(false); + eig.compute(a,b); + //Eigen::internal::set_is_malloc_allowed(true); + for(Index k=0; k tmp = (eig.betas()(k)*a).template cast() - eig.alphas()(k)*b; + if(tmp.size()>1 && tmp.norm()>(std::numeric_limits::min)()) + tmp /= tmp.norm(); + VERIFY_IS_MUCH_SMALLER_THAN( std::abs(tmp.determinant()), Scalar(1) ); + } + // check eigenvectors + typename GeneralizedEigenSolver::EigenvectorsType D = eig.eigenvalues().asDiagonal(); + typename GeneralizedEigenSolver::EigenvectorsType V = eig.eigenvectors(); + VERIFY_IS_APPROX(a*V, b*V*D); + } + + // regression test for bug 1098 + { + GeneralizedSelfAdjointEigenSolver eig1(a.adjoint() * a,b.adjoint() * b); + eig1.compute(a.adjoint() * a,b.adjoint() * b); + GeneralizedEigenSolver eig2(a.adjoint() * a,b.adjoint() * b); + eig2.compute(a.adjoint() * a,b.adjoint() * b); + } } void test_eigensolver_generalized_real() @@ -49,7 +87,7 @@ void test_eigensolver_generalized_real() s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) ); - // some trivial but implementation-wise tricky cases + // some trivial but implementation-wise special cases CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) ); CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) ); CALL_SUBTEST_3( generalized_eigensolver_real(Matrix()) ); diff --git a/external/eigen3/test/eigensolver_generic.cpp b/external/eigen3/test/eigensolver_generic.cpp index 005af81..d0e644d 100644 --- a/external/eigen3/test/eigensolver_generic.cpp +++ b/external/eigen3/test/eigensolver_generic.cpp @@ -63,13 +63,28 @@ template void eigensolver(const MatrixType& m) MatrixType id = MatrixType::Identity(rows, cols); VERIFY_IS_APPROX(id.operatorNorm(), RealScalar(1)); - if (rows > 2) + if (rows > 2 && rows < 20) { // Test matrix with NaN a(0,0) = std::numeric_limits::quiet_NaN(); EigenSolver eiNaN(a); VERIFY_IS_EQUAL(eiNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + EigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } + + // regression test for bug 478 + { + a.setZero(); + EigenSolver ei3(a); + VERIFY_IS_EQUAL(ei3.info(), Success); + VERIFY_IS_MUCH_SMALLER_THAN(ei3.eigenvalues().norm(),RealScalar(1)); + VERIFY((ei3.eigenvectors().transpose()*ei3.eigenvectors().transpose()).eval().isIdentity()); + } } template void eigensolver_verify_assert(const MatrixType& m) @@ -93,6 +108,7 @@ void test_eigensolver_generic() CALL_SUBTEST_1( eigensolver(Matrix4f()) ); s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_2( eigensolver(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) // some trivial but implementation-wise tricky cases CALL_SUBTEST_2( eigensolver(MatrixXd(1,1)) ); @@ -114,12 +130,37 @@ void test_eigensolver_generic() CALL_SUBTEST_2( { MatrixXd A(1,1); - A(0,0) = std::sqrt(-1.); + A(0,0) = std::sqrt(-1.); // is Not-a-Number Eigen::EigenSolver solver(A); - MatrixXd V(1, 1); - V(0,0) = solver.eigenvectors()(0,0).real(); + VERIFY_IS_EQUAL(solver.info(), NumericalIssue); } ); +#ifdef EIGEN_TEST_PART_2 + { + // regression test for bug 793 + MatrixXd a(3,3); + a << 0, 0, 1, + 1, 1, 1, + 1, 1e+200, 1; + Eigen::EigenSolver eig(a); + double scale = 1e-200; // scale to avoid overflow during the comparisons + VERIFY_IS_APPROX(a * eig.pseudoEigenvectors()*scale, eig.pseudoEigenvectors() * eig.pseudoEigenvalueMatrix()*scale); + VERIFY_IS_APPROX(a * eig.eigenvectors()*scale, eig.eigenvectors() * eig.eigenvalues().asDiagonal()*scale); + } + { + // check a case where all eigenvalues are null. + MatrixXd a(2,2); + a << 1, 1, + -1, -1; + Eigen::EigenSolver eig(a); + VERIFY_IS_APPROX(eig.pseudoEigenvectors().squaredNorm(), 2.); + VERIFY_IS_APPROX((a * eig.pseudoEigenvectors()).norm()+1., 1.); + VERIFY_IS_APPROX((eig.pseudoEigenvectors() * eig.pseudoEigenvalueMatrix()).norm()+1., 1.); + VERIFY_IS_APPROX((a * eig.eigenvectors()).norm()+1., 1.); + VERIFY_IS_APPROX((eig.eigenvectors() * eig.eigenvalues().asDiagonal()).norm()+1., 1.); + } +#endif + TEST_SET_BUT_UNUSED_VARIABLE(s) } diff --git a/external/eigen3/test/eigensolver_selfadjoint.cpp b/external/eigen3/test/eigensolver_selfadjoint.cpp index 38689cf..39ad413 100644 --- a/external/eigen3/test/eigensolver_selfadjoint.cpp +++ b/external/eigen3/test/eigensolver_selfadjoint.cpp @@ -9,8 +9,62 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #include "main.h" +#include "svd_fill.h" #include #include +#include + + +template void selfadjointeigensolver_essential_check(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + RealScalar eival_eps = numext::mini(test_precision(), NumTraits::dummy_precision()*20000); + + SelfAdjointEigenSolver eiSymm(m); + VERIFY_IS_EQUAL(eiSymm.info(), Success); + + RealScalar scaling = m.cwiseAbs().maxCoeff(); + + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(eiSymm.eigenvalues().cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX((m.template selfadjointView() * eiSymm.eigenvectors())/scaling, + (eiSymm.eigenvectors() * eiSymm.eigenvalues().asDiagonal())/scaling); + } + VERIFY_IS_APPROX(m.template selfadjointView().eigenvalues(), eiSymm.eigenvalues()); + VERIFY_IS_UNITARY(eiSymm.eigenvectors()); + + if(m.cols()<=4) + { + SelfAdjointEigenSolver eiDirect; + eiDirect.computeDirect(m); + VERIFY_IS_EQUAL(eiDirect.info(), Success); + if(! eiSymm.eigenvalues().isApprox(eiDirect.eigenvalues(), eival_eps) ) + { + std::cerr << "reference eigenvalues: " << eiSymm.eigenvalues().transpose() << "\n" + << "obtained eigenvalues: " << eiDirect.eigenvalues().transpose() << "\n" + << "diff: " << (eiSymm.eigenvalues()-eiDirect.eigenvalues()).transpose() << "\n" + << "error (eps): " << (eiSymm.eigenvalues()-eiDirect.eigenvalues()).norm() / eiSymm.eigenvalues().norm() << " (" << eival_eps << ")\n"; + } + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(eiDirect.eigenvalues().cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX(eiSymm.eigenvalues()/scaling, eiDirect.eigenvalues()/scaling); + VERIFY_IS_APPROX((m.template selfadjointView() * eiDirect.eigenvectors())/scaling, + (eiDirect.eigenvectors() * eiDirect.eigenvalues().asDiagonal())/scaling); + VERIFY_IS_APPROX(m.template selfadjointView().eigenvalues()/scaling, eiDirect.eigenvalues()/scaling); + } + + VERIFY_IS_UNITARY(eiDirect.eigenvectors()); + } +} template void selfadjointeigensolver(const MatrixType& m) { @@ -31,17 +85,8 @@ template void selfadjointeigensolver(const MatrixType& m) MatrixType symmA = a.adjoint() * a + a1.adjoint() * a1; MatrixType symmC = symmA; - // randomly nullify some rows/columns - { - Index count = 1;//internal::random(-cols,cols); - for(Index k=0; k(0,cols-1); - symmA.row(i).setZero(); - symmA.col(i).setZero(); - } - } - + svd_fill_random(symmA,Symmetric); + symmA.template triangularView().setZero(); symmC.template triangularView().setZero(); @@ -49,23 +94,13 @@ template void selfadjointeigensolver(const MatrixType& m) MatrixType b1 = MatrixType::Random(rows,cols); MatrixType symmB = b.adjoint() * b + b1.adjoint() * b1; symmB.template triangularView().setZero(); + + CALL_SUBTEST( selfadjointeigensolver_essential_check(symmA) ); SelfAdjointEigenSolver eiSymm(symmA); - SelfAdjointEigenSolver eiDirect; - eiDirect.computeDirect(symmA); // generalized eigen pb GeneralizedSelfAdjointEigenSolver eiSymmGen(symmC, symmB); - VERIFY_IS_EQUAL(eiSymm.info(), Success); - VERIFY((symmA.template selfadjointView() * eiSymm.eigenvectors()).isApprox( - eiSymm.eigenvectors() * eiSymm.eigenvalues().asDiagonal(), largerEps)); - VERIFY_IS_APPROX(symmA.template selfadjointView().eigenvalues(), eiSymm.eigenvalues()); - - VERIFY_IS_EQUAL(eiDirect.info(), Success); - VERIFY((symmA.template selfadjointView() * eiDirect.eigenvectors()).isApprox( - eiDirect.eigenvectors() * eiDirect.eigenvalues().asDiagonal(), largerEps)); - VERIFY_IS_APPROX(symmA.template selfadjointView().eigenvalues(), eiDirect.eigenvalues()); - SelfAdjointEigenSolver eiSymmNoEivecs(symmA, false); VERIFY_IS_EQUAL(eiSymmNoEivecs.info(), Success); VERIFY_IS_APPROX(eiSymm.eigenvalues(), eiSymmNoEivecs.eigenvalues()); @@ -111,37 +146,111 @@ template void selfadjointeigensolver(const MatrixType& m) // test Tridiagonalization's methods Tridiagonalization tridiag(symmC); - // FIXME tridiag.matrixQ().adjoint() does not work + VERIFY_IS_APPROX(tridiag.diagonal(), tridiag.matrixT().diagonal()); + VERIFY_IS_APPROX(tridiag.subDiagonal(), tridiag.matrixT().template diagonal<-1>()); + Matrix T = tridiag.matrixT(); + if(rows>1 && cols>1) { + // FIXME check that upper and lower part are 0: + //VERIFY(T.topRightCorner(rows-2, cols-2).template triangularView().isZero()); + } + VERIFY_IS_APPROX(tridiag.diagonal(), T.diagonal()); + VERIFY_IS_APPROX(tridiag.subDiagonal(), T.template diagonal<1>()); VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), tridiag.matrixQ() * tridiag.matrixT().eval() * MatrixType(tridiag.matrixQ()).adjoint()); + VERIFY_IS_APPROX(MatrixType(symmC.template selfadjointView()), tridiag.matrixQ() * tridiag.matrixT() * tridiag.matrixQ().adjoint()); - if (rows > 1) + // Test computation of eigenvalues from tridiagonal matrix + if(rows > 1) + { + SelfAdjointEigenSolver eiSymmTridiag; + eiSymmTridiag.computeFromTridiagonal(tridiag.matrixT().diagonal(), tridiag.matrixT().diagonal(-1), ComputeEigenvectors); + VERIFY_IS_APPROX(eiSymm.eigenvalues(), eiSymmTridiag.eigenvalues()); + VERIFY_IS_APPROX(tridiag.matrixT(), eiSymmTridiag.eigenvectors().real() * eiSymmTridiag.eigenvalues().asDiagonal() * eiSymmTridiag.eigenvectors().real().transpose()); + } + + if (rows > 1 && rows < 20) { // Test matrix with NaN symmC(0,0) = std::numeric_limits::quiet_NaN(); SelfAdjointEigenSolver eiSymmNaN(symmC); VERIFY_IS_EQUAL(eiSymmNaN.info(), NoConvergence); } + + // regression test for bug 1098 + { + SelfAdjointEigenSolver eig(a.adjoint() * a); + eig.compute(a.adjoint() * a); + } + + // regression test for bug 478 + { + a.setZero(); + SelfAdjointEigenSolver ei3(a); + VERIFY_IS_EQUAL(ei3.info(), Success); + VERIFY_IS_MUCH_SMALLER_THAN(ei3.eigenvalues().norm(),RealScalar(1)); + VERIFY((ei3.eigenvectors().transpose()*ei3.eigenvectors().transpose()).eval().isIdentity()); + } +} + +template +void bug_854() +{ + Matrix3d m; + m << 850.961, 51.966, 0, + 51.966, 254.841, 0, + 0, 0, 0; + selfadjointeigensolver_essential_check(m); +} + +template +void bug_1014() +{ + Matrix3d m; + m << 0.11111111111111114658, 0, 0, + 0, 0.11111111111111109107, 0, + 0, 0, 0.11111111111111107719; + selfadjointeigensolver_essential_check(m); +} + +template +void bug_1225() +{ + Matrix3d m1, m2; + m1.setRandom(); + m1 = m1*m1.transpose(); + m2 = m1.triangularView(); + SelfAdjointEigenSolver eig1(m1); + SelfAdjointEigenSolver eig2(m2.selfadjointView()); + VERIFY_IS_APPROX(eig1.eigenvalues(), eig2.eigenvalues()); +} + +template +void bug_1204() +{ + SparseMatrix A(2,2); + A.setIdentity(); + SelfAdjointEigenSolver > eig(A); } void test_eigensolver_selfadjoint() { int s = 0; for(int i = 0; i < g_repeat; i++) { + // trivial test for 1x1 matrices: + CALL_SUBTEST_1( selfadjointeigensolver(Matrix())); + CALL_SUBTEST_1( selfadjointeigensolver(Matrix())); // very important to test 3x3 and 2x2 matrices since we provide special paths for them - CALL_SUBTEST_1( selfadjointeigensolver(Matrix2f()) ); - CALL_SUBTEST_1( selfadjointeigensolver(Matrix2d()) ); - CALL_SUBTEST_1( selfadjointeigensolver(Matrix3f()) ); - CALL_SUBTEST_1( selfadjointeigensolver(Matrix3d()) ); + CALL_SUBTEST_12( selfadjointeigensolver(Matrix2f()) ); + CALL_SUBTEST_12( selfadjointeigensolver(Matrix2d()) ); + CALL_SUBTEST_13( selfadjointeigensolver(Matrix3f()) ); + CALL_SUBTEST_13( selfadjointeigensolver(Matrix3d()) ); CALL_SUBTEST_2( selfadjointeigensolver(Matrix4d()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_3( selfadjointeigensolver(MatrixXf(s,s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(s,s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_5( selfadjointeigensolver(MatrixXcd(s,s)) ); - - s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); CALL_SUBTEST_9( selfadjointeigensolver(Matrix,Dynamic,Dynamic,RowMajor>(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) // some trivial but implementation-wise tricky cases CALL_SUBTEST_4( selfadjointeigensolver(MatrixXd(1,1)) ); @@ -149,6 +258,11 @@ void test_eigensolver_selfadjoint() CALL_SUBTEST_6( selfadjointeigensolver(Matrix()) ); CALL_SUBTEST_7( selfadjointeigensolver(Matrix()) ); } + + CALL_SUBTEST_13( bug_854<0>() ); + CALL_SUBTEST_13( bug_1014<0>() ); + CALL_SUBTEST_13( bug_1204<0>() ); + CALL_SUBTEST_13( bug_1225<0>() ); // Test problem size constructors s = internal::random(1,EIGEN_TEST_MAX_SIZE/4); diff --git a/external/eigen3/test/evaluator_common.h b/external/eigen3/test/evaluator_common.h new file mode 100644 index 0000000..e69de29 diff --git a/external/eigen3/test/evaluators.cpp b/external/eigen3/test/evaluators.cpp new file mode 100644 index 0000000..aed5a05 --- /dev/null +++ b/external/eigen3/test/evaluators.cpp @@ -0,0 +1,499 @@ + +#include "main.h" + +namespace Eigen { + + template + const Product + prod(const Lhs& lhs, const Rhs& rhs) + { + return Product(lhs,rhs); + } + + template + const Product + lazyprod(const Lhs& lhs, const Rhs& rhs) + { + return Product(lhs,rhs); + } + + template + EIGEN_STRONG_INLINE + DstXprType& copy_using_evaluator(const EigenBase &dst, const SrcXprType &src) + { + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + return dst.const_cast_derived(); + } + + template class StorageBase, typename SrcXprType> + EIGEN_STRONG_INLINE + const DstXprType& copy_using_evaluator(const NoAlias& dst, const SrcXprType &src) + { + call_assignment(dst, src.derived(), internal::assign_op()); + return dst.expression(); + } + + template + EIGEN_STRONG_INLINE + DstXprType& copy_using_evaluator(const PlainObjectBase &dst, const SrcXprType &src) + { + #ifdef EIGEN_NO_AUTOMATIC_RESIZING + eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size()) + : (dst.rows() == src.rows() && dst.cols() == src.cols()))) + && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + #else + dst.const_cast_derived().resizeLike(src.derived()); + #endif + + call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op()); + return dst.const_cast_derived(); + } + + template + void add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(const_cast(dst), src.derived(), internal::add_assign_op()); + } + + template + void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(const_cast(dst), src.derived(), internal::sub_assign_op()); + } + + template + void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op()); + } + + template + void divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op()); + } + + template + void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) + { + typedef typename DstXprType::Scalar Scalar; + call_assignment(dst.const_cast_derived(), src.const_cast_derived(), internal::swap_assign_op()); + } + + namespace internal { + template class StorageBase, typename Src, typename Func> + EIGEN_DEVICE_FUNC void call_assignment(const NoAlias& dst, const Src& src, const Func& func) + { + call_assignment_no_alias(dst.expression(), src, func); + } + } + +} + +template long get_cost(const XprType& ) { return Eigen::internal::evaluator::CoeffReadCost; } + +using namespace std; + +#define VERIFY_IS_APPROX_EVALUATOR(DEST,EXPR) VERIFY_IS_APPROX(copy_using_evaluator(DEST,(EXPR)), (EXPR).eval()); +#define VERIFY_IS_APPROX_EVALUATOR2(DEST,EXPR,REF) VERIFY_IS_APPROX(copy_using_evaluator(DEST,(EXPR)), (REF).eval()); + +void test_evaluators() +{ + // Testing Matrix evaluator and Transpose + Vector2d v = Vector2d::Random(); + const Vector2d v_const(v); + Vector2d v2; + RowVector2d w; + + VERIFY_IS_APPROX_EVALUATOR(v2, v); + VERIFY_IS_APPROX_EVALUATOR(v2, v_const); + + // Testing Transpose + VERIFY_IS_APPROX_EVALUATOR(w, v.transpose()); // Transpose as rvalue + VERIFY_IS_APPROX_EVALUATOR(w, v_const.transpose()); + + copy_using_evaluator(w.transpose(), v); // Transpose as lvalue + VERIFY_IS_APPROX(w,v.transpose().eval()); + + copy_using_evaluator(w.transpose(), v_const); + VERIFY_IS_APPROX(w,v_const.transpose().eval()); + + // Testing Array evaluator + { + ArrayXXf a(2,3); + ArrayXXf b(3,2); + a << 1,2,3, 4,5,6; + const ArrayXXf a_const(a); + + VERIFY_IS_APPROX_EVALUATOR(b, a.transpose()); + + VERIFY_IS_APPROX_EVALUATOR(b, a_const.transpose()); + + // Testing CwiseNullaryOp evaluator + copy_using_evaluator(w, RowVector2d::Random()); + VERIFY((w.array() >= -1).all() && (w.array() <= 1).all()); // not easy to test ... + + VERIFY_IS_APPROX_EVALUATOR(w, RowVector2d::Zero()); + + VERIFY_IS_APPROX_EVALUATOR(w, RowVector2d::Constant(3)); + + // mix CwiseNullaryOp and transpose + VERIFY_IS_APPROX_EVALUATOR(w, Vector2d::Zero().transpose()); + } + + { + // test product expressions + int s = internal::random(1,100); + MatrixXf a(s,s), b(s,s), c(s,s), d(s,s); + a.setRandom(); + b.setRandom(); + c.setRandom(); + d.setRandom(); + VERIFY_IS_APPROX_EVALUATOR(d, (a + b)); + VERIFY_IS_APPROX_EVALUATOR(d, (a + b).transpose()); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b), a*b); + VERIFY_IS_APPROX_EVALUATOR2(d.noalias(), prod(a,b), a*b); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + c, a*b + c); + VERIFY_IS_APPROX_EVALUATOR2(d, s * prod(a,b), s * a*b); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b).transpose(), (a*b).transpose()); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + prod(b,c), a*b + b*c); + + // check that prod works even with aliasing present + c = a*a; + copy_using_evaluator(a, prod(a,a)); + VERIFY_IS_APPROX(a,c); + + // check compound assignment of products + d = c; + add_assign_using_evaluator(c.noalias(), prod(a,b)); + d.noalias() += a*b; + VERIFY_IS_APPROX(c, d); + + d = c; + subtract_assign_using_evaluator(c.noalias(), prod(a,b)); + d.noalias() -= a*b; + VERIFY_IS_APPROX(c, d); + } + + { + // test product with all possible sizes + int s = internal::random(1,100); + Matrix m11, res11; m11.setRandom(1,1); + Matrix m14, res14; m14.setRandom(1,4); + Matrix m1X, res1X; m1X.setRandom(1,s); + Matrix m41, res41; m41.setRandom(4,1); + Matrix m44, res44; m44.setRandom(4,4); + Matrix m4X, res4X; m4X.setRandom(4,s); + Matrix mX1, resX1; mX1.setRandom(s,1); + Matrix mX4, resX4; mX4.setRandom(s,4); + Matrix mXX, resXX; mXX.setRandom(s,s); + + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m11,m11), m11*m11); + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m14,m41), m14*m41); + VERIFY_IS_APPROX_EVALUATOR2(res11, prod(m1X,mX1), m1X*mX1); + VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m11,m14), m11*m14); + VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m14,m44), m14*m44); + VERIFY_IS_APPROX_EVALUATOR2(res14, prod(m1X,mX4), m1X*mX4); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m11,m1X), m11*m1X); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m14,m4X), m14*m4X); + VERIFY_IS_APPROX_EVALUATOR2(res1X, prod(m1X,mXX), m1X*mXX); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m41,m11), m41*m11); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m44,m41), m44*m41); + VERIFY_IS_APPROX_EVALUATOR2(res41, prod(m4X,mX1), m4X*mX1); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m41,m14), m41*m14); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m44,m44), m44*m44); + VERIFY_IS_APPROX_EVALUATOR2(res44, prod(m4X,mX4), m4X*mX4); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m41,m1X), m41*m1X); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m44,m4X), m44*m4X); + VERIFY_IS_APPROX_EVALUATOR2(res4X, prod(m4X,mXX), m4X*mXX); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mX1,m11), mX1*m11); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mX4,m41), mX4*m41); + VERIFY_IS_APPROX_EVALUATOR2(resX1, prod(mXX,mX1), mXX*mX1); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mX1,m14), mX1*m14); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mX4,m44), mX4*m44); + VERIFY_IS_APPROX_EVALUATOR2(resX4, prod(mXX,mX4), mXX*mX4); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mX1,m1X), mX1*m1X); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mX4,m4X), mX4*m4X); + VERIFY_IS_APPROX_EVALUATOR2(resXX, prod(mXX,mXX), mXX*mXX); + } + + { + ArrayXXf a(2,3); + ArrayXXf b(3,2); + a << 1,2,3, 4,5,6; + const ArrayXXf a_const(a); + + // this does not work because Random is eval-before-nested: + // copy_using_evaluator(w, Vector2d::Random().transpose()); + + // test CwiseUnaryOp + VERIFY_IS_APPROX_EVALUATOR(v2, 3 * v); + VERIFY_IS_APPROX_EVALUATOR(w, (3 * v).transpose()); + VERIFY_IS_APPROX_EVALUATOR(b, (a + 3).transpose()); + VERIFY_IS_APPROX_EVALUATOR(b, (2 * a_const + 3).transpose()); + + // test CwiseBinaryOp + VERIFY_IS_APPROX_EVALUATOR(v2, v + Vector2d::Ones()); + VERIFY_IS_APPROX_EVALUATOR(w, (v + Vector2d::Ones()).transpose().cwiseProduct(RowVector2d::Constant(3))); + + // dynamic matrices and arrays + MatrixXd mat1(6,6), mat2(6,6); + VERIFY_IS_APPROX_EVALUATOR(mat1, MatrixXd::Identity(6,6)); + VERIFY_IS_APPROX_EVALUATOR(mat2, mat1); + copy_using_evaluator(mat2.transpose(), mat1); + VERIFY_IS_APPROX(mat2.transpose(), mat1); + + ArrayXXd arr1(6,6), arr2(6,6); + VERIFY_IS_APPROX_EVALUATOR(arr1, ArrayXXd::Constant(6,6, 3.0)); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1); + + // test automatic resizing + mat2.resize(3,3); + VERIFY_IS_APPROX_EVALUATOR(mat2, mat1); + arr2.resize(9,9); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1); + + // test direct traversal + Matrix3f m3; + Array33f a3; + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity()); // matrix, nullary + // TODO: find a way to test direct traversal with array + VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Identity().transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Identity()); // unary + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity() + Matrix3f::Zero()); // binary + VERIFY_IS_APPROX_EVALUATOR(m3.block(0,0,2,2), Matrix3f::Identity().block(1,1,2,2)); // block + + // test linear traversal + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero()); // matrix, nullary + VERIFY_IS_APPROX_EVALUATOR(a3, Array33f::Zero()); // array + VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Zero().transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Zero()); // unary + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero() + m3); // binary + + // test inner vectorization + Matrix4f m4, m4src = Matrix4f::Random(); + Array44f a4, a4src = Matrix4f::Random(); + VERIFY_IS_APPROX_EVALUATOR(m4, m4src); // matrix + VERIFY_IS_APPROX_EVALUATOR(a4, a4src); // array + VERIFY_IS_APPROX_EVALUATOR(m4.transpose(), m4src.transpose()); // transpose + // TODO: find out why Matrix4f::Zero() does not allow inner vectorization + VERIFY_IS_APPROX_EVALUATOR(m4, 2 * m4src); // unary + VERIFY_IS_APPROX_EVALUATOR(m4, m4src + m4src); // binary + + // test linear vectorization + MatrixXf mX(6,6), mXsrc = MatrixXf::Random(6,6); + ArrayXXf aX(6,6), aXsrc = ArrayXXf::Random(6,6); + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc); // matrix + VERIFY_IS_APPROX_EVALUATOR(aX, aXsrc); // array + VERIFY_IS_APPROX_EVALUATOR(mX.transpose(), mXsrc.transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(mX, MatrixXf::Zero(6,6)); // nullary + VERIFY_IS_APPROX_EVALUATOR(mX, 2 * mXsrc); // unary + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc + mXsrc); // binary + + // test blocks and slice vectorization + VERIFY_IS_APPROX_EVALUATOR(m4, (mXsrc.block<4,4>(1,0))); + VERIFY_IS_APPROX_EVALUATOR(aX, ArrayXXf::Constant(10, 10, 3.0).block(2, 3, 6, 6)); + + Matrix4f m4ref = m4; + copy_using_evaluator(m4.block(1, 1, 2, 3), m3.bottomRows(2)); + m4ref.block(1, 1, 2, 3) = m3.bottomRows(2); + VERIFY_IS_APPROX(m4, m4ref); + + mX.setIdentity(20,20); + MatrixXf mXref = MatrixXf::Identity(20,20); + mXsrc = MatrixXf::Random(9,12); + copy_using_evaluator(mX.block(4, 4, 9, 12), mXsrc); + mXref.block(4, 4, 9, 12) = mXsrc; + VERIFY_IS_APPROX(mX, mXref); + + // test Map + const float raw[3] = {1,2,3}; + float buffer[3] = {0,0,0}; + Vector3f v3; + Array3f a3f; + VERIFY_IS_APPROX_EVALUATOR(v3, Map(raw)); + VERIFY_IS_APPROX_EVALUATOR(a3f, Map(raw)); + Vector3f::Map(buffer) = 2*v3; + VERIFY(buffer[0] == 2); + VERIFY(buffer[1] == 4); + VERIFY(buffer[2] == 6); + + // test CwiseUnaryView + mat1.setRandom(); + mat2.setIdentity(); + MatrixXcd matXcd(6,6), matXcd_ref(6,6); + copy_using_evaluator(matXcd.real(), mat1); + copy_using_evaluator(matXcd.imag(), mat2); + matXcd_ref.real() = mat1; + matXcd_ref.imag() = mat2; + VERIFY_IS_APPROX(matXcd, matXcd_ref); + + // test Select + VERIFY_IS_APPROX_EVALUATOR(aX, (aXsrc > 0).select(aXsrc, -aXsrc)); + + // test Replicate + mXsrc = MatrixXf::Random(6, 6); + VectorXf vX = VectorXf::Random(6); + mX.resize(6, 6); + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc.colwise() + vX); + matXcd.resize(12, 12); + VERIFY_IS_APPROX_EVALUATOR(matXcd, matXcd_ref.replicate(2,2)); + VERIFY_IS_APPROX_EVALUATOR(matXcd, (matXcd_ref.replicate<2,2>())); + + // test partial reductions + VectorXd vec1(6); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.rowwise().sum()); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.colwise().sum().transpose()); + + // test MatrixWrapper and ArrayWrapper + mat1.setRandom(6,6); + arr1.setRandom(6,6); + VERIFY_IS_APPROX_EVALUATOR(mat2, arr1.matrix()); + VERIFY_IS_APPROX_EVALUATOR(arr2, mat1.array()); + VERIFY_IS_APPROX_EVALUATOR(mat2, (arr1 + 2).matrix()); + VERIFY_IS_APPROX_EVALUATOR(arr2, mat1.array() + 2); + mat2.array() = arr1 * arr1; + VERIFY_IS_APPROX(mat2, (arr1 * arr1).matrix()); + arr2.matrix() = MatrixXd::Identity(6,6); + VERIFY_IS_APPROX(arr2, MatrixXd::Identity(6,6).array()); + + // test Reverse + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.reverse()); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.colwise().reverse()); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.rowwise().reverse()); + arr2.reverse() = arr1; + VERIFY_IS_APPROX(arr2, arr1.reverse()); + mat2.array() = mat1.array().reverse(); + VERIFY_IS_APPROX(mat2.array(), mat1.array().reverse()); + + // test Diagonal + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal()); + vec1.resize(5); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal(1)); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal<-1>()); + vec1.setRandom(); + + mat2 = mat1; + copy_using_evaluator(mat1.diagonal(1), vec1); + mat2.diagonal(1) = vec1; + VERIFY_IS_APPROX(mat1, mat2); + + copy_using_evaluator(mat1.diagonal<-1>(), mat1.diagonal(1)); + mat2.diagonal<-1>() = mat2.diagonal(1); + VERIFY_IS_APPROX(mat1, mat2); + } + + { + // test swapping + MatrixXd mat1, mat2, mat1ref, mat2ref; + mat1ref = mat1 = MatrixXd::Random(6, 6); + mat2ref = mat2 = 2 * mat1 + MatrixXd::Identity(6, 6); + swap_using_evaluator(mat1, mat2); + mat1ref.swap(mat2ref); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + + swap_using_evaluator(mat1.block(0, 0, 3, 3), mat2.block(3, 3, 3, 3)); + mat1ref.block(0, 0, 3, 3).swap(mat2ref.block(3, 3, 3, 3)); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + + swap_using_evaluator(mat1.row(2), mat2.col(3).transpose()); + mat1.row(2).swap(mat2.col(3).transpose()); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + } + + { + // test compound assignment + const Matrix4d mat_const = Matrix4d::Random(); + Matrix4d mat, mat_ref; + mat = mat_ref = Matrix4d::Identity(); + add_assign_using_evaluator(mat, mat_const); + mat_ref += mat_const; + VERIFY_IS_APPROX(mat, mat_ref); + + subtract_assign_using_evaluator(mat.row(1), 2*mat.row(2)); + mat_ref.row(1) -= 2*mat_ref.row(2); + VERIFY_IS_APPROX(mat, mat_ref); + + const ArrayXXf arr_const = ArrayXXf::Random(5,3); + ArrayXXf arr, arr_ref; + arr = arr_ref = ArrayXXf::Constant(5, 3, 0.5); + multiply_assign_using_evaluator(arr, arr_const); + arr_ref *= arr_const; + VERIFY_IS_APPROX(arr, arr_ref); + + divide_assign_using_evaluator(arr.row(1), arr.row(2) + 1); + arr_ref.row(1) /= (arr_ref.row(2) + 1); + VERIFY_IS_APPROX(arr, arr_ref); + } + + { + // test triangular shapes + MatrixXd A = MatrixXd::Random(6,6), B(6,6), C(6,6), D(6,6); + A.setRandom();B.setRandom(); + VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView(), MatrixXd(A.triangularView())); + + A.setRandom();B.setRandom(); + VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView(), MatrixXd(A.triangularView())); + + A.setRandom();B.setRandom(); + VERIFY_IS_APPROX_EVALUATOR2(B, A.triangularView(), MatrixXd(A.triangularView())); + + A.setRandom();B.setRandom(); + C = B; C.triangularView() = A; + copy_using_evaluator(B.triangularView(), A); + VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView(), A)"); + + A.setRandom();B.setRandom(); + C = B; C.triangularView() = A.triangularView(); + copy_using_evaluator(B.triangularView(), A.triangularView()); + VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView(), A.triangularView())"); + + + A.setRandom();B.setRandom(); + C = B; C.triangularView() = A.triangularView().transpose(); + copy_using_evaluator(B.triangularView(), A.triangularView().transpose()); + VERIFY(B.isApprox(C) && "copy_using_evaluator(B.triangularView(), A.triangularView().transpose())"); + + + A.setRandom();B.setRandom(); C = B; D = A; + C.triangularView().swap(D.triangularView()); + swap_using_evaluator(B.triangularView(), A.triangularView()); + VERIFY(B.isApprox(C) && "swap_using_evaluator(B.triangularView(), A.triangularView())"); + + + VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.triangularView(),A), MatrixXd(A.triangularView()*A)); + + VERIFY_IS_APPROX_EVALUATOR2(B, prod(A.selfadjointView(),A), MatrixXd(A.selfadjointView()*A)); + } + + { + // test diagonal shapes + VectorXd d = VectorXd::Random(6); + MatrixXd A = MatrixXd::Random(6,6), B(6,6); + A.setRandom();B.setRandom(); + + VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(d.asDiagonal(),A), MatrixXd(d.asDiagonal()*A)); + VERIFY_IS_APPROX_EVALUATOR2(B, lazyprod(A,d.asDiagonal()), MatrixXd(A*d.asDiagonal())); + } + + { + // test CoeffReadCost + Matrix4d a, b; + VERIFY_IS_EQUAL( get_cost(a), 1 ); + VERIFY_IS_EQUAL( get_cost(a+b), 3); + VERIFY_IS_EQUAL( get_cost(2*a+b), 4); + VERIFY_IS_EQUAL( get_cost(a*b), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(b)), 15); + VERIFY_IS_EQUAL( get_cost(a*(a*b)), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a*b)), 15); + VERIFY_IS_EQUAL( get_cost(a*(a+b)), 1); + VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a+b)), 15); + } +} diff --git a/external/eigen3/test/fastmath.cpp b/external/eigen3/test/fastmath.cpp new file mode 100644 index 0000000..cc5db07 --- /dev/null +++ b/external/eigen3/test/fastmath.cpp @@ -0,0 +1,99 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +void check(bool b, bool ref) +{ + std::cout << b; + if(b==ref) + std::cout << " OK "; + else + std::cout << " BAD "; +} + +#if EIGEN_COMP_MSVC && EIGEN_COMP_MSVC < 1800 +namespace std { + template bool (isfinite)(T x) { return _finite(x); } + template bool (isnan)(T x) { return _isnan(x); } + template bool (isinf)(T x) { return _fpclass(x)==_FPCLASS_NINF || _fpclass(x)==_FPCLASS_PINF; } +} +#endif + +template +void check_inf_nan(bool dryrun) { + Matrix m(10); + m.setRandom(); + m(3) = std::numeric_limits::quiet_NaN(); + + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),true); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), true); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( (numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + T hidden_zero = (std::numeric_limits::min)()*(std::numeric_limits::min)(); + m(4) /= hidden_zero; + if(dryrun) + { + std::cout << "std::isfinite(" << m(4) << ") = "; check((std::isfinite)(m(4)),false); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(4)), false); std::cout << "\n"; + std::cout << "std::isinf(" << m(4) << ") = "; check((std::isinf)(m(4)),true); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(4)), true); std::cout << "\n"; + std::cout << "std::isnan(" << m(4) << ") = "; check((std::isnan)(m(4)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(4)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 1); std::cout << "\n"; + std::cout << "\n"; + } + else + { + VERIFY( !(numext::isfinite)(m(4)) ); + VERIFY( (numext::isinf)(m(4)) ); + VERIFY( !(numext::isnan)(m(4)) ); + VERIFY( !m.allFinite() ); + VERIFY( m.hasNaN() ); + } + m(3) = 0; + if(dryrun) + { + std::cout << "std::isfinite(" << m(3) << ") = "; check((std::isfinite)(m(3)),true); std::cout << " ; numext::isfinite = "; check((numext::isfinite)(m(3)), true); std::cout << "\n"; + std::cout << "std::isinf(" << m(3) << ") = "; check((std::isinf)(m(3)),false); std::cout << " ; numext::isinf = "; check((numext::isinf)(m(3)), false); std::cout << "\n"; + std::cout << "std::isnan(" << m(3) << ") = "; check((std::isnan)(m(3)),false); std::cout << " ; numext::isnan = "; check((numext::isnan)(m(3)), false); std::cout << "\n"; + std::cout << "allFinite: "; check(m.allFinite(), 0); std::cout << "\n"; + std::cout << "hasNaN: "; check(m.hasNaN(), 0); std::cout << "\n"; + std::cout << "\n\n"; + } + else + { + VERIFY( (numext::isfinite)(m(3)) ); + VERIFY( !(numext::isinf)(m(3)) ); + VERIFY( !(numext::isnan)(m(3)) ); + VERIFY( !m.allFinite() ); + VERIFY( !m.hasNaN() ); + } +} + +void test_fastmath() { + std::cout << "*** float *** \n\n"; check_inf_nan(true); + std::cout << "*** double ***\n\n"; check_inf_nan(true); + std::cout << "*** long double *** \n\n"; check_inf_nan(true); + + check_inf_nan(false); + check_inf_nan(false); + check_inf_nan(false); +} diff --git a/external/eigen3/test/first_aligned.cpp b/external/eigen3/test/first_aligned.cpp index 467f945..ae2d4bc 100644 --- a/external/eigen3/test/first_aligned.cpp +++ b/external/eigen3/test/first_aligned.cpp @@ -13,7 +13,7 @@ template void test_first_aligned_helper(Scalar *array, int size) { const int packet_size = sizeof(Scalar) * internal::packet_traits::size; - VERIFY(((size_t(array) + sizeof(Scalar) * internal::first_aligned(array, size)) % packet_size) == 0); + VERIFY(((size_t(array) + sizeof(Scalar) * internal::first_default_aligned(array, size)) % packet_size) == 0); } template @@ -21,7 +21,7 @@ void test_none_aligned_helper(Scalar *array, int size) { EIGEN_UNUSED_VARIABLE(array); EIGEN_UNUSED_VARIABLE(size); - VERIFY(internal::packet_traits::size == 1 || internal::first_aligned(array, size) == size); + VERIFY(internal::packet_traits::size == 1 || internal::first_default_aligned(array, size) == size); } struct some_non_vectorizable_type { float x; }; @@ -41,7 +41,7 @@ void test_first_aligned() test_first_aligned_helper(array_double+1, 50); test_first_aligned_helper(array_double+2, 50); - double *array_double_plus_4_bytes = (double*)(size_t(array_double)+4); + double *array_double_plus_4_bytes = (double*)(internal::UIntPtr(array_double)+4); test_none_aligned_helper(array_double_plus_4_bytes, 50); test_none_aligned_helper(array_double_plus_4_bytes+1, 50); diff --git a/external/eigen3/test/geo_alignedbox.cpp b/external/eigen3/test/geo_alignedbox.cpp index 84663ad..d2339a6 100644 --- a/external/eigen3/test/geo_alignedbox.cpp +++ b/external/eigen3/test/geo_alignedbox.cpp @@ -16,7 +16,7 @@ using namespace std; template EIGEN_DONT_INLINE -void kill_extra_precision(T& x) { eigen_assert(&x != 0); } +void kill_extra_precision(T& x) { eigen_assert((void*)(&x) != (void*)0); } template void alignedbox(const BoxType& _box) @@ -48,12 +48,21 @@ template void alignedbox(const BoxType& _box) b0.extend(p0); b0.extend(p1); VERIFY(b0.contains(p0*s1+(Scalar(1)-s1)*p1)); + VERIFY(b0.contains(b0.center())); + VERIFY_IS_APPROX(b0.center(),(p0+p1)/Scalar(2)); (b2 = b0).extend(b1); VERIFY(b2.contains(b0)); VERIFY(b2.contains(b1)); VERIFY_IS_APPROX(b2.clamp(b0), b0); + // intersection + BoxType box1(VectorType::Random(dim)); + box1.extend(VectorType::Random(dim)); + BoxType box2(VectorType::Random(dim)); + box2.extend(VectorType::Random(dim)); + + VERIFY(box1.intersects(box2) == !box1.intersection(box2).isEmpty()); // alignment -- make sure there is no memory alignment assertion BoxType *bp0 = new BoxType(dim); diff --git a/external/eigen3/test/geo_eulerangles.cpp b/external/eigen3/test/geo_eulerangles.cpp index b4830bd..932ebe7 100644 --- a/external/eigen3/test/geo_eulerangles.cpp +++ b/external/eigen3/test/geo_eulerangles.cpp @@ -26,16 +26,16 @@ void verify_euler(const Matrix& ea, int i, int j, int k) VERIFY_IS_APPROX(m, mbis); /* If I==K, and ea[1]==0, then there no unique solution. */ /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. */ - if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(M_PI/2),test_precision())) ) + if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision())) ) VERIFY((ea-eabis).norm() <= test_precision()); // approx_or_less_than does not work for 0 VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1))); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(M_PI)); - VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(M_PI), eabis[1]); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(M_PI)); - VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(M_PI), eabis[2]); - VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(M_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(EIGEN_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[1]); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(EIGEN_PI)); + VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(EIGEN_PI), eabis[2]); + VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(EIGEN_PI)); } template void check_all_var(const Matrix& ea) @@ -64,7 +64,7 @@ template void eulerangles() typedef Quaternion Quaternionx; typedef AngleAxis AngleAxisx; - Scalar a = internal::random(-Scalar(M_PI), Scalar(M_PI)); + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); Quaternionx q1; q1 = AngleAxisx(a, Vector3::Random().normalized()); Matrix3 m; @@ -84,13 +84,13 @@ template void eulerangles() check_all_var(ea); // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi]. - ea = (Array3::Random() + Array3(1,0,0))*Scalar(M_PI)*Array3(0.5,1,1); + ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1); check_all_var(ea); - ea[2] = ea[0] = internal::random(0,Scalar(M_PI)); + ea[2] = ea[0] = internal::random(0,Scalar(EIGEN_PI)); check_all_var(ea); - ea[0] = ea[1] = internal::random(0,Scalar(M_PI)); + ea[0] = ea[1] = internal::random(0,Scalar(EIGEN_PI)); check_all_var(ea); ea[1] = 0; diff --git a/external/eigen3/test/geo_homogeneous.cpp b/external/eigen3/test/geo_homogeneous.cpp index 0133030..2187c7b 100644 --- a/external/eigen3/test/geo_homogeneous.cpp +++ b/external/eigen3/test/geo_homogeneous.cpp @@ -38,6 +38,10 @@ template void homogeneous(void) hv0 << v0, 1; VERIFY_IS_APPROX(v0.homogeneous(), hv0); VERIFY_IS_APPROX(v0, hv0.hnormalized()); + + VERIFY_IS_APPROX(v0.homogeneous().sum(), hv0.sum()); + VERIFY_IS_APPROX(v0.homogeneous().minCoeff(), hv0.minCoeff()); + VERIFY_IS_APPROX(v0.homogeneous().maxCoeff(), hv0.maxCoeff()); hm0 << m0, ones.transpose(); VERIFY_IS_APPROX(m0.colwise().homogeneous(), hm0); @@ -59,7 +63,6 @@ template void homogeneous(void) VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2, v0.transpose().rowwise().homogeneous() * t2); - m0.transpose().rowwise().homogeneous().eval(); VERIFY_IS_APPROX((m0.transpose().rowwise().homogeneous().eval()) * t2, m0.transpose().rowwise().homogeneous() * t2); @@ -84,7 +87,7 @@ template void homogeneous(void) VERIFY_IS_APPROX(aff * pts.colwise().homogeneous(), (aff * pts1).colwise().hnormalized()); VERIFY_IS_APPROX(caff * pts.colwise().homogeneous(), (caff * pts1).colwise().hnormalized()); VERIFY_IS_APPROX(proj * pts.colwise().homogeneous(), (proj * pts1)); - + VERIFY_IS_APPROX((aff * pts1).colwise().hnormalized(), aff * pts); VERIFY_IS_APPROX((caff * pts1).colwise().hnormalized(), caff * pts); @@ -93,6 +96,23 @@ template void homogeneous(void) VERIFY_IS_APPROX((aff * pts2).colwise().hnormalized(), aff * pts2.colwise().hnormalized()); VERIFY_IS_APPROX((caff * pts2).colwise().hnormalized(), caff * pts2.colwise().hnormalized()); VERIFY_IS_APPROX((proj * pts2).colwise().hnormalized(), (proj * pts2.colwise().hnormalized().colwise().homogeneous()).colwise().hnormalized()); + + // Test combination of homogeneous + + VERIFY_IS_APPROX( (t2 * v0.homogeneous()).hnormalized(), + (t2.template topLeftCorner() * v0 + t2.template topRightCorner()) + / ((t2.template bottomLeftCorner<1,Size>()*v0).value() + t2(Size,Size)) ); + + VERIFY_IS_APPROX( (t2 * pts.colwise().homogeneous()).colwise().hnormalized(), + (Matrix(t2 * pts1).colwise().hnormalized()) ); + + VERIFY_IS_APPROX( (t2 .lazyProduct( v0.homogeneous() )).hnormalized(), (t2 * v0.homogeneous()).hnormalized() ); + VERIFY_IS_APPROX( (t2 .lazyProduct ( pts.colwise().homogeneous() )).colwise().hnormalized(), (t2 * pts1).colwise().hnormalized() ); + + VERIFY_IS_APPROX( (v0.transpose().homogeneous() .lazyProduct( t2 )).hnormalized(), (v0.transpose().homogeneous()*t2).hnormalized() ); + VERIFY_IS_APPROX( (pts.transpose().rowwise().homogeneous() .lazyProduct( t2 )).rowwise().hnormalized(), (pts1.transpose()*t2).rowwise().hnormalized() ); + + VERIFY_IS_APPROX( (t2.template triangularView() * v0.homogeneous()).eval(), (t2.template triangularView()*hv0) ); } void test_geo_homogeneous() diff --git a/external/eigen3/test/geo_hyperplane.cpp b/external/eigen3/test/geo_hyperplane.cpp index 3275378..2789285 100644 --- a/external/eigen3/test/geo_hyperplane.cpp +++ b/external/eigen3/test/geo_hyperplane.cpp @@ -18,10 +18,12 @@ template void hyperplane(const HyperplaneType& _plane) /* this test covers the following files: Hyperplane.h */ + using std::abs; typedef typename HyperplaneType::Index Index; const Index dim = _plane.dim(); enum { Options = HyperplaneType::Options }; typedef typename HyperplaneType::Scalar Scalar; + typedef typename HyperplaneType::RealScalar RealScalar; typedef Matrix VectorType; typedef Matrix MatrixType; @@ -42,7 +44,10 @@ template void hyperplane(const HyperplaneType& _plane) VERIFY_IS_APPROX( n1.dot(n1), Scalar(1) ); VERIFY_IS_MUCH_SMALLER_THAN( pl0.absDistance(p0), Scalar(1) ); - VERIFY_IS_APPROX( pl1.signedDistance(p1 + n1 * s0), s0 ); + if(numext::abs2(s0)>RealScalar(1e-6)) + VERIFY_IS_APPROX( pl1.signedDistance(p1 + n1 * s0), s0); + else + VERIFY_IS_MUCH_SMALLER_THAN( abs(pl1.signedDistance(p1 + n1 * s0) - s0), Scalar(1) ); VERIFY_IS_MUCH_SMALLER_THAN( pl1.signedDistance(pl1.projection(p0)), Scalar(1) ); VERIFY_IS_MUCH_SMALLER_THAN( pl1.absDistance(p1 + pl1.normal().unitOrthogonal() * s1), Scalar(1) ); @@ -52,6 +57,8 @@ template void hyperplane(const HyperplaneType& _plane) MatrixType rot = MatrixType::Random(dim,dim).householderQr().householderQ(); DiagonalMatrix scaling(VectorType::Random()); Translation translation(VectorType::Random()); + + while(scaling.diagonal().cwiseAbs().minCoeff() void hyperplane(const HyperplaneType& _plane) VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot,Isometry).absDistance(rot * p1), Scalar(1) ); pl2 = pl1; VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling).absDistance((rot*scaling) * p1), Scalar(1) ); + VERIFY_IS_APPROX( pl2.normal().norm(), RealScalar(1) ); pl2 = pl1; VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling*translation) - .absDistance((rot*scaling*translation) * p1), Scalar(1) ); + .absDistance((rot*scaling*translation) * p1), Scalar(1) ); + VERIFY_IS_APPROX( pl2.normal().norm(), RealScalar(1) ); pl2 = pl1; VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*translation,Isometry) .absDistance((rot*translation) * p1), Scalar(1) ); + VERIFY_IS_APPROX( pl2.normal().norm(), RealScalar(1) ); } // casting @@ -90,9 +100,9 @@ template void lines() Vector u = Vector::Random(); Vector v = Vector::Random(); Scalar a = internal::random(); - while (abs(a-1) < 1e-4) a = internal::random(); - while (u.norm() < 1e-4) u = Vector::Random(); - while (v.norm() < 1e-4) v = Vector::Random(); + while (abs(a-1) < Scalar(1e-4)) a = internal::random(); + while (u.norm() < Scalar(1e-4)) u = Vector::Random(); + while (v.norm() < Scalar(1e-4)) v = Vector::Random(); HLine line_u = HLine::Through(center + u, center + a*u); HLine line_v = HLine::Through(center + v, center + a*v); @@ -104,12 +114,15 @@ template void lines() Vector result = line_u.intersection(line_v); // the lines should intersect at the point we called "center" - VERIFY_IS_APPROX(result, center); + if(abs(a-1) > Scalar(1e-2) && abs(v.normalized().dot(u.normalized())) void hyperplane_alignment() typedef Hyperplane Plane3a; typedef Hyperplane Plane3u; - EIGEN_ALIGN16 Scalar array1[4]; - EIGEN_ALIGN16 Scalar array2[4]; - EIGEN_ALIGN16 Scalar array3[4+1]; + EIGEN_ALIGN_MAX Scalar array1[4]; + EIGEN_ALIGN_MAX Scalar array2[4]; + EIGEN_ALIGN_MAX Scalar array3[4+1]; Scalar* array3u = array3+1; Plane3a *p1 = ::new(reinterpret_cast(array1)) Plane3a; @@ -161,8 +174,8 @@ template void hyperplane_alignment() VERIFY_IS_APPROX(p1->coeffs(), p2->coeffs()); VERIFY_IS_APPROX(p1->coeffs(), p3->coeffs()); - #if defined(EIGEN_VECTORIZE) && EIGEN_ALIGN_STATICALLY - if(internal::packet_traits::Vectorizable) + #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES > 0 + if(internal::packet_traits::Vectorizable && internal::packet_traits::size<=4) VERIFY_RAISES_ASSERT((::new(reinterpret_cast(array3u)) Plane3a)); #endif } diff --git a/external/eigen3/test/geo_orthomethods.cpp b/external/eigen3/test/geo_orthomethods.cpp index c836dae..e178df2 100644 --- a/external/eigen3/test/geo_orthomethods.cpp +++ b/external/eigen3/test/geo_orthomethods.cpp @@ -33,12 +33,16 @@ template void orthomethods_3() VERIFY_IS_MUCH_SMALLER_THAN(v1.dot(v1.cross(v2)), Scalar(1)); VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(v2).dot(v2), Scalar(1)); VERIFY_IS_MUCH_SMALLER_THAN(v2.dot(v1.cross(v2)), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN(v1.cross(Vector3::Random()).dot(v1), Scalar(1)); Matrix3 mat3; mat3 << v0.normalized(), (v0.cross(v1)).normalized(), (v0.cross(v1).cross(v0)).normalized(); VERIFY(mat3.isUnitary()); - + + mat3.setRandom(); + VERIFY_IS_APPROX(v0.cross(mat3*v1), -(mat3*v1).cross(v0)); + VERIFY_IS_APPROX(v0.cross(mat3.lazyProduct(v1)), -(mat3.lazyProduct(v1)).cross(v0)); // colwise/rowwise cross product mat3.setRandom(); @@ -47,6 +51,13 @@ template void orthomethods_3() int i = internal::random(0,2); mcross = mat3.colwise().cross(vec3); VERIFY_IS_APPROX(mcross.col(i), mat3.col(i).cross(vec3)); + + VERIFY_IS_MUCH_SMALLER_THAN((mat3.adjoint() * mat3.colwise().cross(vec3)).diagonal().cwiseAbs().sum(), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN((mat3.adjoint() * mat3.colwise().cross(Vector3::Random())).diagonal().cwiseAbs().sum(), Scalar(1)); + + VERIFY_IS_MUCH_SMALLER_THAN((vec3.adjoint() * mat3.colwise().cross(vec3)).cwiseAbs().sum(), Scalar(1)); + VERIFY_IS_MUCH_SMALLER_THAN((vec3.adjoint() * Matrix3::Random().colwise().cross(vec3)).cwiseAbs().sum(), Scalar(1)); + mcross = mat3.rowwise().cross(vec3); VERIFY_IS_APPROX(mcross.row(i), mat3.row(i).cross(vec3)); @@ -57,6 +68,7 @@ template void orthomethods_3() v40.w() = v41.w() = v42.w() = 0; v42.template head<3>() = v40.template head<3>().cross(v41.template head<3>()); VERIFY_IS_APPROX(v40.cross3(v41), v42); + VERIFY_IS_MUCH_SMALLER_THAN(v40.cross3(Vector4::Random()).dot(v40), Scalar(1)); // check mixed product typedef Matrix RealVector3; diff --git a/external/eigen3/test/geo_parametrizedline.cpp b/external/eigen3/test/geo_parametrizedline.cpp index f0462d4..9bf5f3c 100644 --- a/external/eigen3/test/geo_parametrizedline.cpp +++ b/external/eigen3/test/geo_parametrizedline.cpp @@ -66,9 +66,9 @@ template void parametrizedline_alignment() typedef ParametrizedLine Line4a; typedef ParametrizedLine Line4u; - EIGEN_ALIGN16 Scalar array1[8]; - EIGEN_ALIGN16 Scalar array2[8]; - EIGEN_ALIGN16 Scalar array3[8+1]; + EIGEN_ALIGN_MAX Scalar array1[16]; + EIGEN_ALIGN_MAX Scalar array2[16]; + EIGEN_ALIGN_MAX Scalar array3[16+1]; Scalar* array3u = array3+1; Line4a *p1 = ::new(reinterpret_cast(array1)) Line4a; @@ -85,8 +85,8 @@ template void parametrizedline_alignment() VERIFY_IS_APPROX(p1->direction(), p2->direction()); VERIFY_IS_APPROX(p1->direction(), p3->direction()); - #if defined(EIGEN_VECTORIZE) && EIGEN_ALIGN_STATICALLY - if(internal::packet_traits::Vectorizable) + #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 + if(internal::packet_traits::Vectorizable && internal::packet_traits::size<=4) VERIFY_RAISES_ASSERT((::new(reinterpret_cast(array3u)) Line4a)); #endif } diff --git a/external/eigen3/test/geo_quaternion.cpp b/external/eigen3/test/geo_quaternion.cpp index 1694b32..96889e7 100644 --- a/external/eigen3/test/geo_quaternion.cpp +++ b/external/eigen3/test/geo_quaternion.cpp @@ -30,8 +30,8 @@ template void check_slerp(const QuatType& q0, const QuatType& Scalar largeEps = test_precision(); Scalar theta_tot = AA(q1*q0.inverse()).angle(); - if(theta_tot>M_PI) - theta_tot = Scalar(2.*M_PI)-theta_tot; + if(theta_tot>Scalar(EIGEN_PI)) + theta_tot = Scalar(2.)*Scalar(EIGEN_PI)-theta_tot; for(Scalar t=0; t<=Scalar(1.001); t+=Scalar(0.1)) { QuatType q = q0.slerp(t,q1); @@ -49,13 +49,13 @@ template void quaternion(void) */ using std::abs; typedef Matrix Vector3; - typedef Matrix Vector4; + typedef Matrix Matrix3; typedef Quaternion Quaternionx; typedef AngleAxis AngleAxisx; Scalar largeEps = test_precision(); if (internal::is_same::value) - largeEps = 1e-3f; + largeEps = Scalar(1e-3); Scalar eps = internal::random() * Scalar(1e-2); @@ -64,8 +64,8 @@ template void quaternion(void) v2 = Vector3::Random(), v3 = Vector3::Random(); - Scalar a = internal::random(-Scalar(M_PI), Scalar(M_PI)), - b = internal::random(-Scalar(M_PI), Scalar(M_PI)); + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)), + b = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); // Quaternion: Identity(), setIdentity(); Quaternionx q1, q2; @@ -82,8 +82,8 @@ template void quaternion(void) // angular distance Scalar refangle = abs(AngleAxisx(q1.inverse()*q2).angle()); - if (refangle>Scalar(M_PI)) - refangle = Scalar(2)*Scalar(M_PI) - refangle; + if (refangle>Scalar(EIGEN_PI)) + refangle = Scalar(2)*Scalar(EIGEN_PI) - refangle; if((q1.coeffs()-q2.coeffs()).norm() > 10*largeEps) { @@ -101,6 +101,11 @@ template void quaternion(void) q2 = q1.toRotationMatrix(); VERIFY_IS_APPROX(q1*v1,q2*v1); + Matrix3 rot1(q1); + VERIFY_IS_APPROX(q1*v1,rot1*v1); + Quaternionx q3(rot1.transpose()*rot1); + VERIFY_IS_APPROX(q3*v1,v1); + // angle-axis conversion AngleAxisx aa = AngleAxisx(q1); @@ -109,8 +114,8 @@ template void quaternion(void) // Do not execute the test if the rotation angle is almost zero, or // the rotation axis and v1 are almost parallel. if (abs(aa.angle()) > 5*test_precision() - && (aa.axis() - v1.normalized()).norm() < 1.99 - && (aa.axis() + v1.normalized()).norm() < 1.99) + && (aa.axis() - v1.normalized()).norm() < Scalar(1.99) + && (aa.axis() + v1.normalized()).norm() < Scalar(1.99)) { VERIFY_IS_NOT_APPROX(q1 * v1, Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1); } @@ -151,19 +156,19 @@ template void quaternion(void) Quaternionx *q = new Quaternionx; delete q; - q1 = AngleAxisx(a, v0.normalized()); - q2 = AngleAxisx(b, v1.normalized()); + q1 = Quaternionx::UnitRandom(); + q2 = Quaternionx::UnitRandom(); check_slerp(q1,q2); q1 = AngleAxisx(b, v1.normalized()); - q2 = AngleAxisx(b+Scalar(M_PI), v1.normalized()); + q2 = AngleAxisx(b+Scalar(EIGEN_PI), v1.normalized()); check_slerp(q1,q2); q1 = AngleAxisx(b, v1.normalized()); q2 = AngleAxisx(-b, -v1.normalized()); check_slerp(q1,q2); - q1.coeffs() = Vector4::Random().normalized(); + q1 = Quaternionx::UnitRandom(); q2.coeffs() = -q1.coeffs(); check_slerp(q1,q2); } @@ -179,11 +184,11 @@ template void mapQuaternion(void){ Vector3 v0 = Vector3::Random(), v1 = Vector3::Random(); - Scalar a = internal::random(-Scalar(M_PI), Scalar(M_PI)); + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); - EIGEN_ALIGN16 Scalar array1[4]; - EIGEN_ALIGN16 Scalar array2[4]; - EIGEN_ALIGN16 Scalar array3[4+1]; + EIGEN_ALIGN_MAX Scalar array1[4]; + EIGEN_ALIGN_MAX Scalar array2[4]; + EIGEN_ALIGN_MAX Scalar array3[4+1]; Scalar* array3unaligned = array3+1; MQuaternionA mq1(array1); @@ -232,9 +237,9 @@ template void quaternionAlignment(void){ typedef Quaternion QuaternionA; typedef Quaternion QuaternionUA; - EIGEN_ALIGN16 Scalar array1[4]; - EIGEN_ALIGN16 Scalar array2[4]; - EIGEN_ALIGN16 Scalar array3[4+1]; + EIGEN_ALIGN_MAX Scalar array1[4]; + EIGEN_ALIGN_MAX Scalar array2[4]; + EIGEN_ALIGN_MAX Scalar array3[4+1]; Scalar* arrayunaligned = array3+1; QuaternionA *q1 = ::new(reinterpret_cast(array1)) QuaternionA; @@ -247,8 +252,8 @@ template void quaternionAlignment(void){ VERIFY_IS_APPROX(q1->coeffs(), q2->coeffs()); VERIFY_IS_APPROX(q1->coeffs(), q3->coeffs()); - #if defined(EIGEN_VECTORIZE) && EIGEN_ALIGN_STATICALLY - if(internal::packet_traits::Vectorizable) + #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 + if(internal::packet_traits::Vectorizable && internal::packet_traits::size<=4) VERIFY_RAISES_ASSERT((::new(reinterpret_cast(arrayunaligned)) QuaternionA)); #endif } diff --git a/external/eigen3/test/geo_transformations.cpp b/external/eigen3/test/geo_transformations.cpp old mode 100644 new mode 100755 index 383c42b..278e527 --- a/external/eigen3/test/geo_transformations.cpp +++ b/external/eigen3/test/geo_transformations.cpp @@ -12,6 +12,17 @@ #include #include +template +Matrix angleToVec(T a) +{ + return Matrix(std::cos(a), std::sin(a)); +} + +// This permits to workaround a bug in clang/llvm code generation. +template +EIGEN_DONT_INLINE +void dont_over_optimize(T& x) { volatile typename T::Scalar tmp = x(0); x(0) = tmp; } + template void non_projective_only() { /* this test covers the following files: @@ -29,7 +40,7 @@ template void non_projective_only() Transform3 t0, t1, t2; - Scalar a = internal::random(-Scalar(M_PI), Scalar(M_PI)); + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); Quaternionx q1, q2; @@ -97,16 +108,14 @@ template void transformations() v1 = Vector3::Random(); Matrix3 matrot1, m; - Scalar a = internal::random(-Scalar(M_PI), Scalar(M_PI)); - Scalar s0 = internal::random(), - s1 = internal::random(); + Scalar a = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + Scalar s0 = internal::random(), s1 = internal::random(); while(v0.norm() < test_precision()) v0 = Vector3::Random(); while(v1.norm() < test_precision()) v1 = Vector3::Random(); - VERIFY_IS_APPROX(v0, AngleAxisx(a, v0.normalized()) * v0); - VERIFY_IS_APPROX(-v0, AngleAxisx(Scalar(M_PI), v0.unitOrthogonal()) * v0); + VERIFY_IS_APPROX(-v0, AngleAxisx(Scalar(EIGEN_PI), v0.unitOrthogonal()) * v0); if(abs(cos(a)) > test_precision()) { VERIFY_IS_APPROX(cos(a)*v0.squaredNorm(), v0.dot(AngleAxisx(a, v0.unitOrthogonal()) * v0)); @@ -132,14 +141,16 @@ template void transformations() AngleAxisx aa = AngleAxisx(q1); VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); - if(abs(aa.angle()) > NumTraits::dummy_precision()) + // The following test is stable only if 2*angle != angle and v1 is not colinear with axis + if( (abs(aa.angle()) > test_precision()) && (abs(aa.axis().dot(v1.normalized()))<(Scalar(1)-Scalar(4)*test_precision())) ) { VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) ); } aa.fromRotationMatrix(aa.toRotationMatrix()); VERIFY_IS_APPROX(q1 * v1, Quaternionx(aa) * v1); - if(abs(aa.angle()) > NumTraits::dummy_precision()) + // The following test is stable only if 2*angle != angle and v1 is not colinear with axis + if( (abs(aa.angle()) > test_precision()) && (abs(aa.axis().dot(v1.normalized()))<(Scalar(1)-Scalar(4)*test_precision())) ) { VERIFY( !(q1 * v1).isApprox(Quaternionx(AngleAxisx(aa.angle()*2,aa.axis())) * v1) ); } @@ -158,7 +169,7 @@ template void transformations() // TODO complete the tests ! a = 0; while (abs(a)(-Scalar(0.4)*Scalar(M_PI), Scalar(0.4)*Scalar(M_PI)); + a = internal::random(-Scalar(0.4)*Scalar(EIGEN_PI), Scalar(0.4)*Scalar(EIGEN_PI)); q1 = AngleAxisx(a, v0.normalized()); Transform3 t0, t1, t2; @@ -204,7 +215,7 @@ template void transformations() tmat4.matrix()(3,3) = Scalar(1); VERIFY_IS_APPROX(tmat3.matrix(), tmat4.matrix()); - Scalar a3 = internal::random(-Scalar(M_PI), Scalar(M_PI)); + Scalar a3 = internal::random(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); Vector3 v3 = Vector3::Random().normalized(); AngleAxisx aa3(a3, v3); Transform3 t3(aa3); @@ -216,12 +227,15 @@ template void transformations() t4 *= aa3; VERIFY_IS_APPROX(t3.matrix(), t4.matrix()); - v3 = Vector3::Random(); + do { + v3 = Vector3::Random(); + dont_over_optimize(v3); + } while (v3.cwiseAbs().minCoeff()::epsilon()); Translation3 tv3(v3); Transform3 t5(tv3); t4 = tv3; VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); - t4.translate(-v3); + t4.translate((-v3).eval()); VERIFY_IS_APPROX(t4.matrix(), MatrixType::Identity()); t4 *= tv3; VERIFY_IS_APPROX(t5.matrix(), t4.matrix()); @@ -413,12 +427,28 @@ template void transformations() VERIFY_IS_APPROX(r2d1f.template cast(),r2d1); Rotation2D r2d1d = r2d1.template cast(); VERIFY_IS_APPROX(r2d1d.template cast(),r2d1); - - t20 = Translation2(v20) * (Rotation2D(s0) * Eigen::Scaling(s0)); - t21 = Translation2(v20) * Rotation2D(s0) * Eigen::Scaling(s0); - VERIFY_IS_APPROX(t20,t21); + for(int k=0; k<100; ++k) + { + Scalar angle = internal::random(-100,100); + Rotation2D rot2(angle); + VERIFY( rot2.smallestPositiveAngle() >= 0 ); + VERIFY( rot2.smallestPositiveAngle() <= Scalar(2)*Scalar(EIGEN_PI) ); + VERIFY_IS_APPROX( angleToVec(rot2.smallestPositiveAngle()), angleToVec(rot2.angle()) ); + + VERIFY( rot2.smallestAngle() >= -Scalar(EIGEN_PI) ); + VERIFY( rot2.smallestAngle() <= Scalar(EIGEN_PI) ); + VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot2.angle()) ); + + Matrix rot2_as_mat(rot2); + Rotation2D rot3(rot2_as_mat); + VERIFY_IS_APPROX( angleToVec(rot2.smallestAngle()), angleToVec(rot3.angle()) ); + } + + s0 = internal::random(-100,100); + s1 = internal::random(-100,100); Rotation2D R0(s0), R1(s1); + t20 = Translation2(v20) * (R0 * Eigen::Scaling(s0)); t21 = Translation2(v20) * R0 * Eigen::Scaling(s0); VERIFY_IS_APPROX(t20,t21); @@ -428,9 +458,24 @@ template void transformations() VERIFY_IS_APPROX(t20,t21); VERIFY_IS_APPROX(s0, (R0.slerp(0, R1)).angle()); - VERIFY_IS_APPROX(s1, (R0.slerp(1, R1)).angle()); - VERIFY_IS_APPROX(s0, (R0.slerp(0.5, R0)).angle()); - VERIFY_IS_APPROX(Scalar(0), (R0.slerp(0.5, R0.inverse())).angle()); + VERIFY_IS_APPROX( angleToVec(R1.smallestPositiveAngle()), angleToVec((R0.slerp(1, R1)).smallestPositiveAngle()) ); + VERIFY_IS_APPROX(R0.smallestPositiveAngle(), (R0.slerp(0.5, R0)).smallestPositiveAngle()); + + if(std::cos(s0)>0) + VERIFY_IS_MUCH_SMALLER_THAN((R0.slerp(0.5, R0.inverse())).smallestAngle(), Scalar(1)); + else + VERIFY_IS_APPROX(Scalar(EIGEN_PI), (R0.slerp(0.5, R0.inverse())).smallestPositiveAngle()); + + // Check path length + Scalar l = 0; + int path_steps = 100; + for(int k=0; k::epsilon()*Scalar(path_steps/2))); // check basic features { @@ -520,9 +565,9 @@ template void transform_alignment() typedef Transform Projective3a; typedef Transform Projective3u; - EIGEN_ALIGN16 Scalar array1[16]; - EIGEN_ALIGN16 Scalar array2[16]; - EIGEN_ALIGN16 Scalar array3[16+1]; + EIGEN_ALIGN_MAX Scalar array1[16]; + EIGEN_ALIGN_MAX Scalar array2[16]; + EIGEN_ALIGN_MAX Scalar array3[16+1]; Scalar* array3u = array3+1; Projective3a *p1 = ::new(reinterpret_cast(array1)) Projective3a; @@ -538,7 +583,7 @@ template void transform_alignment() VERIFY_IS_APPROX( (*p1) * (*p1), (*p2)*(*p3)); - #if defined(EIGEN_VECTORIZE) && EIGEN_ALIGN_STATICALLY + #if defined(EIGEN_VECTORIZE) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 if(internal::packet_traits::Vectorizable) VERIFY_RAISES_ASSERT((::new(reinterpret_cast(array3u)) Projective3a)); #endif @@ -594,7 +639,7 @@ void test_geo_transformations() CALL_SUBTEST_7(( transform_products() )); CALL_SUBTEST_7(( transform_products() )); - CALL_SUBTEST_8(( transform_associativity(Rotation2D(internal::random()*double(3.14))) )); - CALL_SUBTEST_8(( transform_associativity(Quaterniond(Vector4d::Random().normalized())) )); + CALL_SUBTEST_8(( transform_associativity(Rotation2D(internal::random()*double(EIGEN_PI))) )); + CALL_SUBTEST_8(( transform_associativity(Quaterniond::UnitRandom()) )); } } diff --git a/external/eigen3/test/half_float.cpp b/external/eigen3/test/half_float.cpp new file mode 100644 index 0000000..3d2410a --- /dev/null +++ b/external/eigen3/test/half_float.cpp @@ -0,0 +1,264 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include "main.h" + +#include + +// Make sure it's possible to forward declare Eigen::half +namespace Eigen { +struct half; +} + +using Eigen::half; + +void test_conversion() +{ + using Eigen::half_impl::__half; + + // Conversion from float. + VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00); + VERIFY_IS_EQUAL(half(0.5f).x, 0x3800); + VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555); + VERIFY_IS_EQUAL(half(0.0f).x, 0x0000); + VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000); + VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff); + VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity. + + // Denormals. + VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001); + VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001); + VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002); + + // Verify round-to-nearest-even behavior. + float val1 = float(half(__half(0x3c00))); + float val2 = float(half(__half(0x3c01))); + float val3 = float(half(__half(0x3c02))); + VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00); + VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02); + + // Conversion from int. + VERIFY_IS_EQUAL(half(-1).x, 0xbc00); + VERIFY_IS_EQUAL(half(0).x, 0x0000); + VERIFY_IS_EQUAL(half(1).x, 0x3c00); + VERIFY_IS_EQUAL(half(2).x, 0x4000); + VERIFY_IS_EQUAL(half(3).x, 0x4200); + + // Conversion from bool. + VERIFY_IS_EQUAL(half(false).x, 0x0000); + VERIFY_IS_EQUAL(half(true).x, 0x3c00); + + // Conversion to float. + VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f); + VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f); + + // Denormals. + VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f); + VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f); + + // NaNs and infinities. + VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number. + VERIFY(!(numext::isnan)(float(half(0.0f)))); + VERIFY((numext::isinf)(float(half(__half(0xfc00))))); + VERIFY((numext::isnan)(float(half(__half(0xfc01))))); + VERIFY((numext::isinf)(float(half(__half(0x7c00))))); + VERIFY((numext::isnan)(float(half(__half(0x7c01))))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 + VERIFY((numext::isnan)(float(half(0.0 / 0.0)))); + VERIFY((numext::isinf)(float(half(1.0 / 0.0)))); + VERIFY((numext::isinf)(float(half(-1.0 / 0.0)))); +#endif + + // Exactly same checks as above, just directly on the half representation. + VERIFY(!(numext::isinf)(half(__half(0x7bff)))); + VERIFY(!(numext::isnan)(half(__half(0x0000)))); + VERIFY((numext::isinf)(half(__half(0xfc00)))); + VERIFY((numext::isnan)(half(__half(0xfc01)))); + VERIFY((numext::isinf)(half(__half(0x7c00)))); + VERIFY((numext::isnan)(half(__half(0x7c01)))); + +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 + VERIFY((numext::isnan)(half(0.0 / 0.0))); + VERIFY((numext::isinf)(half(1.0 / 0.0))); + VERIFY((numext::isinf)(half(-1.0 / 0.0))); +#endif +} + +void test_numtraits() +{ + std::cout << "epsilon = " << NumTraits::epsilon() << " (0x" << std::hex << NumTraits::epsilon().x << ")" << std::endl; + std::cout << "highest = " << NumTraits::highest() << " (0x" << std::hex << NumTraits::highest().x << ")" << std::endl; + std::cout << "lowest = " << NumTraits::lowest() << " (0x" << std::hex << NumTraits::lowest().x << ")" << std::endl; + std::cout << "min = " << (std::numeric_limits::min)() << " (0x" << std::hex << half((std::numeric_limits::min)()).x << ")" << std::endl; + std::cout << "denorm min = " << (std::numeric_limits::denorm_min)() << " (0x" << std::hex << half((std::numeric_limits::denorm_min)()).x << ")" << std::endl; + std::cout << "infinity = " << NumTraits::infinity() << " (0x" << std::hex << NumTraits::infinity().x << ")" << std::endl; + std::cout << "quiet nan = " << NumTraits::quiet_NaN() << " (0x" << std::hex << NumTraits::quiet_NaN().x << ")" << std::endl; + std::cout << "signaling nan = " << std::numeric_limits::signaling_NaN() << " (0x" << std::hex << std::numeric_limits::signaling_NaN().x << ")" << std::endl; + + VERIFY(NumTraits::IsSigned); + + VERIFY_IS_EQUAL( std::numeric_limits::infinity().x, half(std::numeric_limits::infinity()).x ); + VERIFY_IS_EQUAL( std::numeric_limits::quiet_NaN().x, half(std::numeric_limits::quiet_NaN()).x ); + VERIFY_IS_EQUAL( std::numeric_limits::signaling_NaN().x, half(std::numeric_limits::signaling_NaN()).x ); + VERIFY( (std::numeric_limits::min)() > half(0.f) ); + VERIFY( (std::numeric_limits::denorm_min)() > half(0.f) ); + VERIFY( (std::numeric_limits::min)()/half(2) > half(0.f) ); + VERIFY_IS_EQUAL( (std::numeric_limits::denorm_min)()/half(2), half(0.f) ); +} + +void test_arithmetic() +{ + VERIFY_IS_EQUAL(float(half(2) + half(2)), 4); + VERIFY_IS_EQUAL(float(half(2) + half(-2)), 0); + VERIFY_IS_APPROX(float(half(0.33333f) + half(0.66667f)), 1.0f); + VERIFY_IS_EQUAL(float(half(2.0f) * half(-5.5f)), -11.0f); + VERIFY_IS_APPROX(float(half(1.0f) / half(3.0f)), 0.33333f); + VERIFY_IS_EQUAL(float(-half(4096.0f)), -4096.0f); + VERIFY_IS_EQUAL(float(-half(-4096.0f)), 4096.0f); +} + +void test_comparison() +{ + VERIFY(half(1.0f) > half(0.5f)); + VERIFY(half(0.5f) < half(1.0f)); + VERIFY(!(half(1.0f) < half(0.5f))); + VERIFY(!(half(0.5f) > half(1.0f))); + + VERIFY(!(half(4.0f) > half(4.0f))); + VERIFY(!(half(4.0f) < half(4.0f))); + + VERIFY(!(half(0.0f) < half(-0.0f))); + VERIFY(!(half(-0.0f) < half(0.0f))); + VERIFY(!(half(0.0f) > half(-0.0f))); + VERIFY(!(half(-0.0f) > half(0.0f))); + + VERIFY(half(0.2f) > half(-1.0f)); + VERIFY(half(-1.0f) < half(0.2f)); + VERIFY(half(-16.0f) < half(-15.0f)); + + VERIFY(half(1.0f) == half(1.0f)); + VERIFY(half(1.0f) != half(2.0f)); + + // Comparisons with NaNs and infinities. +#if !EIGEN_COMP_MSVC + // Visual Studio errors out on divisions by 0 + VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0))); + VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0)); + + VERIFY(!(half(1.0) == half(0.0 / 0.0))); + VERIFY(!(half(1.0) < half(0.0 / 0.0))); + VERIFY(!(half(1.0) > half(0.0 / 0.0))); + VERIFY(half(1.0) != half(0.0 / 0.0)); + + VERIFY(half(1.0) < half(1.0 / 0.0)); + VERIFY(half(1.0) > half(-1.0 / 0.0)); +#endif +} + +void test_basic_functions() +{ + VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(abs(half(3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(abs(half(-3.5f))), 3.5f); + + VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f); + VERIFY_IS_EQUAL(float(floor(half(3.5f))), 3.0f); + VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f); + VERIFY_IS_EQUAL(float(floor(half(-3.5f))), -4.0f); + + VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f); + VERIFY_IS_EQUAL(float(ceil(half(3.5f))), 4.0f); + VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f); + VERIFY_IS_EQUAL(float(ceil(half(-3.5f))), -3.0f); + + VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(sqrt(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f); + VERIFY_IS_APPROX(float(sqrt(half(4.0f))), 2.0f); + + VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(pow(half(0.0f), half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f); + VERIFY_IS_APPROX(float(pow(half(2.0f), half(2.0f))), 4.0f); + + VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f); + VERIFY_IS_EQUAL(float(exp(half(0.0f))), 1.0f); + VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); + VERIFY_IS_APPROX(float(exp(half(EIGEN_PI))), 20.f + float(EIGEN_PI)); + + VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f); + VERIFY_IS_EQUAL(float(log(half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); + VERIFY_IS_APPROX(float(log(half(10.0f))), 2.30273f); + + VERIFY_IS_EQUAL(float(numext::log1p(half(0.0f))), 0.0f); + VERIFY_IS_EQUAL(float(log1p(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::log1p(half(10.0f))), 2.3978953f); + VERIFY_IS_APPROX(float(log1p(half(10.0f))), 2.3978953f); +} + +void test_trigonometric_functions() +{ + VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f))); + VERIFY_IS_APPROX(cos(half(0.0f)), half(cosf(0.0f))); + VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI))); + //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2))); + //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f))); + + VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f))); + VERIFY_IS_APPROX(sin(half(0.0f)), half(sinf(0.0f))); + // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI))); + VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f))); + + VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f))); + VERIFY_IS_APPROX(tan(half(0.0f)), half(tanf(0.0f))); + // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI))); + // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2))); + //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f))); +} + +void test_array() +{ + typedef Array ArrayXh; + Index size = internal::random(1,10); + Index i = internal::random(0,size-1); + ArrayXh a1 = ArrayXh::Random(size), a2 = ArrayXh::Random(size); + VERIFY_IS_APPROX( a1+a1, half(2)*a1 ); + VERIFY( (a1.abs() >= half(0)).all() ); + VERIFY_IS_APPROX( (a1*a1).sqrt(), a1.abs() ); + + VERIFY( ((a1.min)(a2) <= (a1.max)(a2)).all() ); + a1(i) = half(-10.); + VERIFY_IS_EQUAL( a1.minCoeff(), half(-10.) ); + a1(i) = half(10.); + VERIFY_IS_EQUAL( a1.maxCoeff(), half(10.) ); + + std::stringstream ss; + ss << a1; +} + +void test_half_float() +{ + CALL_SUBTEST(test_conversion()); + CALL_SUBTEST(test_numtraits()); + CALL_SUBTEST(test_arithmetic()); + CALL_SUBTEST(test_comparison()); + CALL_SUBTEST(test_basic_functions()); + CALL_SUBTEST(test_trigonometric_functions()); + CALL_SUBTEST(test_array()); +} diff --git a/external/eigen3/test/incomplete_cholesky.cpp b/external/eigen3/test/incomplete_cholesky.cpp new file mode 100644 index 0000000..59ffe92 --- /dev/null +++ b/external/eigen3/test/incomplete_cholesky.cpp @@ -0,0 +1,65 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015-2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// #define EIGEN_DONT_VECTORIZE +// #define EIGEN_MAX_ALIGN_BYTES 0 +#include "sparse_solver.h" +#include +#include + +template void test_incomplete_cholesky_T() +{ + typedef SparseMatrix SparseMatrixType; + ConjugateGradient > > cg_illt_lower_amd; + ConjugateGradient > > cg_illt_lower_nat; + ConjugateGradient > > cg_illt_upper_amd; + ConjugateGradient > > cg_illt_upper_nat; + ConjugateGradient > > cg_illt_uplo_amd; + + + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_amd) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_lower_nat) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_upper_amd) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_upper_nat) ); + CALL_SUBTEST( check_sparse_spd_solving(cg_illt_uplo_amd) ); +} + +void test_incomplete_cholesky() +{ + CALL_SUBTEST_1(( test_incomplete_cholesky_T() )); + CALL_SUBTEST_2(( test_incomplete_cholesky_T, int>() )); + CALL_SUBTEST_3(( test_incomplete_cholesky_T() )); + +#ifdef EIGEN_TEST_PART_1 + // regression for bug 1150 + for(int N = 1; N<20; ++N) + { + Eigen::MatrixXd b( N, N ); + b.setOnes(); + + Eigen::SparseMatrix m( N, N ); + m.reserve(Eigen::VectorXi::Constant(N,4)); + for( int i = 0; i < N; ++i ) + { + m.insert( i, i ) = 1; + m.coeffRef( i, i / 2 ) = 2; + m.coeffRef( i, i / 3 ) = 2; + m.coeffRef( i, i / 4 ) = 2; + } + + Eigen::SparseMatrix A; + A = m * m.transpose(); + + Eigen::ConjugateGradient, + Eigen::Lower | Eigen::Upper, + Eigen::IncompleteCholesky > solver( A ); + VERIFY(solver.preconditioner().info() == Eigen::Success); + VERIFY(solver.info() == Eigen::Success); + } +#endif +} diff --git a/external/eigen3/test/inplace_decomposition.cpp b/external/eigen3/test/inplace_decomposition.cpp new file mode 100644 index 0000000..92d0d91 --- /dev/null +++ b/external/eigen3/test/inplace_decomposition.cpp @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include +#include +#include + +// This file test inplace decomposition through Ref<>, as supported by Cholesky, LU, and QR decompositions. + +template void inplace(bool square = false, bool SPD = false) +{ + typedef typename MatrixType::Scalar Scalar; + typedef Matrix RhsType; + typedef Matrix ResType; + + Index rows = MatrixType::RowsAtCompileTime==Dynamic ? internal::random(2,EIGEN_TEST_MAX_SIZE/2) : Index(MatrixType::RowsAtCompileTime); + Index cols = MatrixType::ColsAtCompileTime==Dynamic ? (square?rows:internal::random(2,rows)) : Index(MatrixType::ColsAtCompileTime); + + MatrixType A = MatrixType::Random(rows,cols); + RhsType b = RhsType::Random(rows); + ResType x(cols); + + if(SPD) + { + assert(square); + A.topRows(cols) = A.topRows(cols).adjoint() * A.topRows(cols); + A.diagonal().array() += 1e-3; + } + + MatrixType A0 = A; + MatrixType A1 = A; + + DecType dec(A); + + // Check that the content of A has been modified + VERIFY_IS_NOT_APPROX( A, A0 ); + + // Check that the decomposition is correct: + if(rows==cols) + { + VERIFY_IS_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_APPROX( A0.transpose() * A0 * (x = dec.solve(b)), A0.transpose() * b ); + } + + // Check that modifying A breaks the current dec: + A.setRandom(); + if(rows==cols) + { + VERIFY_IS_NOT_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_NOT_APPROX( A0.transpose() * A0 * (x = dec.solve(b)), A0.transpose() * b ); + } + + // Check that calling compute(A1) does not modify A1: + A = A0; + dec.compute(A1); + VERIFY_IS_EQUAL(A0,A1); + VERIFY_IS_NOT_APPROX( A, A0 ); + if(rows==cols) + { + VERIFY_IS_APPROX( A0 * (x = dec.solve(b)), b ); + } + else + { + VERIFY_IS_APPROX( A0.transpose() * A0 * (x = dec.solve(b)), A0.transpose() * b ); + } +} + + +void test_inplace_decomposition() +{ + EIGEN_UNUSED typedef Matrix Matrix43d; + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(( inplace >, MatrixXd>(true,true) )); + CALL_SUBTEST_1(( inplace >, Matrix4d>(true,true) )); + + CALL_SUBTEST_2(( inplace >, MatrixXd>(true,true) )); + CALL_SUBTEST_2(( inplace >, Matrix4d>(true,true) )); + + CALL_SUBTEST_3(( inplace >, MatrixXd>(true,false) )); + CALL_SUBTEST_3(( inplace >, Matrix4d>(true,false) )); + + CALL_SUBTEST_4(( inplace >, MatrixXd>(true,false) )); + CALL_SUBTEST_4(( inplace >, Matrix4d>(true,false) )); + + CALL_SUBTEST_5(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_5(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_6(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_6(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_7(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_7(( inplace >, Matrix43d>(false,false) )); + + CALL_SUBTEST_8(( inplace >, MatrixXd>(false,false) )); + CALL_SUBTEST_8(( inplace >, Matrix43d>(false,false) )); + } +} diff --git a/external/eigen3/test/integer_types.cpp b/external/eigen3/test/integer_types.cpp index 950f8e9..a21f73a 100644 --- a/external/eigen3/test/integer_types.cpp +++ b/external/eigen3/test/integer_types.cpp @@ -158,4 +158,12 @@ void test_integer_types() CALL_SUBTEST_8( integer_type_tests(Matrix(1, 5)) ); } +#ifdef EIGEN_TEST_PART_9 + VERIFY_IS_EQUAL(internal::scalar_div_cost::value, 8); + VERIFY_IS_EQUAL(internal::scalar_div_cost::value, 8); + if(sizeof(long)>sizeof(int)) { + VERIFY(internal::scalar_div_cost::value > internal::scalar_div_cost::value); + VERIFY(internal::scalar_div_cost::value > internal::scalar_div_cost::value); + } +#endif } diff --git a/external/eigen3/test/inverse.cpp b/external/eigen3/test/inverse.cpp index 8187b08..5c6777a 100644 --- a/external/eigen3/test/inverse.cpp +++ b/external/eigen3/test/inverse.cpp @@ -68,6 +68,15 @@ template void inverse(const MatrixType& m) VERIFY_IS_MUCH_SMALLER_THAN(abs(det-m3.determinant()), RealScalar(1)); m3.computeInverseWithCheck(m4, invertible); VERIFY( rows==1 ? invertible : !invertible ); + + // check with submatrices + { + Matrix m5; + m5.setRandom(); + m5.topLeftCorner(rows,rows) = m1; + m2 = m5.template topLeftCorner().inverse(); + VERIFY_IS_APPROX( (m5.template topLeftCorner()), m2.inverse() ); + } #endif // check in-place inversion @@ -93,12 +102,16 @@ void test_inverse() CALL_SUBTEST_3( inverse(Matrix3f()) ); CALL_SUBTEST_4( inverse(Matrix4f()) ); CALL_SUBTEST_4( inverse(Matrix()) ); + s = internal::random(50,320); CALL_SUBTEST_5( inverse(MatrixXf(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(25,100); CALL_SUBTEST_6( inverse(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + CALL_SUBTEST_7( inverse(Matrix4d()) ); CALL_SUBTEST_7( inverse(Matrix()) ); } - TEST_SET_BUT_UNUSED_VARIABLE(s) } diff --git a/external/eigen3/test/is_same_dense.cpp b/external/eigen3/test/is_same_dense.cpp new file mode 100644 index 0000000..2c7838c --- /dev/null +++ b/external/eigen3/test/is_same_dense.cpp @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +using internal::is_same_dense; + +void test_is_same_dense() +{ + typedef Matrix ColMatrixXd; + ColMatrixXd m1(10,10); + Ref ref_m1(m1); + Ref const_ref_m1(m1); + VERIFY(is_same_dense(m1,m1)); + VERIFY(is_same_dense(m1,ref_m1)); + VERIFY(is_same_dense(const_ref_m1,m1)); + VERIFY(is_same_dense(const_ref_m1,ref_m1)); + + VERIFY(is_same_dense(m1.block(0,0,m1.rows(),m1.cols()),m1)); + VERIFY(!is_same_dense(m1.row(0),m1.col(0))); + + Ref const_ref_m1_row(m1.row(1)); + VERIFY(!is_same_dense(m1.row(1),const_ref_m1_row)); + + Ref const_ref_m1_col(m1.col(1)); + VERIFY(is_same_dense(m1.col(1),const_ref_m1_col)); +} diff --git a/external/eigen3/test/jacobisvd.cpp b/external/eigen3/test/jacobisvd.cpp index 12c556e..7f5f715 100644 --- a/external/eigen3/test/jacobisvd.cpp +++ b/external/eigen3/test/jacobisvd.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2014 Gael Guennebaud // Copyright (C) 2009 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -14,279 +14,47 @@ #include "main.h" #include -template -void jacobisvd_check_full(const MatrixType& m, const JacobiSVD& svd) -{ - typedef typename MatrixType::Index Index; - Index rows = m.rows(); - Index cols = m.cols(); - - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime - }; - - typedef typename MatrixType::Scalar Scalar; - typedef Matrix MatrixUType; - typedef Matrix MatrixVType; - - MatrixType sigma = MatrixType::Zero(rows,cols); - sigma.diagonal() = svd.singularValues().template cast(); - MatrixUType u = svd.matrixU(); - MatrixVType v = svd.matrixV(); - - VERIFY_IS_APPROX(m, u * sigma * v.adjoint()); - VERIFY_IS_UNITARY(u); - VERIFY_IS_UNITARY(v); -} - -template -void jacobisvd_compare_to_full(const MatrixType& m, - unsigned int computationOptions, - const JacobiSVD& referenceSvd) -{ - typedef typename MatrixType::Index Index; - Index rows = m.rows(); - Index cols = m.cols(); - Index diagSize = (std::min)(rows, cols); - - JacobiSVD svd(m, computationOptions); - - VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues()); - if(computationOptions & ComputeFullU) - VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU()); - if(computationOptions & ComputeThinU) - VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize)); - if(computationOptions & ComputeFullV) - VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV()); - if(computationOptions & ComputeThinV) - VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize)); -} - -template -void jacobisvd_solve(const MatrixType& m, unsigned int computationOptions) -{ - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - Index rows = m.rows(); - Index cols = m.cols(); - - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime - }; - - typedef Matrix RhsType; - typedef Matrix SolutionType; - - RhsType rhs = RhsType::Random(rows, internal::random(1, cols)); - JacobiSVD svd(m, computationOptions); - - if(internal::is_same::value) svd.setThreshold(1e-8); - else if(internal::is_same::value) svd.setThreshold(1e-4); - - SolutionType x = svd.solve(rhs); - - RealScalar residual = (m*x-rhs).norm(); - // Check that there is no significantly better solution in the neighborhood of x - if(!test_isMuchSmallerThan(residual,rhs.norm())) - { - // If the residual is very small, then we have an exact solution, so we are already good. - for(int k=0;k::epsilon(); - RealScalar residual_y = (m*y-rhs).norm(); - VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); - - y.row(k) = x.row(k).array() - 2*NumTraits::epsilon(); - residual_y = (m*y-rhs).norm(); - VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); - } - } - - // evaluate normal equation which works also for least-squares solutions - if(internal::is_same::value) - { - // This test is not stable with single precision. - // This is probably because squaring m signicantly affects the precision. - VERIFY_IS_APPROX(m.adjoint()*m*x,m.adjoint()*rhs); - } - - // check minimal norm solutions - { - // generate a full-rank m x n problem with m MatrixType2; - typedef Matrix RhsType2; - typedef Matrix MatrixType2T; - Index rank = RankAtCompileTime2==Dynamic ? internal::random(1,cols) : Index(RankAtCompileTime2); - MatrixType2 m2(rank,cols); - int guard = 0; - do { - m2.setRandom(); - } while(m2.jacobiSvd().setThreshold(test_precision()).rank()!=rank && (++guard)<10); - VERIFY(guard<10); - RhsType2 rhs2 = RhsType2::Random(rank); - // use QR to find a reference minimal norm solution - HouseholderQR qr(m2.adjoint()); - Matrix tmp = qr.matrixQR().topLeftCorner(rank,rank).template triangularView().adjoint().solve(rhs2); - tmp.conservativeResize(cols); - tmp.tail(cols-rank).setZero(); - SolutionType x21 = qr.householderQ() * tmp; - // now check with SVD - JacobiSVD svd2(m2, computationOptions); - SolutionType x22 = svd2.solve(rhs2); - VERIFY_IS_APPROX(m2*x21, rhs2); - VERIFY_IS_APPROX(m2*x22, rhs2); - VERIFY_IS_APPROX(x21, x22); - - // Now check with a rank deficient matrix - typedef Matrix MatrixType3; - typedef Matrix RhsType3; - Index rows3 = RowsAtCompileTime3==Dynamic ? internal::random(rank+1,2*cols) : Index(RowsAtCompileTime3); - Matrix C = Matrix::Random(rows3,rank); - MatrixType3 m3 = C * m2; - RhsType3 rhs3 = C * rhs2; - JacobiSVD svd3(m3, computationOptions); - SolutionType x3 = svd3.solve(rhs3); - if(svd3.rank()!=rank) { - std::cout << m3 << "\n\n"; - std::cout << svd3.singularValues().transpose() << "\n"; - std::cout << svd3.rank() << " == " << rank << "\n"; - std::cout << x21.norm() << " == " << x3.norm() << "\n"; - } -// VERIFY_IS_APPROX(m3*x3, rhs3); - VERIFY_IS_APPROX(m3*x21, rhs3); - VERIFY_IS_APPROX(m2*x3, rhs2); - - VERIFY_IS_APPROX(x21, x3); - } -} - -template -void jacobisvd_test_all_computation_options(const MatrixType& m) -{ - if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols()) - return; - JacobiSVD fullSvd(m, ComputeFullU|ComputeFullV); - CALL_SUBTEST(( jacobisvd_check_full(m, fullSvd) )); - CALL_SUBTEST(( jacobisvd_solve(m, ComputeFullU | ComputeFullV) )); - - #if defined __INTEL_COMPILER - // remark #111: statement is unreachable - #pragma warning disable 111 - #endif - if(QRPreconditioner == FullPivHouseholderQRPreconditioner) - return; - - CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullU, fullSvd) )); - CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullV, fullSvd) )); - CALL_SUBTEST(( jacobisvd_compare_to_full(m, 0, fullSvd) )); - - if (MatrixType::ColsAtCompileTime == Dynamic) { - // thin U/V are only available with dynamic number of columns - CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeFullU|ComputeThinV, fullSvd) )); - CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinV, fullSvd) )); - CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU|ComputeFullV, fullSvd) )); - CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU , fullSvd) )); - CALL_SUBTEST(( jacobisvd_compare_to_full(m, ComputeThinU|ComputeThinV, fullSvd) )); - CALL_SUBTEST(( jacobisvd_solve(m, ComputeFullU | ComputeThinV) )); - CALL_SUBTEST(( jacobisvd_solve(m, ComputeThinU | ComputeFullV) )); - CALL_SUBTEST(( jacobisvd_solve(m, ComputeThinU | ComputeThinV) )); - - // test reconstruction - typedef typename MatrixType::Index Index; - Index diagSize = (std::min)(m.rows(), m.cols()); - JacobiSVD svd(m, ComputeThinU | ComputeThinV); - VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint()); - } -} +#define SVD_DEFAULT(M) JacobiSVD +#define SVD_FOR_MIN_NORM(M) JacobiSVD +#include "svd_common.h" +// Check all variants of JacobiSVD template void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true) { MatrixType m = a; if(pickrandom) - { - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; - Index diagSize = (std::min)(a.rows(), a.cols()); - RealScalar s = std::numeric_limits::max_exponent10/4; - s = internal::random(1,s); - Matrix d = Matrix::Random(diagSize); - for(Index k=0; k(-s,s)); - m = Matrix::Random(a.rows(),diagSize) * d.asDiagonal() * Matrix::Random(diagSize,a.cols()); - // cancel some coeffs - Index n = internal::random(0,m.size()-1); - for(Index i=0; i(0,m.rows()-1), internal::random(0,m.cols()-1)) = Scalar(0); - } + svd_fill_random(m); - CALL_SUBTEST(( jacobisvd_test_all_computation_options(m) )); - CALL_SUBTEST(( jacobisvd_test_all_computation_options(m) )); - CALL_SUBTEST(( jacobisvd_test_all_computation_options(m) )); - CALL_SUBTEST(( jacobisvd_test_all_computation_options(m) )); + CALL_SUBTEST(( svd_test_all_computation_options >(m, true) )); // check full only + CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); + CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); + if(m.rows()==m.cols()) + CALL_SUBTEST(( svd_test_all_computation_options >(m, false) )); } template void jacobisvd_verify_assert(const MatrixType& m) { - typedef typename MatrixType::Scalar Scalar; + svd_verify_assert >(m); typedef typename MatrixType::Index Index; Index rows = m.rows(); Index cols = m.cols(); enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime }; - typedef Matrix RhsType; - - RhsType rhs(rows); - - JacobiSVD svd; - VERIFY_RAISES_ASSERT(svd.matrixU()) - VERIFY_RAISES_ASSERT(svd.singularValues()) - VERIFY_RAISES_ASSERT(svd.matrixV()) - VERIFY_RAISES_ASSERT(svd.solve(rhs)) MatrixType a = MatrixType::Zero(rows, cols); a.setZero(); - svd.compute(a, 0); - VERIFY_RAISES_ASSERT(svd.matrixU()) - VERIFY_RAISES_ASSERT(svd.matrixV()) - svd.singularValues(); - VERIFY_RAISES_ASSERT(svd.solve(rhs)) if (ColsAtCompileTime == Dynamic) { - svd.compute(a, ComputeThinU); - svd.matrixU(); - VERIFY_RAISES_ASSERT(svd.matrixV()) - VERIFY_RAISES_ASSERT(svd.solve(rhs)) - - svd.compute(a, ComputeThinV); - svd.matrixV(); - VERIFY_RAISES_ASSERT(svd.matrixU()) - VERIFY_RAISES_ASSERT(svd.solve(rhs)) - JacobiSVD svd_fullqr; VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV)) VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV)) VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV)) } - else - { - VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU)) - VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV)) - } } template @@ -302,126 +70,17 @@ void jacobisvd_method() VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m); } -// work around stupid msvc error when constructing at compile time an expression that involves -// a division by zero, even if the numeric type has floating point -template -EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); } - -// workaround aggressive optimization in ICC -template EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; } - -template -void jacobisvd_inf_nan() -{ - // all this function does is verify we don't iterate infinitely on nan/inf values - - JacobiSVD svd; - typedef typename MatrixType::Scalar Scalar; - Scalar some_inf = Scalar(1) / zero(); - VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf)); - svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV); - - Scalar nan = std::numeric_limits::quiet_NaN(); - VERIFY(nan != nan); - svd.compute(MatrixType::Constant(10,10,nan), ComputeFullU | ComputeFullV); - - MatrixType m = MatrixType::Zero(10,10); - m(internal::random(0,9), internal::random(0,9)) = some_inf; - svd.compute(m, ComputeFullU | ComputeFullV); - - m = MatrixType::Zero(10,10); - m(internal::random(0,9), internal::random(0,9)) = nan; - svd.compute(m, ComputeFullU | ComputeFullV); - - // regression test for bug 791 - m.resize(3,3); - m << 0, 2*NumTraits::epsilon(), 0.5, - 0, -0.5, 0, - nan, 0, 0; - svd.compute(m, ComputeFullU | ComputeFullV); -} - -// Regression test for bug 286: JacobiSVD loops indefinitely with some -// matrices containing denormal numbers. -void jacobisvd_bug286() -{ -#if defined __INTEL_COMPILER -// shut up warning #239: floating point underflow -#pragma warning push -#pragma warning disable 239 -#endif - Matrix2d M; - M << -7.90884e-313, -4.94e-324, - 0, 5.60844e-313; -#if defined __INTEL_COMPILER -#pragma warning pop -#endif - JacobiSVD svd; - svd.compute(M); // just check we don't loop indefinitely -} - -void jacobisvd_preallocate() -{ - Vector3f v(3.f, 2.f, 1.f); - MatrixXf m = v.asDiagonal(); - - internal::set_is_malloc_allowed(false); - VERIFY_RAISES_ASSERT(VectorXf tmp(10);) - JacobiSVD svd; - internal::set_is_malloc_allowed(true); - svd.compute(m); - VERIFY_IS_APPROX(svd.singularValues(), v); - - JacobiSVD svd2(3,3); - internal::set_is_malloc_allowed(false); - svd2.compute(m); - internal::set_is_malloc_allowed(true); - VERIFY_IS_APPROX(svd2.singularValues(), v); - VERIFY_RAISES_ASSERT(svd2.matrixU()); - VERIFY_RAISES_ASSERT(svd2.matrixV()); - svd2.compute(m, ComputeFullU | ComputeFullV); - VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity()); - VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity()); - internal::set_is_malloc_allowed(false); - svd2.compute(m); - internal::set_is_malloc_allowed(true); - - JacobiSVD svd3(3,3,ComputeFullU|ComputeFullV); - internal::set_is_malloc_allowed(false); - svd2.compute(m); - internal::set_is_malloc_allowed(true); - VERIFY_IS_APPROX(svd2.singularValues(), v); - VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity()); - VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity()); - internal::set_is_malloc_allowed(false); - svd2.compute(m, ComputeFullU|ComputeFullV); - internal::set_is_malloc_allowed(true); -} - void test_jacobisvd() { CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) )); CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) )); CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) )); CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) )); + + CALL_SUBTEST_11(svd_all_trivial_2x2(jacobisvd)); + CALL_SUBTEST_12(svd_all_trivial_2x2(jacobisvd)); for(int i = 0; i < g_repeat; i++) { - Matrix2cd m; - m << 0, 1, - 0, 1; - CALL_SUBTEST_1(( jacobisvd(m, false) )); - m << 1, 0, - 1, 0; - CALL_SUBTEST_1(( jacobisvd(m, false) )); - - Matrix2d n; - n << 0, 0, - 0, 0; - CALL_SUBTEST_2(( jacobisvd(n, false) )); - n << 0, 0, - 0, 1; - CALL_SUBTEST_2(( jacobisvd(n, false) )); - CALL_SUBTEST_3(( jacobisvd() )); CALL_SUBTEST_4(( jacobisvd() )); CALL_SUBTEST_5(( jacobisvd >() )); @@ -440,8 +99,14 @@ void test_jacobisvd() (void) c; // Test on inf/nan matrix - CALL_SUBTEST_7( jacobisvd_inf_nan() ); - CALL_SUBTEST_10( jacobisvd_inf_nan() ); + CALL_SUBTEST_7( (svd_inf_nan, MatrixXf>()) ); + CALL_SUBTEST_10( (svd_inf_nan, MatrixXd>()) ); + + // bug1395 test compile-time vectors as input + CALL_SUBTEST_13(( jacobisvd_verify_assert(Matrix()) )); + CALL_SUBTEST_13(( jacobisvd_verify_assert(Matrix()) )); + CALL_SUBTEST_13(( jacobisvd_verify_assert(Matrix(r)) )); + CALL_SUBTEST_13(( jacobisvd_verify_assert(Matrix(c)) )); } CALL_SUBTEST_7(( jacobisvd(MatrixXf(internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); @@ -455,8 +120,7 @@ void test_jacobisvd() CALL_SUBTEST_7( JacobiSVD(10,10) ); // Check that preallocation avoids subsequent mallocs - CALL_SUBTEST_9( jacobisvd_preallocate() ); + CALL_SUBTEST_9( svd_preallocate() ); - // Regression check for bug 286 - CALL_SUBTEST_2( jacobisvd_bug286() ); + CALL_SUBTEST_2( svd_underoverflow() ); } diff --git a/external/eigen3/test/linearstructure.cpp b/external/eigen3/test/linearstructure.cpp index 618984d..17474af 100644 --- a/external/eigen3/test/linearstructure.cpp +++ b/external/eigen3/test/linearstructure.cpp @@ -2,11 +2,15 @@ // for linear algebra. // // Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +static bool g_called; +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } + #include "main.h" template void linearStructure(const MatrixType& m) @@ -17,6 +21,7 @@ template void linearStructure(const MatrixType& m) */ typedef typename MatrixType::Index Index; typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; Index rows = m.rows(); Index cols = m.cols(); @@ -28,7 +33,7 @@ template void linearStructure(const MatrixType& m) m3(rows, cols); Scalar s1 = internal::random(); - while (abs(s1)<1e-3) s1 = internal::random(); + while (abs(s1)(); Index r = internal::random(0, rows-1), c = internal::random(0, cols-1); @@ -68,8 +73,48 @@ template void linearStructure(const MatrixType& m) VERIFY_IS_APPROX(m1.block(0,0,rows,cols) * s1, m1 * s1); } +// Make sure that complex * real and real * complex are properly optimized +template void real_complex(DenseIndex rows = MatrixType::RowsAtCompileTime, DenseIndex cols = MatrixType::ColsAtCompileTime) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + RealScalar s = internal::random(); + MatrixType m1 = MatrixType::Random(rows, cols); + + g_called = false; + VERIFY_IS_APPROX(s*m1, Scalar(s)*m1); + VERIFY(g_called && "real * matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1*s, m1*Scalar(s)); + VERIFY(g_called && "matrix * real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1/s, m1/Scalar(s)); + VERIFY(g_called && "matrix / real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s+m1.array(), Scalar(s)+m1.array()); + VERIFY(g_called && "real + matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()+s, m1.array()+Scalar(s)); + VERIFY(g_called && "matrix + real not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(s-m1.array(), Scalar(s)-m1.array()); + VERIFY(g_called && "real - matrix not properly optimized"); + + g_called = false; + VERIFY_IS_APPROX(m1.array()-s, m1.array()-Scalar(s)); + VERIFY(g_called && "matrix - real not properly optimized"); +} + void test_linearstructure() { + g_called = true; + VERIFY(g_called); // avoid `unneeded-internal-declaration` warning. for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( linearStructure(Matrix()) ); CALL_SUBTEST_2( linearStructure(Matrix2f()) ); @@ -80,5 +125,25 @@ void test_linearstructure() CALL_SUBTEST_7( linearStructure(MatrixXi (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_8( linearStructure(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); CALL_SUBTEST_9( linearStructure(ArrayXXf (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_10( linearStructure(ArrayXXcf (internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + + CALL_SUBTEST_11( real_complex() ); + CALL_SUBTEST_11( real_complex(10,10) ); + CALL_SUBTEST_11( real_complex(10,10) ); + } + +#ifdef EIGEN_TEST_PART_4 + { + // make sure that /=scalar and /scalar do not overflow + // rational: 1.0/4.94e-320 overflow, but m/4.94e-320 should not + Matrix4d m2, m3; + m3 = m2 = Matrix4d::Random()*1e-20; + m2 = m2 / 4.9e-320; + VERIFY_IS_APPROX(m2.cwiseQuotient(m2), Matrix4d::Ones()); + m3 /= 4.9e-320; + VERIFY_IS_APPROX(m3.cwiseQuotient(m3), Matrix4d::Ones()); + + } +#endif } diff --git a/external/eigen3/test/lscg.cpp b/external/eigen3/test/lscg.cpp new file mode 100644 index 0000000..d49ee00 --- /dev/null +++ b/external/eigen3/test/lscg.cpp @@ -0,0 +1,37 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse_solver.h" +#include + +template void test_lscg_T() +{ + LeastSquaresConjugateGradient > lscg_colmajor_diag; + LeastSquaresConjugateGradient, IdentityPreconditioner> lscg_colmajor_I; + LeastSquaresConjugateGradient > lscg_rowmajor_diag; + LeastSquaresConjugateGradient, IdentityPreconditioner> lscg_rowmajor_I; + + CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_diag) ); + CALL_SUBTEST( check_sparse_square_solving(lscg_colmajor_I) ); + + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_diag) ); + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_colmajor_I) ); + + CALL_SUBTEST( check_sparse_square_solving(lscg_rowmajor_diag) ); + CALL_SUBTEST( check_sparse_square_solving(lscg_rowmajor_I) ); + + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_rowmajor_diag) ); + CALL_SUBTEST( check_sparse_leastsquare_solving(lscg_rowmajor_I) ); +} + +void test_lscg() +{ + CALL_SUBTEST_1(test_lscg_T()); + CALL_SUBTEST_2(test_lscg_T >()); +} diff --git a/external/eigen3/test/lu.cpp b/external/eigen3/test/lu.cpp index 3746526..9787f4d 100644 --- a/external/eigen3/test/lu.cpp +++ b/external/eigen3/test/lu.cpp @@ -11,6 +11,11 @@ #include using namespace std; +template +typename MatrixType::RealScalar matrix_l1_norm(const MatrixType& m) { + return m.cwiseAbs().colwise().sum().maxCoeff(); +} + template void lu_non_invertible() { typedef typename MatrixType::Index Index; @@ -92,6 +97,26 @@ template void lu_non_invertible() // test that the code, which does resize(), may be applied to an xpr m2.block(0,0,m2.rows(),m2.cols()) = lu.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); + + // test solve with transposed + m3 = MatrixType::Random(rows,cols2); + m2 = m1.transpose()*m3; + m3 = MatrixType::Random(rows,cols2); + lu.template _solve_impl_transposed(m2, m3); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + m3 = MatrixType::Random(rows,cols2); + m3 = lu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + m3 = MatrixType::Random(rows,cols2); + m2 = m1.adjoint()*m3; + m3 = MatrixType::Random(rows,cols2); + lu.template _solve_impl_transposed(m2, m3); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); + m3 = MatrixType::Random(rows,cols2); + m3 = lu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); } template void lu_invertible() @@ -100,9 +125,9 @@ template void lu_invertible() LU.h */ typedef typename NumTraits::Real RealScalar; - DenseIndex size = MatrixType::RowsAtCompileTime; + Index size = MatrixType::RowsAtCompileTime; if( size==Dynamic) - size = internal::random(1,EIGEN_TEST_MAX_SIZE); + size = internal::random(1,EIGEN_TEST_MAX_SIZE); MatrixType m1(size, size), m2(size, size), m3(size, size); FullPivLU lu; @@ -123,7 +148,28 @@ template void lu_invertible() m3 = MatrixType::Random(size,size); m2 = lu.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); - VERIFY_IS_APPROX(m2, lu.inverse()*m3); + MatrixType m1_inverse = lu.inverse(); + VERIFY_IS_APPROX(m2, m1_inverse*m3); + + RealScalar rcond = (RealScalar(1) / matrix_l1_norm(m1)) / matrix_l1_norm(m1_inverse); + const RealScalar rcond_est = lu.rcond(); + // Verify that the estimated condition number is within a factor of 10 of the + // truth. + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + // test solve with transposed + lu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.transpose()*m2); + m3 = MatrixType::Random(size,size); + m3 = lu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + lu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.adjoint()*m2); + m3 = MatrixType::Random(size,size); + m3 = lu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); // Regression test for Bug 302 MatrixType m4 = MatrixType::Random(size,size); @@ -136,14 +182,39 @@ template void lu_partial_piv() PartialPivLU.h */ typedef typename MatrixType::Index Index; - Index rows = internal::random(1,4); - Index cols = rows; + typedef typename NumTraits::Real RealScalar; + Index size = internal::random(1,4); - MatrixType m1(cols, rows); + MatrixType m1(size, size), m2(size, size), m3(size, size); m1.setRandom(); PartialPivLU plu(m1); VERIFY_IS_APPROX(m1, plu.reconstructedMatrix()); + + m3 = MatrixType::Random(size,size); + m2 = plu.solve(m3); + VERIFY_IS_APPROX(m3, m1*m2); + MatrixType m1_inverse = plu.inverse(); + VERIFY_IS_APPROX(m2, m1_inverse*m3); + + RealScalar rcond = (RealScalar(1) / matrix_l1_norm(m1)) / matrix_l1_norm(m1_inverse); + const RealScalar rcond_est = plu.rcond(); + // Verify that the estimate is within a factor of 10 of the truth. + VERIFY(rcond_est > rcond / 10 && rcond_est < rcond * 10); + + // test solve with transposed + plu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.transpose()*m2); + m3 = MatrixType::Random(size,size); + m3 = plu.transpose().solve(m2); + VERIFY_IS_APPROX(m2, m1.transpose()*m3); + + // test solve with conjugate transposed + plu.template _solve_impl_transposed(m3, m2); + VERIFY_IS_APPROX(m3, m1.adjoint()*m2); + m3 = MatrixType::Random(size,size); + m3 = plu.adjoint().solve(m2); + VERIFY_IS_APPROX(m2, m1.adjoint()*m3); } template void lu_verify_assert() diff --git a/external/eigen3/test/main.h b/external/eigen3/test/main.h index 6642048..bd53251 100644 --- a/external/eigen3/test/main.h +++ b/external/eigen3/test/main.h @@ -41,7 +41,14 @@ #include #include #include +#include #include +#if __cplusplus >= 201103L +#include +#ifdef EIGEN_USE_THREADS +#include +#endif +#endif // To test that all calls from Eigen code to std::min() and std::max() are // protected by parenthesis against macro expansion, the min()/max() macros @@ -49,14 +56,48 @@ // compiler error. #define min(A,B) please_protect_your_min_with_parentheses #define max(A,B) please_protect_your_max_with_parentheses +#define isnan(X) please_protect_your_isnan_with_parentheses +#define isinf(X) please_protect_your_isinf_with_parentheses +#define isfinite(X) please_protect_your_isfinite_with_parentheses +#ifdef M_PI +#undef M_PI +#endif +#define M_PI please_use_EIGEN_PI_instead_of_M_PI #define FORBIDDEN_IDENTIFIER (this_identifier_is_forbidden_to_avoid_clashes) this_identifier_is_forbidden_to_avoid_clashes // B0 is defined in POSIX header termios.h #define B0 FORBIDDEN_IDENTIFIER +// Unit tests calling Eigen's blas library must preserve the default blocking size +// to avoid troubles. +#ifndef EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS +#define EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS +#endif // shuts down ICC's remark #593: variable "XXX" was set but never used -#define TEST_SET_BUT_UNUSED_VARIABLE(X) X = X + 0; +#define TEST_SET_BUT_UNUSED_VARIABLE(X) EIGEN_UNUSED_VARIABLE(X) + +#ifdef TEST_ENABLE_TEMPORARY_TRACKING + +static long int nb_temporaries; +static long int nb_temporaries_on_assert = -1; + +inline void on_temporary_creation(long int size) { + // here's a great place to set a breakpoint when debugging failures in this test! + if(size!=0) nb_temporaries++; + if(nb_temporaries_on_assert>0) assert(nb_temporaries g_test_stack; + // level == 0 <=> abort if test fail + // level >= 1 <=> warning message to std::cerr if test fail + static int g_test_level = 0; static int g_repeat; static unsigned int g_seed; static bool g_has_set_repeat, g_has_set_seed; } +#define TRACK std::cerr << __FILE__ << " " << __LINE__ << std::endl +// #define TRACK while() + #define EI_PP_MAKE_STRING2(S) #S #define EI_PP_MAKE_STRING(S) EI_PP_MAKE_STRING2(S) #define EIGEN_DEFAULT_IO_FORMAT IOFormat(4, 0, " ", "\n", "", "", "", "") +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) + #define EIGEN_EXCEPTIONS +#endif + #ifndef EIGEN_NO_ASSERTION_CHECKING namespace Eigen @@ -135,33 +186,35 @@ namespace Eigen if(report_on_cerr_on_assert_failure) \ std::cerr << #a << " " __FILE__ << "(" << __LINE__ << ")\n"; \ Eigen::no_more_assert = true; \ - throw Eigen::eigen_assert_exception(); \ + EIGEN_THROW_X(Eigen::eigen_assert_exception()); \ } \ else if (Eigen::internal::push_assert) \ { \ eigen_assert_list.push_back(std::string(EI_PP_MAKE_STRING(__FILE__) " (" EI_PP_MAKE_STRING(__LINE__) ") : " #a) ); \ } + #ifdef EIGEN_EXCEPTIONS #define VERIFY_RAISES_ASSERT(a) \ { \ Eigen::no_more_assert = false; \ - Eigen::eigen_assert_list.clear(); \ - Eigen::internal::push_assert = true; \ + Eigen::eigen_assert_list.clear(); \ + Eigen::internal::push_assert = true; \ Eigen::report_on_cerr_on_assert_failure = false; \ try { \ a; \ std::cerr << "One of the following asserts should have been triggered:\n"; \ - for (uint ai=0 ; ai0) + std::cerr << "WARNING: "; std::cerr << "Test " << testname << " failed in " << file << " (" << line << ")" << std::endl << " " << condition_as_string << std::endl; std::cerr << "Stack:\n"; @@ -208,14 +271,20 @@ inline void verify_impl(bool condition, const char *testname, const char *file, for(int i=test_stack_size-1; i>=0; --i) std::cerr << " - " << Eigen::g_test_stack[i] << "\n"; std::cerr << "\n"; - abort(); + if(Eigen::g_test_level==0) + abort(); } } #define VERIFY(a) ::verify_impl(a, g_test_stack.back().c_str(), __FILE__, __LINE__, EI_PP_MAKE_STRING(a)) -#define VERIFY_IS_EQUAL(a, b) VERIFY(test_is_equal(a, b)) -#define VERIFY_IS_APPROX(a, b) VERIFY(test_isApprox(a, b)) +#define VERIFY_GE(a, b) ::verify_impl(a >= b, g_test_stack.back().c_str(), __FILE__, __LINE__, EI_PP_MAKE_STRING(a >= b)) +#define VERIFY_LE(a, b) ::verify_impl(a <= b, g_test_stack.back().c_str(), __FILE__, __LINE__, EI_PP_MAKE_STRING(a <= b)) + + +#define VERIFY_IS_EQUAL(a, b) VERIFY(test_is_equal(a, b, true)) +#define VERIFY_IS_NOT_EQUAL(a, b) VERIFY(test_is_equal(a, b, false)) +#define VERIFY_IS_APPROX(a, b) VERIFY(verifyIsApprox(a, b)) #define VERIFY_IS_NOT_APPROX(a, b) VERIFY(!test_isApprox(a, b)) #define VERIFY_IS_MUCH_SMALLER_THAN(a, b) VERIFY(test_isMuchSmallerThan(a, b)) #define VERIFY_IS_NOT_MUCH_SMALLER_THAN(a, b) VERIFY(!test_isMuchSmallerThan(a, b)) @@ -236,9 +305,21 @@ namespace Eigen { template inline typename NumTraits::Real test_precision() { return NumTraits::dummy_precision(); } template<> inline float test_precision() { return 1e-3f; } template<> inline double test_precision() { return 1e-6; } +template<> inline long double test_precision() { return 1e-6l; } template<> inline float test_precision >() { return test_precision(); } template<> inline double test_precision >() { return test_precision(); } -template<> inline long double test_precision() { return 1e-6; } +template<> inline long double test_precision >() { return test_precision(); } + +inline bool test_isApprox(const short& a, const short& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isApprox(const unsigned short& a, const unsigned short& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isApprox(const unsigned int& a, const unsigned int& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isApprox(const long& a, const long& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isApprox(const unsigned long& a, const unsigned long& b) +{ return internal::isApprox(a, b, test_precision()); } inline bool test_isApprox(const int& a, const int& b) { return internal::isApprox(a, b, test_precision()); } @@ -253,14 +334,15 @@ inline bool test_isMuchSmallerThan(const float& a, const float& b) { return internal::isMuchSmallerThan(a, b, test_precision()); } inline bool test_isApproxOrLessThan(const float& a, const float& b) { return internal::isApproxOrLessThan(a, b, test_precision()); } + inline bool test_isApprox(const double& a, const double& b) { return internal::isApprox(a, b, test_precision()); } - inline bool test_isMuchSmallerThan(const double& a, const double& b) { return internal::isMuchSmallerThan(a, b, test_precision()); } inline bool test_isApproxOrLessThan(const double& a, const double& b) { return internal::isApproxOrLessThan(a, b, test_precision()); } +#ifndef EIGEN_TEST_NO_COMPLEX inline bool test_isApprox(const std::complex& a, const std::complex& b) { return internal::isApprox(a, b, test_precision >()); } inline bool test_isMuchSmallerThan(const std::complex& a, const std::complex& b) @@ -271,6 +353,15 @@ inline bool test_isApprox(const std::complex& a, const std::complex& a, const std::complex& b) { return internal::isMuchSmallerThan(a, b, test_precision >()); } +#ifndef EIGEN_TEST_NO_LONGDOUBLE +inline bool test_isApprox(const std::complex& a, const std::complex& b) +{ return internal::isApprox(a, b, test_precision >()); } +inline bool test_isMuchSmallerThan(const std::complex& a, const std::complex& b) +{ return internal::isMuchSmallerThan(a, b, test_precision >()); } +#endif +#endif + +#ifndef EIGEN_TEST_NO_LONGDOUBLE inline bool test_isApprox(const long double& a, const long double& b) { bool ret = internal::isApprox(a, b, test_precision()); @@ -284,13 +375,127 @@ inline bool test_isMuchSmallerThan(const long double& a, const long double& b) { return internal::isMuchSmallerThan(a, b, test_precision()); } inline bool test_isApproxOrLessThan(const long double& a, const long double& b) { return internal::isApproxOrLessThan(a, b, test_precision()); } +#endif // EIGEN_TEST_NO_LONGDOUBLE + +inline bool test_isApprox(const half& a, const half& b) +{ return internal::isApprox(a, b, test_precision()); } +inline bool test_isMuchSmallerThan(const half& a, const half& b) +{ return internal::isMuchSmallerThan(a, b, test_precision()); } +inline bool test_isApproxOrLessThan(const half& a, const half& b) +{ return internal::isApproxOrLessThan(a, b, test_precision()); } + +// test_relative_error returns the relative difference between a and b as a real scalar as used in isApprox. +template +typename NumTraits::NonInteger test_relative_error(const EigenBase &a, const EigenBase &b) +{ + using std::sqrt; + typedef typename NumTraits::NonInteger RealScalar; + typename internal::nested_eval::type ea(a.derived()); + typename internal::nested_eval::type eb(b.derived()); + return sqrt(RealScalar((ea-eb).cwiseAbs2().sum()) / RealScalar((std::min)(eb.cwiseAbs2().sum(),ea.cwiseAbs2().sum()))); +} + +template +typename T1::RealScalar test_relative_error(const T1 &a, const T2 &b, const typename T1::Coefficients* = 0) +{ + return test_relative_error(a.coeffs(), b.coeffs()); +} + +template +typename T1::Scalar test_relative_error(const T1 &a, const T2 &b, const typename T1::MatrixType* = 0) +{ + return test_relative_error(a.matrix(), b.matrix()); +} + +template +S test_relative_error(const Translation &a, const Translation &b) +{ + return test_relative_error(a.vector(), b.vector()); +} + +template +S test_relative_error(const ParametrizedLine &a, const ParametrizedLine &b) +{ + return (std::max)(test_relative_error(a.origin(), b.origin()), test_relative_error(a.origin(), b.origin())); +} + +template +S test_relative_error(const AlignedBox &a, const AlignedBox &b) +{ + return (std::max)(test_relative_error((a.min)(), (b.min)()), test_relative_error((a.max)(), (b.max)())); +} + +template class SparseMatrixBase; +template +typename T1::RealScalar test_relative_error(const MatrixBase &a, const SparseMatrixBase &b) +{ + return test_relative_error(a,b.toDense()); +} + +template class SparseMatrixBase; +template +typename T1::RealScalar test_relative_error(const SparseMatrixBase &a, const MatrixBase &b) +{ + return test_relative_error(a.toDense(),b); +} + +template class SparseMatrixBase; +template +typename T1::RealScalar test_relative_error(const SparseMatrixBase &a, const SparseMatrixBase &b) +{ + return test_relative_error(a.toDense(),b.toDense()); +} + +template +typename NumTraits::Real>::NonInteger test_relative_error(const T1 &a, const T2 &b, typename internal::enable_if::Real>::value, T1>::type* = 0) +{ + typedef typename NumTraits::Real>::NonInteger RealScalar; + return numext::sqrt(RealScalar(numext::abs2(a-b))/RealScalar((numext::mini)(numext::abs2(a),numext::abs2(b)))); +} + +template +T test_relative_error(const Rotation2D &a, const Rotation2D &b) +{ + return test_relative_error(a.angle(), b.angle()); +} + +template +T test_relative_error(const AngleAxis &a, const AngleAxis &b) +{ + return (std::max)(test_relative_error(a.angle(), b.angle()), test_relative_error(a.axis(), b.axis())); +} template -inline bool test_isApprox(const Type1& a, const Type2& b) +inline bool test_isApprox(const Type1& a, const Type2& b, typename Type1::Scalar* = 0) // Enabled for Eigen's type only { return a.isApprox(b, test_precision()); } +// get_test_precision is a small wrapper to test_precision allowing to return the scalar precision for either scalars or expressions +template +typename NumTraits::Real get_test_precision(const T&, const typename T::Scalar* = 0) +{ + return test_precision::Real>(); +} + +template +typename NumTraits::Real get_test_precision(const T&,typename internal::enable_if::Real>::value, T>::type* = 0) +{ + return test_precision::Real>(); +} + +// verifyIsApprox is a wrapper to test_isApprox that outputs the relative difference magnitude if the test fails. +template +inline bool verifyIsApprox(const Type1& a, const Type2& b) +{ + bool ret = test_isApprox(a,b); + if(!ret) + { + std::cerr << "Difference too large wrt tolerance " << get_test_precision(a) << ", relative error is: " << test_relative_error(a,b) << std::endl; + } + return ret; +} + // The idea behind this function is to compare the two scalars a and b where // the scalar ref is a hint about the expected order of magnitude of a and b. // WARNING: the scalar a and b must be positive @@ -326,17 +531,17 @@ inline bool test_isUnitary(const MatrixBase& m) // Forward declaration to avoid ICC warning template -bool test_is_equal(const T& actual, const U& expected); +bool test_is_equal(const T& actual, const U& expected, bool expect_equal=true); template -bool test_is_equal(const T& actual, const U& expected) +bool test_is_equal(const T& actual, const U& expected, bool expect_equal) { - if (actual==expected) + if ((actual==expected) == expect_equal) return true; // false: std::cerr - << std::endl << " actual = " << actual - << std::endl << " expected = " << expected << std::endl << std::endl; + << "\n actual = " << actual + << "\n expected " << (expect_equal ? "= " : "!=") << expected << "\n\n"; return false; } @@ -347,11 +552,10 @@ bool test_is_equal(const T& actual, const U& expected) */ // Forward declaration to avoid ICC warning template -void createRandomPIMatrixOfRank(typename MatrixType::Index desired_rank, typename MatrixType::Index rows, typename MatrixType::Index cols, MatrixType& m); +void createRandomPIMatrixOfRank(Index desired_rank, Index rows, Index cols, MatrixType& m); template -void createRandomPIMatrixOfRank(typename MatrixType::Index desired_rank, typename MatrixType::Index rows, typename MatrixType::Index cols, MatrixType& m) +void createRandomPIMatrixOfRank(Index desired_rank, Index rows, Index cols, MatrixType& m) { - typedef typename internal::traits::Index Index; typedef typename internal::traits::Scalar Scalar; enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; @@ -388,11 +592,10 @@ void createRandomPIMatrixOfRank(typename MatrixType::Index desired_rank, typenam // Forward declaration to avoid ICC warning template -void randomPermutationVector(PermutationVectorType& v, typename PermutationVectorType::Index size); +void randomPermutationVector(PermutationVectorType& v, Index size); template -void randomPermutationVector(PermutationVectorType& v, typename PermutationVectorType::Index size) +void randomPermutationVector(PermutationVectorType& v, Index size) { - typedef typename PermutationVectorType::Index Index; typedef typename PermutationVectorType::Scalar Scalar; v.resize(size); for(Index i = 0; i < size; ++i) v(i) = Scalar(i); @@ -411,12 +614,7 @@ template bool isNotNaN(const T& x) return x==x; } -template bool isNaN(const T& x) -{ - return x!=x; -} - -template bool isInf(const T& x) +template bool isPlusInf(const T& x) { return x > NumTraits::highest(); } @@ -437,13 +635,15 @@ template struct GetDifferentType > // Forward declaration to avoid ICC warning template std::string type_name(); -template std::string type_name() { return "other"; } -template<> std::string type_name() { return "float"; } -template<> std::string type_name() { return "double"; } -template<> std::string type_name() { return "int"; } -template<> std::string type_name >() { return "complex"; } -template<> std::string type_name >() { return "complex"; } -template<> std::string type_name >() { return "complex"; } +template std::string type_name() { return "other"; } +template<> std::string type_name() { return "float"; } +template<> std::string type_name() { return "double"; } +template<> std::string type_name() { return "long double"; } +template<> std::string type_name() { return "int"; } +template<> std::string type_name >() { return "complex"; } +template<> std::string type_name >() { return "complex"; } +template<> std::string type_name >() { return "complex"; } +template<> std::string type_name >() { return "complex"; } // forward declaration of the main test function void EIGEN_CAT(test_,EIGEN_TEST_FUNC)(); @@ -550,3 +750,8 @@ int main(int argc, char *argv[]) // remark #1572: floating-point equality and inequality comparisons are unreliable #pragma warning disable 279 383 1418 1572 #endif + +#ifdef _MSC_VER + // 4503 - decorated name length exceeded, name was truncated + #pragma warning( disable : 4503) +#endif diff --git a/external/eigen3/test/mapped_matrix.cpp b/external/eigen3/test/mapped_matrix.cpp index 58904fa..6a84c58 100644 --- a/external/eigen3/test/mapped_matrix.cpp +++ b/external/eigen3/test/mapped_matrix.cpp @@ -13,6 +13,8 @@ #include "main.h" +#define EIGEN_TESTMAP_MAX_SIZE 256 + template void map_class_vector(const VectorType& m) { typedef typename VectorType::Index Index; @@ -20,23 +22,26 @@ template void map_class_vector(const VectorType& m) Index size = m.size(); - // test Map.h Scalar* array1 = internal::aligned_new(size); Scalar* array2 = internal::aligned_new(size); Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3; + Scalar* array3unaligned = (internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ? array3+1 : array3; + Scalar array4[EIGEN_TESTMAP_MAX_SIZE]; - Map(array1, size) = VectorType::Random(size); - Map(array2, size) = Map(array1, size); + Map(array1, size) = VectorType::Random(size); + Map(array2, size) = Map(array1, size); Map(array3unaligned, size) = Map(array1, size); - VectorType ma1 = Map(array1, size); - VectorType ma2 = Map(array2, size); + Map(array4, size) = Map(array1, size); + VectorType ma1 = Map(array1, size); + VectorType ma2 = Map(array2, size); VectorType ma3 = Map(array3unaligned, size); + VectorType ma4 = Map(array4, size); VERIFY_IS_EQUAL(ma1, ma2); VERIFY_IS_EQUAL(ma1, ma3); + VERIFY_IS_EQUAL(ma1, ma4); #ifdef EIGEN_VECTORIZE - if(internal::packet_traits::Vectorizable) - VERIFY_RAISES_ASSERT((Map(array3unaligned, size))) + if(internal::packet_traits::Vectorizable && size>=AlignedMax) + VERIFY_RAISES_ASSERT((Map(array3unaligned, size))) #endif internal::aligned_delete(array1, size); @@ -50,23 +55,64 @@ template void map_class_matrix(const MatrixType& m) typedef typename MatrixType::Scalar Scalar; Index rows = m.rows(), cols = m.cols(), size = rows*cols; + Scalar s1 = internal::random(); - // test Map.h + // array1 and array2 -> aligned heap allocation Scalar* array1 = internal::aligned_new(size); for(int i = 0; i < size; i++) array1[i] = Scalar(1); Scalar* array2 = internal::aligned_new(size); for(int i = 0; i < size; i++) array2[i] = Scalar(1); + // array3unaligned -> unaligned pointer to heap Scalar* array3 = new Scalar[size+1]; for(int i = 0; i < size+1; i++) array3[i] = Scalar(1); - Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3; - Map(array1, rows, cols) = MatrixType::Ones(rows,cols); - Map(array2, rows, cols) = Map(array1, rows, cols); - Map(array3unaligned, rows, cols) = Map(array1, rows, cols); - MatrixType ma1 = Map(array1, rows, cols); - MatrixType ma2 = Map(array2, rows, cols); + Scalar* array3unaligned = internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; + Scalar array4[256]; + if(size<=256) + for(int i = 0; i < size; i++) array4[i] = Scalar(1); + + Map map1(array1, rows, cols); + Map map2(array2, rows, cols); + Map map3(array3unaligned, rows, cols); + Map map4(array4, rows, cols); + + VERIFY_IS_EQUAL(map1, MatrixType::Ones(rows,cols)); + VERIFY_IS_EQUAL(map2, MatrixType::Ones(rows,cols)); + VERIFY_IS_EQUAL(map3, MatrixType::Ones(rows,cols)); + map1 = MatrixType::Random(rows,cols); + map2 = map1; + map3 = map1; + MatrixType ma1 = map1; + MatrixType ma2 = map2; + MatrixType ma3 = map3; + VERIFY_IS_EQUAL(map1, map2); + VERIFY_IS_EQUAL(map1, map3); VERIFY_IS_EQUAL(ma1, ma2); - MatrixType ma3 = Map(array3unaligned, rows, cols); VERIFY_IS_EQUAL(ma1, ma3); + VERIFY_IS_EQUAL(ma1, map3); + + VERIFY_IS_APPROX(s1*map1, s1*map2); + VERIFY_IS_APPROX(s1*ma1, s1*ma2); + VERIFY_IS_EQUAL(s1*ma1, s1*ma3); + VERIFY_IS_APPROX(s1*map1, s1*map3); + + map2 *= s1; + map3 *= s1; + VERIFY_IS_APPROX(s1*map1, map2); + VERIFY_IS_APPROX(s1*map1, map3); + + if(size<=256) + { + VERIFY_IS_EQUAL(map4, MatrixType::Ones(rows,cols)); + map4 = map1; + MatrixType ma4 = map4; + VERIFY_IS_EQUAL(map1, map4); + VERIFY_IS_EQUAL(ma1, map4); + VERIFY_IS_EQUAL(ma1, ma4); + VERIFY_IS_APPROX(s1*map1, s1*map4); + + map4 *= s1; + VERIFY_IS_APPROX(s1*map1, map4); + } internal::aligned_delete(array1, size); internal::aligned_delete(array2, size); @@ -80,11 +126,10 @@ template void map_static_methods(const VectorType& m) Index size = m.size(); - // test Map.h Scalar* array1 = internal::aligned_new(size); Scalar* array2 = internal::aligned_new(size); Scalar* array3 = new Scalar[size+1]; - Scalar* array3unaligned = size_t(array3)%16 == 0 ? array3+1 : array3; + Scalar* array3unaligned = internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; VectorType::MapAligned(array1, size) = VectorType::Random(size); VectorType::Map(array2, size) = VectorType::Map(array1, size); @@ -109,9 +154,9 @@ template void check_const_correctness(const PlainObjec // verify that map-to-const don't have LvalueBit typedef typename internal::add_const::type ConstPlainObjectType; VERIFY( !(internal::traits >::Flags & LvalueBit) ); - VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(internal::traits >::Flags & LvalueBit) ); VERIFY( !(Map::Flags & LvalueBit) ); - VERIFY( !(Map::Flags & LvalueBit) ); + VERIFY( !(Map::Flags & LvalueBit) ); } template @@ -142,6 +187,7 @@ void test_mapped_matrix() CALL_SUBTEST_1( map_class_vector(Matrix()) ); CALL_SUBTEST_1( check_const_correctness(Matrix()) ); CALL_SUBTEST_2( map_class_vector(Vector4d()) ); + CALL_SUBTEST_2( map_class_vector(VectorXd(13)) ); CALL_SUBTEST_2( check_const_correctness(Matrix4d()) ); CALL_SUBTEST_3( map_class_vector(RowVector4f()) ); CALL_SUBTEST_4( map_class_vector(VectorXcf(8)) ); diff --git a/external/eigen3/test/mapstaticmethods.cpp b/external/eigen3/test/mapstaticmethods.cpp index 5b512bd..06272d1 100644 --- a/external/eigen3/test/mapstaticmethods.cpp +++ b/external/eigen3/test/mapstaticmethods.cpp @@ -69,7 +69,8 @@ struct mapstaticmethods_impl { static void run(const PlainObjectType& m) { - int rows = m.rows(), cols = m.cols(); + typedef typename PlainObjectType::Index Index; + Index rows = m.rows(), cols = m.cols(); int i = internal::random(2,5), j = internal::random(2,5); @@ -115,7 +116,8 @@ struct mapstaticmethods_impl { static void run(const PlainObjectType& v) { - int size = v.size(); + typedef typename PlainObjectType::Index Index; + Index size = v.size(); int i = internal::random(2,5); diff --git a/external/eigen3/test/mapstride.cpp b/external/eigen3/test/mapstride.cpp index b1dc9de..4858f8f 100644 --- a/external/eigen3/test/mapstride.cpp +++ b/external/eigen3/test/mapstride.cpp @@ -23,7 +23,7 @@ template void map_class_vector(const VectorTy Scalar* a_array = internal::aligned_new(arraysize+1); Scalar* array = a_array; if(Alignment!=Aligned) - array = (Scalar*)(ptrdiff_t(a_array) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + array = (Scalar*)(internal::IntPtr(a_array) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); { Map > map(array, size); @@ -56,16 +56,30 @@ template void map_class_matrix(const MatrixTy Index rows = _m.rows(), cols = _m.cols(); MatrixType m = MatrixType::Random(rows,cols); + Scalar s1 = internal::random(); Index arraysize = 2*(rows+4)*(cols+4); - Scalar* a_array = internal::aligned_new(arraysize+1); - Scalar* array = a_array; + Scalar* a_array1 = internal::aligned_new(arraysize+1); + Scalar* array1 = a_array1; if(Alignment!=Aligned) - array = (Scalar*)(ptrdiff_t(a_array) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + array1 = (Scalar*)(internal::IntPtr(a_array1) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + Scalar a_array2[256]; + Scalar* array2 = a_array2; + if(Alignment!=Aligned) + array2 = (Scalar*)(internal::IntPtr(a_array2) + (internal::packet_traits::AlignedOnScalar?sizeof(Scalar):sizeof(typename NumTraits::Real))); + else + array2 = (Scalar*)(((internal::UIntPtr(a_array2)+EIGEN_MAX_ALIGN_BYTES-1)/EIGEN_MAX_ALIGN_BYTES)*EIGEN_MAX_ALIGN_BYTES); + Index maxsize2 = a_array2 - array2 + 256; + // test no inner stride and some dynamic outer stride + for(int k=0; k<2; ++k) { + if(k==1 && (m.innerSize()+1)*m.outerSize() > maxsize2) + break; + Scalar* array = (k==0 ? array1 : array2); + Map > map(array, rows, cols, OuterStride(m.innerSize()+1)); map = m; VERIFY(map.outerStride() == map.innerSize()+1); @@ -75,11 +89,19 @@ template void map_class_matrix(const MatrixTy VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j)); VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); } + VERIFY_IS_APPROX(s1*map,s1*m); + map *= s1; + VERIFY_IS_APPROX(map,s1*m); } // test no inner stride and an outer stride of +4. This is quite important as for fixed-size matrices, // this allows to hit the special case where it's vectorizable. + for(int k=0; k<2; ++k) { + if(k==1 && (m.innerSize()+4)*m.outerSize() > maxsize2) + break; + Scalar* array = (k==0 ? array1 : array2); + enum { InnerSize = MatrixType::InnerSizeAtCompileTime, OuterStrideAtCompileTime = InnerSize==Dynamic ? Dynamic : InnerSize+4 @@ -94,10 +116,18 @@ template void map_class_matrix(const MatrixTy VERIFY(array[map.outerStride()*i+j] == m.coeffByOuterInner(i,j)); VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); } + VERIFY_IS_APPROX(s1*map,s1*m); + map *= s1; + VERIFY_IS_APPROX(map,s1*m); } // test both inner stride and outer stride + for(int k=0; k<2; ++k) { + if(k==1 && (2*m.innerSize()+1)*(m.outerSize()*2) > maxsize2) + break; + Scalar* array = (k==0 ? array1 : array2); + Map > map(array, rows, cols, Stride(2*m.innerSize()+1, 2)); map = m; VERIFY(map.outerStride() == 2*map.innerSize()+1); @@ -108,9 +138,12 @@ template void map_class_matrix(const MatrixTy VERIFY(array[map.outerStride()*i+map.innerStride()*j] == m.coeffByOuterInner(i,j)); VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); } + VERIFY_IS_APPROX(s1*map,s1*m); + map *= s1; + VERIFY_IS_APPROX(map,s1*m); } - internal::aligned_delete(a_array, arraysize+1); + internal::aligned_delete(a_array1, arraysize+1); } void test_mapstride() diff --git a/external/eigen3/test/meta.cpp b/external/eigen3/test/meta.cpp index 3302c58..b8dea68 100644 --- a/external/eigen3/test/meta.cpp +++ b/external/eigen3/test/meta.cpp @@ -9,6 +9,12 @@ #include "main.h" +template +bool check_is_convertible(const From&, const To&) +{ + return internal::is_convertible::value; +} + void test_meta() { VERIFY((internal::conditional<(3<4),internal::true_type, internal::false_type>::type::value)); @@ -52,6 +58,24 @@ void test_meta() VERIFY(( internal::is_same::type >::value)); VERIFY(( internal::is_same::type >::value)); + VERIFY(( internal::is_convertible::value )); + VERIFY(( internal::is_convertible::value )); + VERIFY(( internal::is_convertible::value )); + VERIFY((!internal::is_convertible,double>::value )); + VERIFY(( internal::is_convertible::value )); +// VERIFY((!internal::is_convertible::value )); //does not work because the conversion is prevented by a static assertion + VERIFY((!internal::is_convertible::value )); + VERIFY((!internal::is_convertible::value )); + { + float f; + MatrixXf A, B; + VectorXf a, b; + VERIFY(( check_is_convertible(a.dot(b), f) )); + VERIFY(( check_is_convertible(a.transpose()*b, f) )); + VERIFY((!check_is_convertible(A*B, f) )); + VERIFY(( check_is_convertible(A*B, A) )); + } + VERIFY(internal::meta_sqrt<1>::ret == 1); #define VERIFY_META_SQRT(X) VERIFY(internal::meta_sqrt::ret == int(std::sqrt(double(X)))) VERIFY_META_SQRT(2); diff --git a/external/eigen3/test/metis_support.cpp b/external/eigen3/test/metis_support.cpp index 932b040..d87c56a 100644 --- a/external/eigen3/test/metis_support.cpp +++ b/external/eigen3/test/metis_support.cpp @@ -3,24 +3,10 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + #include "sparse_solver.h" #include #include diff --git a/external/eigen3/test/mixingtypes.cpp b/external/eigen3/test/mixingtypes.cpp index 6c2f748..ad9c2c6 100644 --- a/external/eigen3/test/mixingtypes.cpp +++ b/external/eigen3/test/mixingtypes.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -15,14 +15,26 @@ #define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them #endif -// #ifndef EIGEN_DONT_VECTORIZE -// #define EIGEN_DONT_VECTORIZE // SSE intrinsics aren't designed to allow mixing types -// #endif +#if defined(EIGEN_TEST_PART_1) || defined(EIGEN_TEST_PART_2) || defined(EIGEN_TEST_PART_3) + +#ifndef EIGEN_DONT_VECTORIZE +#define EIGEN_DONT_VECTORIZE +#endif + +#endif + +static bool g_called; +#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same::value); } #include "main.h" using namespace std; +#define VERIFY_MIX_SCALAR(XPR,REF) \ + g_called = false; \ + VERIFY_IS_APPROX(XPR,REF); \ + VERIFY( g_called && #XPR" not properly optimized"); + template void mixingtypes(int size = SizeAtCompileType) { typedef std::complex CF; @@ -38,8 +50,10 @@ template void mixingtypes(int size = SizeAtCompileType) Mat_f mf = Mat_f::Random(size,size); Mat_d md = mf.template cast(); + //Mat_d rd = md; Mat_cf mcf = Mat_cf::Random(size,size); Mat_cd mcd = mcf.template cast >(); + Mat_cd rcd = mcd; Vec_f vf = Vec_f::Random(size,1); Vec_d vd = vf.template cast(); Vec_cf vcf = Vec_cf::Random(size,1); @@ -49,19 +63,59 @@ template void mixingtypes(int size = SizeAtCompileType) complex scf = internal::random >(); complex scd = internal::random >(); - mf+mf; - VERIFY_RAISES_ASSERT(mf+md); - VERIFY_RAISES_ASSERT(mf+mcf); + + float epsf = std::sqrt(std::numeric_limits ::min EIGEN_EMPTY ()); + double epsd = std::sqrt(std::numeric_limits::min EIGEN_EMPTY ()); + + while(std::abs(sf )(); + while(std::abs(sd )(); + while(std::abs(scf)(); + while(std::abs(scd)(); + +// VERIFY_RAISES_ASSERT(mf+md); // does not even compile + +#ifdef EIGEN_DONT_VECTORIZE VERIFY_RAISES_ASSERT(vf=vd); VERIFY_RAISES_ASSERT(vf+=vd); - VERIFY_RAISES_ASSERT(mcd=md); - +#endif + // check scalar products - VERIFY_IS_APPROX(vcf * sf , vcf * complex(sf)); - VERIFY_IS_APPROX(sd * vcd, complex(sd) * vcd); - VERIFY_IS_APPROX(vf * scf , vf.template cast >() * scf); - VERIFY_IS_APPROX(scd * vd, scd * vd.template cast >()); + VERIFY_MIX_SCALAR(vcf * sf , vcf * complex(sf)); + VERIFY_MIX_SCALAR(sd * vcd , complex(sd) * vcd); + VERIFY_MIX_SCALAR(vf * scf , vf.template cast >() * scf); + VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast >()); + + VERIFY_MIX_SCALAR(vcf * 2 , vcf * complex(2)); + VERIFY_MIX_SCALAR(vcf * 2.1 , vcf * complex(2.1)); + VERIFY_MIX_SCALAR(2 * vcf, vcf * complex(2)); + VERIFY_MIX_SCALAR(2.1 * vcf , vcf * complex(2.1)); + + // check scalar quotients + VERIFY_MIX_SCALAR(vcf / sf , vcf / complex(sf)); + VERIFY_MIX_SCALAR(vf / scf , vf.template cast >() / scf); + VERIFY_MIX_SCALAR(vf.array() / scf, vf.template cast >().array() / scf); + VERIFY_MIX_SCALAR(scd / vd.array() , scd / vd.template cast >().array()); + + // check scalar increment + VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex(sf)); + VERIFY_MIX_SCALAR(sd + vcd.array(), complex(sd) + vcd.array()); + VERIFY_MIX_SCALAR(vf.array() + scf, vf.template cast >().array() + scf); + VERIFY_MIX_SCALAR(scd + vd.array() , scd + vd.template cast >().array()); + + // check scalar subtractions + VERIFY_MIX_SCALAR(vcf.array() - sf , vcf.array() - complex(sf)); + VERIFY_MIX_SCALAR(sd - vcd.array(), complex(sd) - vcd.array()); + VERIFY_MIX_SCALAR(vf.array() - scf, vf.template cast >().array() - scf); + VERIFY_MIX_SCALAR(scd - vd.array() , scd - vd.template cast >().array()); + + // check scalar powers + VERIFY_MIX_SCALAR( pow(vcf.array(), sf), Eigen::pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( vcf.array().pow(sf) , Eigen::pow(vcf.array(), complex(sf)) ); + VERIFY_MIX_SCALAR( pow(sd, vcd.array()), Eigen::pow(complex(sd), vcd.array()) ); + VERIFY_MIX_SCALAR( Eigen::pow(vf.array(), scf), Eigen::pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( vf.array().pow(scf) , Eigen::pow(vf.template cast >().array(), scf) ); + VERIFY_MIX_SCALAR( Eigen::pow(scd, vd.array()), Eigen::pow(scd, vd.template cast >().array()) ); // check dot product vf.dot(vf); @@ -75,6 +129,7 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX(vcd.asDiagonal() * md, vcd.asDiagonal() * md.template cast >()); VERIFY_IS_APPROX(mcf * vf.asDiagonal(), mcf * vf.template cast >().asDiagonal()); VERIFY_IS_APPROX(md * vcd.asDiagonal(), md.template cast >() * vcd.asDiagonal()); + // vd.asDiagonal() * mf; // does not even compile // vcd.asDiagonal() * mf; // does not even compile @@ -92,7 +147,6 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX(mcd.array() *= md.array(), mcd2.array() *= md.array().template cast >()); // check matrix-matrix products - VERIFY_IS_APPROX(sd*md*mcd, (sd*md).template cast().eval()*mcd); VERIFY_IS_APPROX(sd*mcd*md, sd*mcd*md.template cast()); VERIFY_IS_APPROX(scd*md*mcd, scd*md.template cast().eval()*mcd); @@ -103,6 +157,20 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX(scf*mf*mcf, scf*mf.template cast()*mcf); VERIFY_IS_APPROX(scf*mcf*mf, scf*mcf*mf.template cast()); + VERIFY_IS_APPROX(sd*md.adjoint()*mcd, (sd*md).template cast().eval().adjoint()*mcd); + VERIFY_IS_APPROX(sd*mcd.adjoint()*md, sd*mcd.adjoint()*md.template cast()); + VERIFY_IS_APPROX(sd*md.adjoint()*mcd.adjoint(), (sd*md).template cast().eval().adjoint()*mcd.adjoint()); + VERIFY_IS_APPROX(sd*mcd.adjoint()*md.adjoint(), sd*mcd.adjoint()*md.template cast().adjoint()); + VERIFY_IS_APPROX(sd*md*mcd.adjoint(), (sd*md).template cast().eval()*mcd.adjoint()); + VERIFY_IS_APPROX(sd*mcd*md.adjoint(), sd*mcd*md.template cast().adjoint()); + + VERIFY_IS_APPROX(sf*mf.adjoint()*mcf, (sf*mf).template cast().eval().adjoint()*mcf); + VERIFY_IS_APPROX(sf*mcf.adjoint()*mf, sf*mcf.adjoint()*mf.template cast()); + VERIFY_IS_APPROX(sf*mf.adjoint()*mcf.adjoint(), (sf*mf).template cast().eval().adjoint()*mcf.adjoint()); + VERIFY_IS_APPROX(sf*mcf.adjoint()*mf.adjoint(), sf*mcf.adjoint()*mf.template cast().adjoint()); + VERIFY_IS_APPROX(sf*mf*mcf.adjoint(), (sf*mf).template cast().eval()*mcf.adjoint()); + VERIFY_IS_APPROX(sf*mcf*mf.adjoint(), sf*mcf*mf.template cast().adjoint()); + VERIFY_IS_APPROX(sf*mf*vcf, (sf*mf).template cast().eval()*vcf); VERIFY_IS_APPROX(scf*mf*vcf,(scf*mf.template cast()).eval()*vcf); VERIFY_IS_APPROX(sf*mcf*vf, sf*mcf*vf.template cast()); @@ -122,11 +190,111 @@ template void mixingtypes(int size = SizeAtCompileType) VERIFY_IS_APPROX(scd*vcd.adjoint()*md, scd*vcd.adjoint()*md.template cast().eval()); VERIFY_IS_APPROX(sd*vd.adjoint()*mcd, sd*vd.adjoint().template cast().eval()*mcd); VERIFY_IS_APPROX(scd*vd.adjoint()*mcd, scd*vd.adjoint().template cast().eval()*mcd); + + VERIFY_IS_APPROX( sd*vcd.adjoint()*md.template triangularView(), sd*vcd.adjoint()*md.template cast().eval().template triangularView()); + VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template triangularView(), scd*vcd.adjoint()*md.template cast().eval().template triangularView()); + VERIFY_IS_APPROX( sd*vcd.adjoint()*md.transpose().template triangularView(), sd*vcd.adjoint()*md.transpose().template cast().eval().template triangularView()); + VERIFY_IS_APPROX(scd*vcd.adjoint()*md.transpose().template triangularView(), scd*vcd.adjoint()*md.transpose().template cast().eval().template triangularView()); + VERIFY_IS_APPROX( sd*vd.adjoint()*mcd.template triangularView(), sd*vd.adjoint().template cast().eval()*mcd.template triangularView()); + VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template triangularView(), scd*vd.adjoint().template cast().eval()*mcd.template triangularView()); + VERIFY_IS_APPROX( sd*vd.adjoint()*mcd.transpose().template triangularView(), sd*vd.adjoint().template cast().eval()*mcd.transpose().template triangularView()); + VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.transpose().template triangularView(), scd*vd.adjoint().template cast().eval()*mcd.transpose().template triangularView()); + + // Not supported yet: trmm +// VERIFY_IS_APPROX(sd*mcd*md.template triangularView(), sd*mcd*md.template cast().eval().template triangularView()); +// VERIFY_IS_APPROX(scd*mcd*md.template triangularView(), scd*mcd*md.template cast().eval().template triangularView()); +// VERIFY_IS_APPROX(sd*md*mcd.template triangularView(), sd*md.template cast().eval()*mcd.template triangularView()); +// VERIFY_IS_APPROX(scd*md*mcd.template triangularView(), scd*md.template cast().eval()*mcd.template triangularView()); + + // Not supported yet: symv +// VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template selfadjointView(), sd*vcd.adjoint()*md.template cast().eval().template selfadjointView()); +// VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template selfadjointView(), scd*vcd.adjoint()*md.template cast().eval().template selfadjointView()); +// VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template selfadjointView(), sd*vd.adjoint().template cast().eval()*mcd.template selfadjointView()); +// VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template selfadjointView(), scd*vd.adjoint().template cast().eval()*mcd.template selfadjointView()); + + // Not supported yet: symm +// VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template selfadjointView(), sd*vcd.adjoint()*md.template cast().eval().template selfadjointView()); +// VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template selfadjointView(), scd*vcd.adjoint()*md.template cast().eval().template selfadjointView()); +// VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template selfadjointView(), sd*vd.adjoint().template cast().eval()*mcd.template selfadjointView()); +// VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template selfadjointView(), scd*vd.adjoint().template cast().eval()*mcd.template selfadjointView()); + + rcd.setZero(); + VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = sd * mcd * md), + Mat_cd((sd * mcd * md.template cast().eval()).template triangularView())); + VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = sd * md * mcd), + Mat_cd((sd * md.template cast().eval() * mcd).template triangularView())); + VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = scd * mcd * md), + Mat_cd((scd * mcd * md.template cast().eval()).template triangularView())); + VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView() = scd * md * mcd), + Mat_cd((scd * md.template cast().eval() * mcd).template triangularView())); + + + VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast().eval().array() * mcd.array() ); + VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() + mcd.array(), md.template cast().eval().array() + mcd.array() ); + VERIFY_IS_APPROX( mcd.array() + md.array(), mcd.array() + md.template cast().eval().array() ); + + VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast().eval().array() - mcd.array() ); + VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast().eval().array() ); + + if(mcd.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast().eval().array() / mcd.array() ); + } + if(md.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast().eval().array() ); + } + + if(md.array().abs().minCoeff()>epsd || mcd.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast().eval().array()) ); + + VERIFY_IS_APPROX( pow(md.array(),mcd.array()), md.template cast().eval().array().pow(mcd.array()) ); + VERIFY_IS_APPROX( pow(mcd.array(),md.array()), mcd.array().pow(md.template cast().eval().array()) ); + } + + rcd = mcd; + VERIFY_IS_APPROX( rcd = md, md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd += md, mcd + md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd -= md, mcd - md.template cast().eval() ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.array() *= md.array(), mcd.array() * md.template cast().eval().array() ); + rcd = mcd; + if(md.array().abs().minCoeff()>epsd) + { + VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast().eval().array() ); + } + + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += md + mcd*md, mcd + (md.template cast().eval()) + mcd*(md.template cast().eval())); + + VERIFY_IS_APPROX( rcd.noalias() = md*md, ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= md*md, mcd - ((md*md).eval().template cast()) ); + + VERIFY_IS_APPROX( rcd.noalias() = mcd + md*md, mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() += mcd + md*md, mcd + mcd + ((md*md).eval().template cast()) ); + rcd = mcd; + VERIFY_IS_APPROX( rcd.noalias() -= mcd + md*md, - ((md*md).eval().template cast()) ); } void test_mixingtypes() { - CALL_SUBTEST_1(mixingtypes<3>()); - CALL_SUBTEST_2(mixingtypes<4>()); - CALL_SUBTEST_3(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1(mixingtypes<3>()); + CALL_SUBTEST_2(mixingtypes<4>()); + CALL_SUBTEST_3(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); + + CALL_SUBTEST_4(mixingtypes<3>()); + CALL_SUBTEST_5(mixingtypes<4>()); + CALL_SUBTEST_6(mixingtypes(internal::random(1,EIGEN_TEST_MAX_SIZE))); + } } diff --git a/external/eigen3/test/mpl2only.cpp b/external/eigen3/test/mpl2only.cpp index 5ef0d2b..7d04d6b 100644 --- a/external/eigen3/test/mpl2only.cpp +++ b/external/eigen3/test/mpl2only.cpp @@ -12,7 +12,9 @@ #include #include #include +#include #include +#include int main() { diff --git a/external/eigen3/test/nesting_ops.cpp b/external/eigen3/test/nesting_ops.cpp index 1e85232..a419b0e 100644 --- a/external/eigen3/test/nesting_ops.cpp +++ b/external/eigen3/test/nesting_ops.cpp @@ -2,16 +2,37 @@ // for linear algebra. // // Copyright (C) 2010 Hauke Heibel +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_ENABLE_TEMPORARY_TRACKING + #include "main.h" -template void run_nesting_ops(const MatrixType& _m) +template +void use_n_times(const XprType &xpr) { - typename MatrixType::Nested m(_m); + typename internal::nested_eval::type mat(xpr); + typename XprType::PlainObject res(mat.rows(), mat.cols()); + nb_temporaries--; // remove res + res.setZero(); + for(int i=0; i +bool verify_eval_type(const XprType &, const ReferenceType&) +{ + typedef typename internal::nested_eval::type EvalType; + return internal::is_same::type, typename internal::remove_all::type>::value; +} + +template void run_nesting_ops_1(const MatrixType& _m) +{ + typename internal::nested_eval::type m(_m); // Make really sure that we are in debug mode! VERIFY_RAISES_ASSERT(eigen_assert(false)); @@ -24,10 +45,63 @@ template void run_nesting_ops(const MatrixType& _m) VERIFY_IS_APPROX( (m.transpose() * m).array().abs().sum(), (m.transpose() * m).array().abs().sum() ); } +template void run_nesting_ops_2(const MatrixType& _m) +{ + typedef typename MatrixType::Scalar Scalar; + Index rows = _m.rows(); + Index cols = _m.cols(); + MatrixType m1 = MatrixType::Random(rows,cols); + Matrix m2; + + if((MatrixType::SizeAtCompileTime==Dynamic)) + { + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1 + m1*m1), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1 + m1*m1), 1 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.template triangularView().solve(m1.col(0))), 1 ); + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.template triangularView().solve(m1.col(0))), 1 ); + + VERIFY_EVALUATION_COUNT( use_n_times<1>(Scalar(2)*m1.template triangularView().solve(m1.col(0))), 2 ); // FIXME could be one by applying the scaling in-place on the solve result + VERIFY_EVALUATION_COUNT( use_n_times<1>(m1.col(0)+m1.template triangularView().solve(m1.col(0))), 2 ); // FIXME could be one by adding m1.col() inplace + VERIFY_EVALUATION_COUNT( use_n_times<10>(m1.col(0)+m1.template triangularView().solve(m1.col(0))), 2 ); + } + + { + VERIFY( verify_eval_type<10>(m1, m1) ); + if(!NumTraits::IsComplex) + { + VERIFY( verify_eval_type<3>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<4>(2*m1, m1) ); + } + else + { + VERIFY( verify_eval_type<2>(2*m1, 2*m1) ); + VERIFY( verify_eval_type<3>(2*m1, m1) ); + } + VERIFY( verify_eval_type<2>(m1+m1, m1+m1) ); + VERIFY( verify_eval_type<3>(m1+m1, m1) ); + VERIFY( verify_eval_type<1>(m1*m1.transpose(), m2) ); + VERIFY( verify_eval_type<1>(m1*(m1+m1).transpose(), m2) ); + VERIFY( verify_eval_type<2>(m1*m1.transpose(), m2) ); + VERIFY( verify_eval_type<1>(m1+m1*m1, m1) ); + + VERIFY( verify_eval_type<1>(m1.template triangularView().solve(m1), m1) ); + VERIFY( verify_eval_type<1>(m1+m1.template triangularView().solve(m1), m1) ); + } +} + + void test_nesting_ops() { - CALL_SUBTEST_1(run_nesting_ops(MatrixXf::Random(25,25))); - CALL_SUBTEST_2(run_nesting_ops(MatrixXd::Random(25,25))); - CALL_SUBTEST_3(run_nesting_ops(Matrix4f::Random())); - CALL_SUBTEST_4(run_nesting_ops(Matrix4d::Random())); + CALL_SUBTEST_1(run_nesting_ops_1(MatrixXf::Random(25,25))); + CALL_SUBTEST_2(run_nesting_ops_1(MatrixXcd::Random(25,25))); + CALL_SUBTEST_3(run_nesting_ops_1(Matrix4f::Random())); + CALL_SUBTEST_4(run_nesting_ops_1(Matrix2d::Random())); + + Index s = internal::random(1,EIGEN_TEST_MAX_SIZE); + CALL_SUBTEST_1( run_nesting_ops_2(MatrixXf(s,s)) ); + CALL_SUBTEST_2( run_nesting_ops_2(MatrixXcd(s,s)) ); + CALL_SUBTEST_3( run_nesting_ops_2(Matrix4f()) ); + CALL_SUBTEST_4( run_nesting_ops_2(Matrix2d()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } diff --git a/external/eigen3/test/nomalloc.cpp b/external/eigen3/test/nomalloc.cpp index 8e04023..50756c2 100644 --- a/external/eigen3/test/nomalloc.cpp +++ b/external/eigen3/test/nomalloc.cpp @@ -8,20 +8,10 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -// this hack is needed to make this file compiles with -pedantic (gcc) -#ifdef __GNUC__ -#define throw(X) -#endif - -#ifdef __INTEL_COMPILER - // disable "warning #76: argument to macro is empty" produced by the above hack - #pragma warning disable 76 -#endif - // discard stack allocation as that too bypasses malloc #define EIGEN_STACK_ALLOCATION_LIMIT 0 -// any heap allocation will raise an assert -#define EIGEN_NO_MALLOC +// heap allocation will raise an assert if enabled at runtime +#define EIGEN_RUNTIME_NO_MALLOC #include "main.h" #include @@ -88,14 +78,15 @@ template void nomalloc(const MatrixType& m) VERIFY_IS_APPROX(m2,m2); m2.template selfadjointView().rankUpdate(m1.col(0),-1); - m2.template selfadjointView().rankUpdate(m1.row(0),-1); + m2.template selfadjointView().rankUpdate(m1.row(0),-1); + m2.template selfadjointView().rankUpdate(m1.col(0), m1.col(0)); // rank-2 // The following fancy matrix-matrix products are not safe yet regarding static allocation -// m1 += m1.template triangularView() * m2.col(; -// m1.template selfadjointView().rankUpdate(m2); -// m1 += m1.template triangularView() * m2; -// m1 += m1.template selfadjointView() * m2; -// VERIFY_IS_APPROX(m1,m1); + m2.template selfadjointView().rankUpdate(m1); + m2 += m2.template triangularView() * m1; + m2.template triangularView() = m2 * m2; + m1 += m1.template selfadjointView() * m2; + VERIFY_IS_APPROX(m2,m2); } template @@ -171,7 +162,7 @@ void test_zerosized() { Eigen::VectorXd v; // explicit zero-sized: Eigen::ArrayXXd A0(0,0); - Eigen::ArrayXd v0(std::ptrdiff_t(0)); // FIXME ArrayXd(0) is ambiguous + Eigen::ArrayXd v0(0); // assigning empty objects to each other: A=A0; @@ -183,9 +174,11 @@ template void test_reference(const MatrixType& m) { enum { Flag = MatrixType::IsRowMajor ? Eigen::RowMajor : Eigen::ColMajor}; enum { TransposeFlag = !MatrixType::IsRowMajor ? Eigen::RowMajor : Eigen::ColMajor}; typename MatrixType::Index rows = m.rows(), cols=m.cols(); + typedef Eigen::Matrix MatrixX; + typedef Eigen::Matrix MatrixXT; // Dynamic reference: - typedef Eigen::Ref > Ref; - typedef Eigen::Ref > RefT; + typedef Eigen::Ref Ref; + typedef Eigen::Ref RefT; Ref r1(m); Ref r2(m.block(rows/3, cols/4, rows/2, cols/2)); @@ -195,10 +188,30 @@ template void test_reference(const MatrixType& m) { VERIFY_RAISES_ASSERT(RefT r5(m)); VERIFY_RAISES_ASSERT(Ref r6(m.transpose())); VERIFY_RAISES_ASSERT(Ref r7(Scalar(2) * m)); + + // Copy constructors shall also never malloc + Ref r8 = r1; + RefT r9 = r3; + + // Initializing from a compatible Ref shall also never malloc + Eigen::Ref > r10=r8, r11=m; + + // Initializing from an incompatible Ref will malloc: + typedef Eigen::Ref RefAligned; + VERIFY_RAISES_ASSERT(RefAligned r12=r10); + VERIFY_RAISES_ASSERT(Ref r13=r10); // r10 has more dynamic strides + } void test_nomalloc() { + // create some dynamic objects + Eigen::MatrixXd M1 = MatrixXd::Random(3,3); + Ref R1 = 2.0*M1; // Ref requires temporary + + // from here on prohibit malloc: + Eigen::internal::set_is_malloc_allowed(false); + // check that our operator new is indeed called: VERIFY_RAISES_ASSERT(MatrixXd dummy(MatrixXd::Random(3,3))); CALL_SUBTEST_1(nomalloc(Matrix()) ); @@ -207,6 +220,10 @@ void test_nomalloc() // Check decomposition modules with dynamic matrices that have a known compile-time max size (ctms) CALL_SUBTEST_4(ctms_decompositions()); + CALL_SUBTEST_5(test_zerosized()); + CALL_SUBTEST_6(test_reference(Matrix())); + CALL_SUBTEST_7(test_reference(R1)); + CALL_SUBTEST_8(Ref R2 = M1.topRows<2>(); test_reference(R2)); } diff --git a/external/eigen3/test/nullary.cpp b/external/eigen3/test/nullary.cpp index fbc721a..acd5550 100644 --- a/external/eigen3/test/nullary.cpp +++ b/external/eigen3/test/nullary.cpp @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2010-2011 Jitse Niesen +// Copyright (C) 2016 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -12,7 +13,6 @@ template bool equalsIdentity(const MatrixType& A) { - typedef typename MatrixType::Index Index; typedef typename MatrixType::Scalar Scalar; Scalar zero = static_cast(0); @@ -30,13 +30,41 @@ bool equalsIdentity(const MatrixType& A) bool diagOK = (A.diagonal().array() == 1).all(); return offDiagOK && diagOK; + +} + +template +void check_extremity_accuracy(const VectorType &v, const typename VectorType::Scalar &low, const typename VectorType::Scalar &high) +{ + typedef typename VectorType::Scalar Scalar; + typedef typename VectorType::RealScalar RealScalar; + + RealScalar prec = internal::is_same::value ? NumTraits::dummy_precision()*10 : NumTraits::dummy_precision()/10; + Index size = v.size(); + + if(size<20) + return; + + for (int i=0; isize-6) + { + Scalar ref = (low*RealScalar(size-i-1))/RealScalar(size-1) + (high*RealScalar(i))/RealScalar(size-1); + if(std::abs(ref)>1) + { + if(!internal::isApprox(v(i), ref, prec)) + std::cout << v(i) << " != " << ref << " ; relative error: " << std::abs((v(i)-ref)/ref) << " ; required precision: " << prec << " ; range: " << low << "," << high << " ; i: " << i << "\n"; + VERIFY(internal::isApprox(v(i), (low*RealScalar(size-i-1))/RealScalar(size-1) + (high*RealScalar(i))/RealScalar(size-1), prec)); + } + } + } } template void testVectorType(const VectorType& base) { - typedef typename internal::traits::Index Index; - typedef typename internal::traits::Scalar Scalar; + typedef typename VectorType::Scalar Scalar; + typedef typename VectorType::RealScalar RealScalar; const Index size = base.size(); @@ -44,36 +72,61 @@ void testVectorType(const VectorType& base) Scalar low = (size == 1 ? high : internal::random(-500,500)); if (low>high) std::swap(low,high); + // check low==high + if(internal::random(0.f,1.f)<0.05f) + low = high; + // check abs(low) >> abs(high) + else if(size>2 && std::numeric_limits::max_exponent10>0 && internal::random(0.f,1.f)<0.1f) + low = -internal::random(1,2) * RealScalar(std::pow(RealScalar(10),std::numeric_limits::max_exponent10/2)); + const Scalar step = ((size == 1) ? 1 : (high-low)/(size-1)); // check whether the result yields what we expect it to do VectorType m(base); m.setLinSpaced(size,low,high); - VectorType n(size); - for (int i=0; i::IsInteger) + { + VectorType n(size); + for (int i=0; i::IsInteger) || ((high-low)>=size && (Index(high-low)%(size-1))==0) || (Index(high-low+1)::IsInteger) || (high-low>=size)) + for (int i=0; i::epsilon() ); + // random access version + m = VectorType::LinSpaced(size,low,high); + VERIFY_IS_APPROX(m,n); + VERIFY( internal::isApprox(m(m.size()-1),high) ); + VERIFY( size==1 || internal::isApprox(m(0),low) ); + VERIFY_IS_EQUAL(m(m.size()-1) , high); + if(!NumTraits::IsInteger) + CALL_SUBTEST( check_extremity_accuracy(m, low, high) ); + } - // These guys sometimes fail! This is not good. Any ideas how to fix them!? - //VERIFY( m(m.size()-1) == high ); - //VERIFY( m(0) == low ); + VERIFY( m(m.size()-1) <= high ); + VERIFY( (m.array() <= high).all() ); + VERIFY( (m.array() >= low).all() ); - // sequential access version - m = VectorType::LinSpaced(Sequential,size,low,high); - VERIFY_IS_APPROX(m,n); - // These guys sometimes fail! This is not good. Any ideas how to fix them!? - //VERIFY( m(m.size()-1) == high ); - //VERIFY( m(0) == low ); + VERIFY( m(m.size()-1) >= low ); + if(size>=1) + { + VERIFY( internal::isApprox(m(0),low) ); + VERIFY_IS_EQUAL(m(0) , low); + } // check whether everything works with row and col major vectors Matrix row_vector(size); @@ -95,23 +148,77 @@ void testVectorType(const VectorType& base) VERIFY_IS_APPROX( ScalarMatrix::LinSpaced(1,low,high), ScalarMatrix::Constant(high) ); // regression test for bug 526 (linear vectorized transversal) - if (size > 1) { + if (size > 1 && (!NumTraits::IsInteger)) { m.tail(size-1).setLinSpaced(low, high); VERIFY_IS_APPROX(m(size-1), high); } + + // regression test for bug 1383 (LinSpaced with empty size/range) + { + Index n0 = VectorType::SizeAtCompileTime==Dynamic ? 0 : VectorType::SizeAtCompileTime; + low = internal::random(); + m = VectorType::LinSpaced(n0,low,low-1); + VERIFY(m.size()==n0); + + if(VectorType::SizeAtCompileTime==Dynamic) + { + VERIFY_IS_EQUAL(VectorType::LinSpaced(n0,0,Scalar(n0-1)).sum(),Scalar(0)); + VERIFY_IS_EQUAL(VectorType::LinSpaced(n0,low,low-1).sum(),Scalar(0)); + } + + m.setLinSpaced(n0,0,Scalar(n0-1)); + VERIFY(m.size()==n0); + m.setLinSpaced(n0,low,low-1); + VERIFY(m.size()==n0); + + // empty range only: + VERIFY_IS_APPROX(VectorType::LinSpaced(size,low,low),VectorType::Constant(size,low)); + m.setLinSpaced(size,low,low); + VERIFY_IS_APPROX(m,VectorType::Constant(size,low)); + + if(NumTraits::IsInteger) + { + VERIFY_IS_APPROX( VectorType::LinSpaced(size,low,Scalar(low+size-1)), VectorType::LinSpaced(size,Scalar(low+size-1),low).reverse() ); + + if(VectorType::SizeAtCompileTime==Dynamic) + { + // Check negative multiplicator path: + for(Index k=1; k<5; ++k) + VERIFY_IS_APPROX( VectorType::LinSpaced(size,low,Scalar(low+(size-1)*k)), VectorType::LinSpaced(size,Scalar(low+(size-1)*k),low).reverse() ); + // Check negative divisor path: + for(Index k=1; k<5; ++k) + VERIFY_IS_APPROX( VectorType::LinSpaced(size*k,low,Scalar(low+size-1)), VectorType::LinSpaced(size*k,Scalar(low+size-1),low).reverse() ); + } + } + } } template void testMatrixType(const MatrixType& m) { - typedef typename MatrixType::Index Index; + using std::abs; const Index rows = m.rows(); const Index cols = m.cols(); + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + Scalar s1; + do { + s1 = internal::random(); + } while(abs(s1)::IsInteger)); MatrixType A; A.setIdentity(rows, cols); VERIFY(equalsIdentity(A)); VERIFY(equalsIdentity(MatrixType::Identity(rows, cols))); + + + A = MatrixType::Constant(rows,cols,s1); + Index i = internal::random(0,rows-1); + Index j = internal::random(0,cols-1); + VERIFY_IS_APPROX( MatrixType::Constant(rows,cols,s1)(i,j), s1 ); + VERIFY_IS_APPROX( MatrixType::Constant(rows,cols,s1).coeff(i,j), s1 ); + VERIFY_IS_APPROX( A(i,j), s1 ); } void test_nullary() @@ -120,12 +227,78 @@ void test_nullary() CALL_SUBTEST_2( testMatrixType(MatrixXcf(internal::random(1,300),internal::random(1,300))) ); CALL_SUBTEST_3( testMatrixType(MatrixXf(internal::random(1,300),internal::random(1,300))) ); - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_4( testVectorType(VectorXd(internal::random(1,300))) ); + for(int i = 0; i < g_repeat*10; i++) { + CALL_SUBTEST_4( testVectorType(VectorXd(internal::random(1,30000))) ); CALL_SUBTEST_5( testVectorType(Vector4d()) ); // regression test for bug 232 CALL_SUBTEST_6( testVectorType(Vector3d()) ); - CALL_SUBTEST_7( testVectorType(VectorXf(internal::random(1,300))) ); + CALL_SUBTEST_7( testVectorType(VectorXf(internal::random(1,30000))) ); CALL_SUBTEST_8( testVectorType(Vector3f()) ); + CALL_SUBTEST_8( testVectorType(Vector4f()) ); + CALL_SUBTEST_8( testVectorType(Matrix()) ); CALL_SUBTEST_8( testVectorType(Matrix()) ); + + CALL_SUBTEST_9( testVectorType(VectorXi(internal::random(1,10))) ); + CALL_SUBTEST_9( testVectorType(VectorXi(internal::random(9,300))) ); + CALL_SUBTEST_9( testVectorType(Matrix()) ); + } + +#ifdef EIGEN_TEST_PART_6 + // Assignment of a RowVectorXd to a MatrixXd (regression test for bug #79). + VERIFY( (MatrixXd(RowVectorXd::LinSpaced(3, 0, 1)) - RowVector3d(0, 0.5, 1)).norm() < std::numeric_limits::epsilon() ); +#endif + +#ifdef EIGEN_TEST_PART_9 + // Check possible overflow issue + { + int n = 60000; + ArrayXi a1(n), a2(n); + a1.setLinSpaced(n, 0, n-1); + for(int i=0; i >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( internal::has_binary_operator >::value )); + VERIFY(( !internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + // Regression unit test for a weird MSVC bug. + // Search "nullary_wrapper_workaround_msvc" in CoreEvaluators.h for the details. + // See also traits::match. + { + MatrixXf A = MatrixXf::Random(3,3); + Ref R = 2.0*A; + VERIFY_IS_APPROX(R, A+A); + + Ref R1 = MatrixXf::Random(3,3)+A; + + VectorXi V = VectorXi::Random(3); + Ref R2 = VectorXi::LinSpaced(3,1,3)+V; + VERIFY_IS_APPROX(R2, V+Vector3i(1,2,3)); + + VERIFY(( internal::has_nullary_operator >::value )); + VERIFY(( !internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); + + VERIFY(( !internal::has_nullary_operator >::value )); + VERIFY(( internal::has_unary_operator >::value )); + VERIFY(( !internal::has_binary_operator >::value )); + VERIFY(( internal::functor_has_linear_access >::ret )); } +#endif } diff --git a/external/eigen3/test/numext.cpp b/external/eigen3/test/numext.cpp new file mode 100644 index 0000000..3de33e2 --- /dev/null +++ b/external/eigen3/test/numext.cpp @@ -0,0 +1,53 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +template +void check_abs() { + typedef typename NumTraits::Real Real; + + if(NumTraits::IsSigned) + VERIFY_IS_EQUAL(numext::abs(-T(1)), T(1)); + VERIFY_IS_EQUAL(numext::abs(T(0)), T(0)); + VERIFY_IS_EQUAL(numext::abs(T(1)), T(1)); + + for(int k=0; k(); + if(!internal::is_same::value) + x = x/Real(2); + if(NumTraits::IsSigned) + { + VERIFY_IS_EQUAL(numext::abs(x), numext::abs(-x)); + VERIFY( numext::abs(-x) >= Real(0)); + } + VERIFY( numext::abs(x) >= Real(0)); + VERIFY_IS_APPROX( numext::abs2(x), numext::abs2(numext::abs(x)) ); + } +} + +void test_numext() { + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + CALL_SUBTEST( check_abs() ); + + CALL_SUBTEST( check_abs >() ); + CALL_SUBTEST( check_abs >() ); +} diff --git a/external/eigen3/test/packetmath.cpp b/external/eigen3/test/packetmath.cpp index bac7b02..7821a17 100644 --- a/external/eigen3/test/packetmath.cpp +++ b/external/eigen3/test/packetmath.cpp @@ -9,16 +9,28 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #include "main.h" +#include "unsupported/Eigen/SpecialFunctions" +#if defined __GNUC__ && __GNUC__>=6 + #pragma GCC diagnostic ignored "-Wignored-attributes" +#endif // using namespace Eigen; +#ifdef EIGEN_VECTORIZE_SSE +const bool g_vectorize_sse = true; +#else +const bool g_vectorize_sse = false; +#endif + namespace Eigen { namespace internal { template T negate(const T& x) { return -x; } } } -template bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits::Real& refvalue) +// NOTE: we disbale inlining for this function to workaround a GCC issue when using -O3 and the i387 FPU. +template EIGEN_DONT_INLINE +bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits::Real& refvalue) { return internal::isMuchSmallerThan(a-b, refvalue); } @@ -29,7 +41,7 @@ template bool areApproxAbs(const Scalar* a, const Scalar* b, in { if (!isApproxAbs(a[i],b[i],refvalue)) { - std::cout << "[" << Map >(a,size) << "]" << " != " << Map >(b,size) << "\n"; + std::cout << "ref: [" << Map >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; return false; } } @@ -42,21 +54,13 @@ template bool areApprox(const Scalar* a, const Scalar* b, int s { if (a[i]!=b[i] && !internal::isApprox(a[i],b[i])) { - std::cout << "[" << Map >(a,size) << "]" << " != " << Map >(b,size) << "\n"; + std::cout << "ref: [" << Map >(a,size) << "]" << " != vec: [" << Map >(b,size) << "]\n"; return false; } } return true; } - -#define CHECK_CWISE2(REFOP, POP) { \ - for (int i=0; i(data1), internal::pload(data1+PacketSize))); \ - VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ -} - #define CHECK_CWISE1(REFOP, POP) { \ for (int i=0; i VERIFY(areApprox(ref, data2, PacketSize) && #POP); \ } +#define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \ + packet_helper h; \ + for (int i=0; i template void packetmath() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; typedef typename NumTraits::Real RealScalar; - const int size = PacketSize*4; - EIGEN_ALIGN16 Scalar data1[internal::packet_traits::size*4]; - EIGEN_ALIGN16 Scalar data2[internal::packet_traits::size*4]; - EIGEN_ALIGN16 Packet packets[PacketSize*2]; - EIGEN_ALIGN16 Scalar ref[internal::packet_traits::size*4]; + const int max_size = PacketSize > 4 ? PacketSize : 4; + const int size = PacketSize*max_size; + EIGEN_ALIGN_MAX Scalar data1[size]; + EIGEN_ALIGN_MAX Scalar data2[size]; + EIGEN_ALIGN_MAX Packet packets[PacketSize*2]; + EIGEN_ALIGN_MAX Scalar ref[size]; RealScalar refvalue = 0; for (int i=0; i void packetmath() else if (offset==1) internal::palign<1>(packets[0], packets[1]); else if (offset==2) internal::palign<2>(packets[0], packets[1]); else if (offset==3) internal::palign<3>(packets[0], packets[1]); + else if (offset==4) internal::palign<4>(packets[0], packets[1]); + else if (offset==5) internal::palign<5>(packets[0], packets[1]); + else if (offset==6) internal::palign<6>(packets[0], packets[1]); + else if (offset==7) internal::palign<7>(packets[0], packets[1]); + else if (offset==8) internal::palign<8>(packets[0], packets[1]); + else if (offset==9) internal::palign<9>(packets[0], packets[1]); + else if (offset==10) internal::palign<10>(packets[0], packets[1]); + else if (offset==11) internal::palign<11>(packets[0], packets[1]); + else if (offset==12) internal::palign<12>(packets[0], packets[1]); + else if (offset==13) internal::palign<13>(packets[0], packets[1]); + else if (offset==14) internal::palign<14>(packets[0], packets[1]); + else if (offset==15) internal::palign<15>(packets[0], packets[1]); internal::pstore(data2, packets[0]); for (int i=0; i void packetmath() VERIFY(areApprox(ref, data2, PacketSize) && "internal::palign"); } - CHECK_CWISE2(REF_ADD, internal::padd); - CHECK_CWISE2(REF_SUB, internal::psub); - CHECK_CWISE2(REF_MUL, internal::pmul); - #ifndef EIGEN_VECTORIZE_ALTIVEC - if (!internal::is_same::value) - CHECK_CWISE2(REF_DIV, internal::pdiv); - #endif + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasNegate); + VERIFY((internal::is_same::value) || (!PacketTraits::Vectorizable) || PacketTraits::HasDiv); + + CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd); + CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub); + CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul); + CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv); + CHECK_CWISE1(internal::negate, internal::pnegate); CHECK_CWISE1(numext::conj, internal::pconj); @@ -165,9 +195,31 @@ template void packetmath() internal::pstore(data2, internal::pset1(data1[offset])); VERIFY(areApprox(ref, data2, PacketSize) && "internal::pset1"); } - + + { + for (int i=0; i(data1, A0, A1, A2, A3); + internal::pstore(data2+0*PacketSize, A0); + internal::pstore(data2+1*PacketSize, A1); + internal::pstore(data2+2*PacketSize, A2); + internal::pstore(data2+3*PacketSize, A3); + VERIFY(areApprox(ref, data2, 4*PacketSize) && "internal::pbroadcast4"); + } + + { + for (int i=0; i(data1, A0, A1); + internal::pstore(data2+0*PacketSize, A0); + internal::pstore(data2+1*PacketSize, A1); + VERIFY(areApprox(ref, data2, 2*PacketSize) && "internal::pbroadcast2"); + } + VERIFY(internal::isApprox(data1[0], internal::pfirst(internal::pload(data1))) && "internal::pfirst"); - + if(PacketSize>1) { for(int offset=0;offset<4;++offset) @@ -179,11 +231,31 @@ template void packetmath() } } + if(PacketSize>2) + { + for(int offset=0;offset<4;++offset) + { + for(int i=0;i(data1+offset)); + VERIFY(areApprox(ref, data2, PacketSize) && "ploadquad"); + } + } + ref[0] = 0; for (int i=0; i(data1)), refvalue) && "internal::predux"); + { + for (int i=0; i<4; ++i) + ref[i] = 0; + for (int i=0; i(data1))); + VERIFY(areApprox(ref, data2, PacketSize>4?PacketSize/2:PacketSize) && "internal::predux_downto4"); + } + ref[0] = 1; for (int i=0; i void packetmath() ref[i] = data1[PacketSize-i-1]; internal::pstore(data2, internal::preverse(internal::pload(data1))); VERIFY(areApprox(ref, data2, PacketSize) && "internal::preverse"); + + internal::PacketBlock kernel; + for (int i=0; i(data1+i*PacketSize); + } + ptranspose(kernel); + for (int i=0; i(data1); + Packet elsePacket = internal::pload(data2); + EIGEN_ALIGN_MAX internal::Selector selector; + for (int i = 0; i < PacketSize; ++i) { + selector.select[i] = i; + } + + Packet blend = internal::pblend(selector, thenPacket, elsePacket); + EIGEN_ALIGN_MAX Scalar result[size]; + internal::pstore(result, blend); + for (int i = 0; i < PacketSize; ++i) { + VERIFY(isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue)); + } + } + + if (PacketTraits::HasBlend || g_vectorize_sse) { + // pinsertfirst + for (int i=0; i(); + ref[0] = s; + internal::pstore(data2, internal::pinsertfirst(internal::pload(data1),s)); + VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertfirst"); + } + + if (PacketTraits::HasBlend || g_vectorize_sse) { + // pinsertlast + for (int i=0; i(); + ref[PacketSize-1] = s; + internal::pstore(data2, internal::pinsertlast(internal::pload(data1),s)); + VERIFY(areApprox(ref, data2, PacketSize) && "internal::pinsertlast"); + } } template void packetmath_real() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; const int size = PacketSize*4; - EIGEN_ALIGN16 Scalar data1[internal::packet_traits::size*4]; - EIGEN_ALIGN16 Scalar data2[internal::packet_traits::size*4]; - EIGEN_ALIGN16 Scalar ref[internal::packet_traits::size*4]; + EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; for (int i=0; i(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); data2[i] = internal::random(-1,1) * std::pow(Scalar(10), internal::random(-3,3)); } - CHECK_CWISE1_IF(internal::packet_traits::HasSin, std::sin, internal::psin); - CHECK_CWISE1_IF(internal::packet_traits::HasCos, std::cos, internal::pcos); - CHECK_CWISE1_IF(internal::packet_traits::HasTan, std::tan, internal::ptan); - + CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin); + CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos); + CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan); + + CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround); + CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil); + CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor); + for (int i=0; i(-1,1); data2[i] = internal::random(-1,1); } - CHECK_CWISE1_IF(internal::packet_traits::HasASin, std::asin, internal::pasin); - CHECK_CWISE1_IF(internal::packet_traits::HasACos, std::acos, internal::pacos); + CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin); + CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos); for (int i=0; i(-87,88); data2[i] = internal::random(-87,88); } - CHECK_CWISE1_IF(internal::packet_traits::HasExp, std::exp, internal::pexp); + CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp); + for (int i=0; i(-1,1) * std::pow(Scalar(10), internal::random(-6,6)); + data2[i] = internal::random(-1,1) * std::pow(Scalar(10), internal::random(-6,6)); + } + CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh); + if(PacketTraits::HasExp && PacketTraits::size>=2) + { + data1[0] = std::numeric_limits::quiet_NaN(); + data1[1] = std::numeric_limits::epsilon(); + packet_helper h; + h.store(data2, internal::pexp(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + VERIFY_IS_EQUAL(std::exp(std::numeric_limits::epsilon()), data2[1]); + + data1[0] = -std::numeric_limits::epsilon(); + data1[1] = 0; + h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::epsilon()), data2[0]); + VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]); + + data1[0] = (std::numeric_limits::min)(); + data1[1] = -(std::numeric_limits::min)(); + h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp((std::numeric_limits::min)()), data2[0]); + VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits::min)()), data2[1]); + + data1[0] = std::numeric_limits::denorm_min(); + data1[1] = -std::numeric_limits::denorm_min(); + h.store(data2, internal::pexp(h.load(data1))); + VERIFY_IS_EQUAL(std::exp(std::numeric_limits::denorm_min()), data2[0]); + VERIFY_IS_EQUAL(std::exp(-std::numeric_limits::denorm_min()), data2[1]); + } + + if (PacketTraits::HasTanh) { + // NOTE this test migh fail with GCC prior to 6.3, see MathFunctionsImpl.h for details. + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasTanh,Packet> h; + h.store(data2, internal::ptanh(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } + +#if EIGEN_HAS_C99_MATH + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasLGamma,Packet> h; + h.store(data2, internal::plgamma(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } { data1[0] = std::numeric_limits::quiet_NaN(); - packet_helper::HasExp,Packet> h; - h.store(data2, internal::pexp(h.load(data1))); - VERIFY(isNaN(data2[0])); + packet_helper::HasErf,Packet> h; + h.store(data2, internal::perf(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); } + { + data1[0] = std::numeric_limits::quiet_NaN(); + packet_helper::HasErfc,Packet> h; + h.store(data2, internal::perfc(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + } +#endif // EIGEN_HAS_C99_MATH for (int i=0; i(0,1) * std::pow(Scalar(10), internal::random(-6,6)); data2[i] = internal::random(0,1) * std::pow(Scalar(10), internal::random(-6,6)); } - if(internal::random(0,1)<0.1) + + if(internal::random(0,1)<0.1f) data1[internal::random(0, PacketSize)] = 0; - CHECK_CWISE1_IF(internal::packet_traits::HasSqrt, std::sqrt, internal::psqrt); - CHECK_CWISE1_IF(internal::packet_traits::HasLog, std::log, internal::plog); + CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); + CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); +#if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L) + CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p); + CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); + CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); + CHECK_CWISE1_IF(internal::packet_traits::HasErfc, std::erfc, internal::perfc); +#endif + + if(PacketTraits::HasLog && PacketTraits::size>=2) { data1[0] = std::numeric_limits::quiet_NaN(); - packet_helper::HasLog,Packet> h; + data1[1] = std::numeric_limits::epsilon(); + packet_helper h; h.store(data2, internal::plog(h.load(data1))); - VERIFY(isNaN(data2[0])); - data1[0] = -1.0f; + VERIFY((numext::isnan)(data2[0])); + VERIFY_IS_EQUAL(std::log(std::numeric_limits::epsilon()), data2[1]); + + data1[0] = -std::numeric_limits::epsilon(); + data1[1] = 0; + h.store(data2, internal::plog(h.load(data1))); + VERIFY((numext::isnan)(data2[0])); + VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]); + + data1[0] = (std::numeric_limits::min)(); + data1[1] = -(std::numeric_limits::min)(); + h.store(data2, internal::plog(h.load(data1))); + VERIFY_IS_EQUAL(std::log((std::numeric_limits::min)()), data2[0]); + VERIFY((numext::isnan)(data2[1])); + + data1[0] = std::numeric_limits::denorm_min(); + data1[1] = -std::numeric_limits::denorm_min(); + h.store(data2, internal::plog(h.load(data1))); + // VERIFY_IS_EQUAL(std::log(std::numeric_limits::denorm_min()), data2[0]); + VERIFY((numext::isnan)(data2[1])); + + data1[0] = Scalar(-1.0f); h.store(data2, internal::plog(h.load(data1))); - VERIFY(isNaN(data2[0])); -#if !EIGEN_FAST_MATH + VERIFY((numext::isnan)(data2[0])); h.store(data2, internal::psqrt(h.load(data1))); - VERIFY(isNaN(data2[0])); - VERIFY(isNaN(data2[1])); -#endif + VERIFY((numext::isnan)(data2[0])); + VERIFY((numext::isnan)(data2[1])); } } template void packetmath_notcomplex() { using std::abs; - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; + + EIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4]; - EIGEN_ALIGN16 Scalar data1[internal::packet_traits::size*4]; - EIGEN_ALIGN16 Scalar data2[internal::packet_traits::size*4]; - EIGEN_ALIGN16 Scalar ref[internal::packet_traits::size*4]; - - Array::Map(data1, internal::packet_traits::size*4).setRandom(); + Array::Map(data1, PacketTraits::size*4).setRandom(); ref[0] = data1[0]; for (int i=0; i(data1))) && "internal::predux_min"); - CHECK_CWISE2((std::min), internal::pmin); - CHECK_CWISE2((std::max), internal::pmax); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin); + VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax); + + CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin); + CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax); CHECK_CWISE1(abs, internal::pabs); ref[0] = data1[0]; for (int i=0; i(data1))) && "internal::predux_max"); - + for (int i=0; i(data1[0])); VERIFY(areApprox(ref, data2, PacketSize) && "internal::plset"); } template void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval) { - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; - + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; + internal::conj_if cj0; internal::conj_if cj1; internal::conj_helper cj; internal::conj_helper pcj; - + for(int i=0;i void test_conj_helper(Scalar } internal::pstore(pval,pcj.pmul(internal::pload(data1),internal::pload(data2))); VERIFY(areApprox(ref, pval, PacketSize) && "conj_helper pmul"); - + for(int i=0;i(data1),internal::pload(data2),internal::pload(pval))); VERIFY(areApprox(ref, pval, PacketSize) && "conj_helper pmadd"); } template void packetmath_complex() { - typedef typename internal::packet_traits::type Packet; - const int PacketSize = internal::packet_traits::size; + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + const int PacketSize = PacketTraits::size; const int size = PacketSize*4; - EIGEN_ALIGN16 Scalar data1[PacketSize*4]; - EIGEN_ALIGN16 Scalar data2[PacketSize*4]; - EIGEN_ALIGN16 Scalar ref[PacketSize*4]; - EIGEN_ALIGN16 Scalar pval[PacketSize*4]; + EIGEN_ALIGN_MAX Scalar data1[PacketSize*4]; + EIGEN_ALIGN_MAX Scalar data2[PacketSize*4]; + EIGEN_ALIGN_MAX Scalar ref[PacketSize*4]; + EIGEN_ALIGN_MAX Scalar pval[PacketSize*4]; for (int i=0; i() * Scalar(1e2); data2[i] = internal::random() * Scalar(1e2); } - + test_conj_helper (data1,data2,ref,pval); test_conj_helper (data1,data2,ref,pval); test_conj_helper (data1,data2,ref,pval); test_conj_helper (data1,data2,ref,pval); - + { for(int i=0;i(data1))); VERIFY(areApprox(ref, pval, PacketSize) && "pcplxflip"); } - - +} + +template void packetmath_scatter_gather() +{ + typedef internal::packet_traits PacketTraits; + typedef typename PacketTraits::type Packet; + typedef typename NumTraits::Real RealScalar; + const int PacketSize = PacketTraits::size; + EIGEN_ALIGN_MAX Scalar data1[PacketSize]; + RealScalar refvalue = 0; + for (int i=0; i()/RealScalar(PacketSize); + } + + int stride = internal::random(1,20); + + EIGEN_ALIGN_MAX Scalar buffer[PacketSize*20]; + memset(buffer, 0, 20*PacketSize*sizeof(Scalar)); + Packet packet = internal::pload(data1); + internal::pscatter(buffer, packet, stride); + + for (int i = 0; i < PacketSize*20; ++i) { + if ((i%stride) == 0 && i()/RealScalar(PacketSize); + } + packet = internal::pgather(buffer, 7); + internal::pstore(data1, packet); + for (int i = 0; i < PacketSize; ++i) { + VERIFY(isApproxAbs(data1[i], buffer[i*7], refvalue) && "pgather"); + } } void test_packetmath() @@ -370,17 +619,23 @@ void test_packetmath() CALL_SUBTEST_1( packetmath() ); CALL_SUBTEST_2( packetmath() ); CALL_SUBTEST_3( packetmath() ); - CALL_SUBTEST_1( packetmath >() ); - CALL_SUBTEST_2( packetmath >() ); + CALL_SUBTEST_4( packetmath >() ); + CALL_SUBTEST_5( packetmath >() ); CALL_SUBTEST_1( packetmath_notcomplex() ); CALL_SUBTEST_2( packetmath_notcomplex() ); CALL_SUBTEST_3( packetmath_notcomplex() ); - + CALL_SUBTEST_1( packetmath_real() ); CALL_SUBTEST_2( packetmath_real() ); - CALL_SUBTEST_1( packetmath_complex >() ); - CALL_SUBTEST_2( packetmath_complex >() ); + CALL_SUBTEST_4( packetmath_complex >() ); + CALL_SUBTEST_5( packetmath_complex >() ); + + CALL_SUBTEST_1( packetmath_scatter_gather() ); + CALL_SUBTEST_2( packetmath_scatter_gather() ); + CALL_SUBTEST_3( packetmath_scatter_gather() ); + CALL_SUBTEST_4( packetmath_scatter_gather >() ); + CALL_SUBTEST_5( packetmath_scatter_gather >() ); } } diff --git a/external/eigen3/test/pastix_support.cpp b/external/eigen3/test/pastix_support.cpp index 14da094..b62f857 100644 --- a/external/eigen3/test/pastix_support.cpp +++ b/external/eigen3/test/pastix_support.cpp @@ -7,6 +7,8 @@ // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS #include "sparse_solver.h" #include #include @@ -25,6 +27,14 @@ template void test_pastix_T() check_sparse_spd_solving(pastix_llt_upper); check_sparse_spd_solving(pastix_ldlt_upper); check_sparse_square_solving(pastix_lu); + + // Some compilation check: + pastix_llt_lower.iparm(); + pastix_llt_lower.dparm(); + pastix_ldlt_lower.iparm(); + pastix_ldlt_lower.dparm(); + pastix_lu.iparm(); + pastix_lu.dparm(); } // There is no support for selfadjoint matrices with PaStiX. diff --git a/external/eigen3/test/permutationmatrices.cpp b/external/eigen3/test/permutationmatrices.cpp index 7b0dbc7..db12665 100644 --- a/external/eigen3/test/permutationmatrices.cpp +++ b/external/eigen3/test/permutationmatrices.cpp @@ -7,6 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_ENABLE_TEMPORARY_TRACKING + #include "main.h" using namespace std; @@ -33,7 +35,9 @@ template void permutationmatrices(const MatrixType& m) RightPermutationVectorType rv; randomPermutationVector(rv, cols); RightPermutationType rp(rv); - MatrixType m_permuted = lp * m_original * rp; + MatrixType m_permuted = MatrixType::Random(rows,cols); + + VERIFY_EVALUATION_COUNT(m_permuted = lp * m_original * rp, 1); // 1 temp for sub expression "lp * m_original" for (int i=0; i void permutationmatrices(const MatrixType& m) Matrix rm(rp); VERIFY_IS_APPROX(m_permuted, lm*m_original*rm); - + + m_permuted = m_original; + VERIFY_EVALUATION_COUNT(m_permuted = lp * m_permuted * rp, 1); + VERIFY_IS_APPROX(m_permuted, lm*m_original*rm); + VERIFY_IS_APPROX(lp.inverse()*m_permuted*rp.inverse(), m_original); VERIFY_IS_APPROX(lv.asPermutation().inverse()*m_permuted*rv.asPermutation().inverse(), m_original); VERIFY_IS_APPROX(MapLeftPerm(lv.data(),lv.size()).inverse()*m_permuted*MapRightPerm(rv.data(),rv.size()).inverse(), m_original); @@ -63,22 +71,22 @@ template void permutationmatrices(const MatrixType& m) LeftPermutationType identityp; identityp.setIdentity(rows); VERIFY_IS_APPROX(m_original, identityp*m_original); - + // check inplace permutations m_permuted = m_original; - m_permuted = lp.inverse() * m_permuted; + VERIFY_EVALUATION_COUNT(m_permuted.noalias()= lp.inverse() * m_permuted, 1); // 1 temp to allocate the mask VERIFY_IS_APPROX(m_permuted, lp.inverse()*m_original); - + m_permuted = m_original; - m_permuted = m_permuted * rp.inverse(); + VERIFY_EVALUATION_COUNT(m_permuted.noalias() = m_permuted * rp.inverse(), 1); // 1 temp to allocate the mask VERIFY_IS_APPROX(m_permuted, m_original*rp.inverse()); - + m_permuted = m_original; - m_permuted = lp * m_permuted; + VERIFY_EVALUATION_COUNT(m_permuted.noalias() = lp * m_permuted, 1); // 1 temp to allocate the mask VERIFY_IS_APPROX(m_permuted, lp*m_original); - + m_permuted = m_original; - m_permuted = m_permuted * rp; + VERIFY_EVALUATION_COUNT(m_permuted.noalias() = m_permuted * rp, 1); // 1 temp to allocate the mask VERIFY_IS_APPROX(m_permuted, m_original*rp); if(rows>1 && cols>1) @@ -99,7 +107,38 @@ template void permutationmatrices(const MatrixType& m) rm = rp; rm.col(i).swap(rm.col(j)); VERIFY_IS_APPROX(rm, rp2.toDenseMatrix().template cast()); - } + } + + { + // simple compilation check + Matrix A = rp; + Matrix B = rp.transpose(); + VERIFY_IS_APPROX(A, B.transpose()); + } +} + +template +void bug890() +{ + typedef Matrix MatrixType; + typedef Matrix VectorType; + typedef Stride S; + typedef Map MapType; + typedef PermutationMatrix Perm; + + VectorType v1(2), v2(2), op(4), rhs(2); + v1 << 666,667; + op << 1,0,0,1; + rhs << 42,42; + + Perm P(2); + P.indices() << 1, 0; + + MapType(v1.data(),2,1,S(1,1)) = P * MapType(rhs.data(),2,1,S(1,1)); + VERIFY_IS_APPROX(v1, (P * rhs).eval()); + + MapType(v1.data(),2,1,S(1,1)) = P.inverse() * MapType(rhs.data(),2,1,S(1,1)); + VERIFY_IS_APPROX(v1, (P.inverse() * rhs).eval()); } void test_permutationmatrices() @@ -113,4 +152,5 @@ void test_permutationmatrices() CALL_SUBTEST_6( permutationmatrices(Matrix(20, 30)) ); CALL_SUBTEST_7( permutationmatrices(MatrixXcf(15, 10)) ); } + CALL_SUBTEST_5( bug890() ); } diff --git a/external/eigen3/test/product.h b/external/eigen3/test/product.h index 397af24..3b65112 100644 --- a/external/eigen3/test/product.h +++ b/external/eigen3/test/product.h @@ -22,7 +22,6 @@ template void product(const MatrixType& m) /* this test covers the following files: Identity.h Product.h */ - typedef typename MatrixType::Index Index; typedef typename MatrixType::Scalar Scalar; typedef Matrix RowVectorType; typedef Matrix ColVectorType; @@ -112,6 +111,23 @@ template void product(const MatrixType& m) vcres.noalias() -= m1.transpose() * v1; VERIFY_IS_APPROX(vcres, vc2 - m1.transpose() * v1); + // test d ?= a+b*c rules + res.noalias() = square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + res.noalias() += square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, 2*(square + m1 * m2.transpose())); + res.noalias() -= square + m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square + m1 * m2.transpose()); + + // test d ?= a-b*c rules + res.noalias() = square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square - m1 * m2.transpose()); + res.noalias() += square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, 2*(square - m1 * m2.transpose())); + res.noalias() -= square - m1 * m2.transpose(); + VERIFY_IS_APPROX(res, square - m1 * m2.transpose()); + + tm1 = m1; VERIFY_IS_APPROX(tm1.transpose() * v1, m1.transpose() * v1); VERIFY_IS_APPROX(v1.transpose() * tm1, v1.transpose() * m1); @@ -152,19 +168,44 @@ template void product(const MatrixType& m) VERIFY_IS_APPROX(res2.block(0,0,1,cols).noalias() = m1.block(0,0,1,cols) * square2, (ref2.row(0) = m1.row(0) * square2)); } + // vector.block() (see bug 1283) + { + RowVectorType w1(rows); + VERIFY_IS_APPROX(square * v1.block(0,0,rows,1), square * v1); + VERIFY_IS_APPROX(w1.noalias() = square * v1.block(0,0,rows,1), square * v1); + VERIFY_IS_APPROX(w1.block(0,0,rows,1).noalias() = square * v1.block(0,0,rows,1), square * v1); + + Matrix w2(cols); + VERIFY_IS_APPROX(vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = vc2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + + vc2 = square2.block(0,0,1,cols).transpose(); + VERIFY_IS_APPROX(square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = square2.block(0,0,1,cols) * square2, vc2.transpose() * square2); + + vc2 = square2.block(0,0,cols,1); + VERIFY_IS_APPROX(square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.noalias() = square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + VERIFY_IS_APPROX(w2.block(0,0,1,cols).noalias() = square2.block(0,0,cols,1).transpose() * square2, vc2.transpose() * square2); + } + // inner product { Scalar x = square2.row(c) * square2.col(c2); VERIFY_IS_APPROX(x, square2.row(c).transpose().cwiseProduct(square2.col(c2)).sum()); } - + // outer product - VERIFY_IS_APPROX(m1.col(c) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); - VERIFY_IS_APPROX(m1.row(r).transpose() * m1.col(c).transpose(), m1.block(r,0,1,cols).transpose() * m1.block(0,c,rows,1).transpose()); - VERIFY_IS_APPROX(m1.block(0,c,rows,1) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); - VERIFY_IS_APPROX(m1.col(c) * m1.block(r,0,1,cols), m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); - VERIFY_IS_APPROX(m1.leftCols(1) * m1.row(r), m1.block(0,0,rows,1) * m1.block(r,0,1,cols)); - VERIFY_IS_APPROX(m1.col(c) * m1.topRows(1), m1.block(0,c,rows,1) * m1.block(0,0,1,cols)); + { + VERIFY_IS_APPROX(m1.col(c) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); + VERIFY_IS_APPROX(m1.row(r).transpose() * m1.col(c).transpose(), m1.block(r,0,1,cols).transpose() * m1.block(0,c,rows,1).transpose()); + VERIFY_IS_APPROX(m1.block(0,c,rows,1) * m1.row(r), m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); + VERIFY_IS_APPROX(m1.col(c) * m1.block(r,0,1,cols), m1.block(0,c,rows,1) * m1.block(r,0,1,cols)); + VERIFY_IS_APPROX(m1.leftCols(1) * m1.row(r), m1.block(0,0,rows,1) * m1.block(r,0,1,cols)); + VERIFY_IS_APPROX(m1.col(c) * m1.topRows(1), m1.block(0,c,rows,1) * m1.block(0,0,1,cols)); + } // Aliasing { @@ -186,4 +227,5 @@ template void product(const MatrixType& m) VERIFY_IS_APPROX(square * (s1*(square*square)), s1 * square * square * square); VERIFY_IS_APPROX(square * (square*square).conjugate(), square * square.conjugate() * square.conjugate()); } + } diff --git a/external/eigen3/test/product_extra.cpp b/external/eigen3/test/product_extra.cpp index ea24869..e2b855b 100644 --- a/external/eigen3/test/product_extra.cpp +++ b/external/eigen3/test/product_extra.cpp @@ -98,6 +98,16 @@ template void product_extra(const MatrixType& m) // regression test MatrixType tmp = m1 * m1.adjoint() * s1; VERIFY_IS_APPROX(tmp, m1 * m1.adjoint() * s1); + + // regression test for bug 1343, assignment to arrays + Array a1 = m1 * vc2; + VERIFY_IS_APPROX(a1.matrix(),m1*vc2); + Array a2 = s1 * (m1 * vc2); + VERIFY_IS_APPROX(a2.matrix(),s1*m1*vc2); + Array a3 = v1 * m1; + VERIFY_IS_APPROX(a3.matrix(),v1*m1); + Array a4 = m1 * m2.adjoint(); + VERIFY_IS_APPROX(a4.matrix(),m1*m2.adjoint()); } // Regression test for bug reported at http://forum.kde.org/viewtopic.php?f=74&t=96947 @@ -116,8 +126,8 @@ void zero_sized_objects(const MatrixType& m) typedef typename MatrixType::Scalar Scalar; const int PacketSize = internal::packet_traits::size; const int PacketSize1 = PacketSize>1 ? PacketSize-1 : 1; - DenseIndex rows = m.rows(); - DenseIndex cols = m.cols(); + Index rows = m.rows(); + Index cols = m.cols(); { MatrixType res, a(rows,0), b(0,cols); @@ -169,6 +179,7 @@ void zero_sized_objects(const MatrixType& m) } } +template void bug_127() { // Bug 127 @@ -193,6 +204,16 @@ void bug_127() a*b; } +template void bug_817() +{ + ArrayXXf B = ArrayXXf::Random(10,10), C; + VectorXf x = VectorXf::Random(10); + C = (x.transpose()*B.matrix()); + B = (x.transpose()*B.matrix()); + VERIFY_IS_APPROX(B,C); +} + +template void unaligned_objects() { // Regression test for the bug reported here: @@ -222,6 +243,116 @@ void unaligned_objects() } } +template +EIGEN_DONT_INLINE +Index test_compute_block_size(Index m, Index n, Index k) +{ + Index mc(m), nc(n), kc(k); + internal::computeProductBlockingSizes(kc, mc, nc); + return kc+mc+nc; +} + +template +Index compute_block_size() +{ + Index ret = 0; + ret += test_compute_block_size(0,1,1); + ret += test_compute_block_size(1,0,1); + ret += test_compute_block_size(1,1,0); + ret += test_compute_block_size(0,0,1); + ret += test_compute_block_size(0,1,0); + ret += test_compute_block_size(1,0,0); + ret += test_compute_block_size(0,0,0); + return ret; +} + +template +void aliasing_with_resize() +{ + Index m = internal::random(10,50); + Index n = internal::random(10,50); + MatrixXd A, B, C(m,n), D(m,m); + VectorXd a, b, c(n); + C.setRandom(); + D.setRandom(); + c.setRandom(); + double s = internal::random(1,10); + + A = C; + B = A * A.transpose(); + A = A * A.transpose(); + VERIFY_IS_APPROX(A,B); + + A = C; + B = (A * A.transpose())/s; + A = (A * A.transpose())/s; + VERIFY_IS_APPROX(A,B); + + A = C; + B = (A * A.transpose()) + D; + A = (A * A.transpose()) + D; + VERIFY_IS_APPROX(A,B); + + A = C; + B = D + (A * A.transpose()); + A = D + (A * A.transpose()); + VERIFY_IS_APPROX(A,B); + + A = C; + B = s * (A * A.transpose()); + A = s * (A * A.transpose()); + VERIFY_IS_APPROX(A,B); + + A = C; + a = c; + b = (A * a)/s; + a = (A * a)/s; + VERIFY_IS_APPROX(a,b); +} + +template +void bug_1308() +{ + int n = 10; + MatrixXd r(n,n); + VectorXd v = VectorXd::Random(n); + r = v * RowVectorXd::Ones(n); + VERIFY_IS_APPROX(r, v.rowwise().replicate(n)); + r = VectorXd::Ones(n) * v.transpose(); + VERIFY_IS_APPROX(r, v.rowwise().replicate(n).transpose()); + + Matrix4d ones44 = Matrix4d::Ones(); + Matrix4d m44 = Matrix4d::Ones() * Matrix4d::Ones(); + VERIFY_IS_APPROX(m44,Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(m44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + + typedef Matrix RMatrix4d; + RMatrix4d r44 = Matrix4d::Ones() * Matrix4d::Ones(); + VERIFY_IS_APPROX(r44,Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*Matrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=Matrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44*RMatrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=ones44.transpose()*RMatrix4d::Ones(), Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44, Matrix4d::Constant(4)); + VERIFY_IS_APPROX(r44.noalias()=RMatrix4d::Ones()*ones44.transpose(), Matrix4d::Constant(4)); + +// RowVector4d r4; + m44.setOnes(); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += m44.row(0).transpose() * RowVector4d::Ones(), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += m44.col(0) * RowVector4d::Ones(), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.row(0), ones44); + r44.setZero(); + VERIFY_IS_APPROX(r44.noalias() += Vector4d::Ones() * m44.col(0).transpose(), ones44); +} + void test_product_extra() { for(int i = 0; i < g_repeat; i++) { @@ -232,6 +363,13 @@ void test_product_extra() CALL_SUBTEST_4( product_extra(MatrixXcd(internal::random(1,EIGEN_TEST_MAX_SIZE/2), internal::random(1,EIGEN_TEST_MAX_SIZE/2))) ); CALL_SUBTEST_1( zero_sized_objects(MatrixXf(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } - CALL_SUBTEST_5( bug_127() ); - CALL_SUBTEST_6( unaligned_objects() ); + CALL_SUBTEST_5( bug_127<0>() ); + CALL_SUBTEST_5( bug_817<0>() ); + CALL_SUBTEST_5( bug_1308<0>() ); + CALL_SUBTEST_6( unaligned_objects<0>() ); + CALL_SUBTEST_7( compute_block_size() ); + CALL_SUBTEST_7( compute_block_size() ); + CALL_SUBTEST_7( compute_block_size >() ); + CALL_SUBTEST_8( aliasing_with_resize() ); + } diff --git a/external/eigen3/test/product_large.cpp b/external/eigen3/test/product_large.cpp index 6bb4d1a..845cd40 100644 --- a/external/eigen3/test/product_large.cpp +++ b/external/eigen3/test/product_large.cpp @@ -21,12 +21,12 @@ void test_aliasing() VectorType y(rows); y.setZero(); MatrixType A(rows,cols); A.setRandom(); // CwiseBinaryOp - VERIFY_IS_APPROX(x = y + A*x, A*z); + VERIFY_IS_APPROX(x = y + A*x, A*z); // OK because "y + A*x" is marked as "assume-aliasing" x = z; // CwiseUnaryOp - VERIFY_IS_APPROX(x = T(1.)*(A*x), A*z); + VERIFY_IS_APPROX(x = T(1.)*(A*x), A*z); // OK because 1*(A*x) is replaced by (1*A*x) which is a Product<> expression x = z; - VERIFY_IS_APPROX(x = y+(-(A*x)), -A*z); + // VERIFY_IS_APPROX(x = y-A*x, -A*z); // Not OK in 3.3 because x is resized before A*x gets evaluated x = z; } @@ -62,15 +62,16 @@ void test_product_large() // check the functions to setup blocking sizes compile and do not segfault // FIXME check they do what they are supposed to do !! std::ptrdiff_t l1 = internal::random(10000,20000); - std::ptrdiff_t l2 = internal::random(1000000,2000000); - setCpuCacheSizes(l1,l2); + std::ptrdiff_t l2 = internal::random(100000,200000); + std::ptrdiff_t l3 = internal::random(1000000,2000000); + setCpuCacheSizes(l1,l2,l3); VERIFY(l1==l1CacheSize()); VERIFY(l2==l2CacheSize()); std::ptrdiff_t k1 = internal::random(10,100)*16; std::ptrdiff_t m1 = internal::random(10,100)*16; std::ptrdiff_t n1 = internal::random(10,100)*16; // only makes sure it compiles fine - internal::computeProductBlockingSizes(k1,m1,n1); + internal::computeProductBlockingSizes(k1,m1,n1,1); } { @@ -83,5 +84,24 @@ void test_product_large() MatrixXf r2 = mat1.row(2)*mat2; VERIFY_IS_APPROX(r2, (mat1.row(2)*mat2).eval()); } + + { + Eigen::MatrixXd A(10,10), B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } +#endif + + // Regression test for bug 714: +#if defined EIGEN_HAS_OPENMP + omp_set_dynamic(1); + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_6( product(Matrix(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + } #endif } diff --git a/external/eigen3/test/product_mmtr.cpp b/external/eigen3/test/product_mmtr.cpp index aeba009..d3e24b0 100644 --- a/external/eigen3/test/product_mmtr.cpp +++ b/external/eigen3/test/product_mmtr.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2010 Gael Guennebaud +// Copyright (C) 2010-2017 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -10,11 +10,19 @@ #include "main.h" #define CHECK_MMTR(DEST, TRI, OP) { \ + ref3 = DEST; \ ref2 = ref1 = DEST; \ DEST.template triangularView() OP; \ ref1 OP; \ - ref2.template triangularView() = ref1; \ + ref2.template triangularView() \ + = ref1.template triangularView(); \ VERIFY_IS_APPROX(DEST,ref2); \ + \ + DEST = ref3; \ + ref3 = ref2; \ + ref3.diagonal() = DEST.diagonal(); \ + DEST.template triangularView() OP; \ + VERIFY_IS_APPROX(DEST,ref3); \ } template void mmtr(int size) @@ -26,12 +34,14 @@ template void mmtr(int size) MatrixColMaj matc = MatrixColMaj::Zero(size, size); MatrixRowMaj matr = MatrixRowMaj::Zero(size, size); - MatrixColMaj ref1(size, size), ref2(size, size); + MatrixColMaj ref1(size, size), ref2(size, size), ref3(size,size); MatrixColMaj soc(size,othersize); soc.setRandom(); MatrixColMaj osc(othersize,size); osc.setRandom(); MatrixRowMaj sor(size,othersize); sor.setRandom(); MatrixRowMaj osr(othersize,size); osr.setRandom(); + MatrixColMaj sqc(size,size); sqc.setRandom(); + MatrixRowMaj sqr(size,size); sqr.setRandom(); Scalar s = internal::random(); @@ -49,6 +59,29 @@ template void mmtr(int size) CHECK_MMTR(matc, Upper, -= s*(osc.transpose()*osc.conjugate())); CHECK_MMTR(matr, Lower, -= s*soc*soc.adjoint()); CHECK_MMTR(matr, Upper, -= soc*(s*soc.adjoint())); + + CHECK_MMTR(matc, Lower, -= s*sqr*sqc.template triangularView()); + CHECK_MMTR(matc, Upper, = s*sqc*sqr.template triangularView()); + CHECK_MMTR(matc, Lower, += s*sqr*sqc.template triangularView()); + CHECK_MMTR(matc, Upper, = s*sqc*sqc.template triangularView()); + + CHECK_MMTR(matc, Lower, = (s*sqr).template triangularView()*sqc); + CHECK_MMTR(matc, Upper, -= (s*sqc).template triangularView()*sqc); + CHECK_MMTR(matc, Lower, = (s*sqr).template triangularView()*sqc); + CHECK_MMTR(matc, Upper, += (s*sqc).template triangularView()*sqc); + + // check aliasing + ref2 = ref1 = matc; + ref1 = sqc.adjoint() * matc * sqc; + ref2.template triangularView() = ref1.template triangularView(); + matc.template triangularView() = sqc.adjoint() * matc * sqc; + VERIFY_IS_APPROX(matc, ref2); + + ref2 = ref1 = matc; + ref1 = sqc * matc * sqc.adjoint(); + ref2.template triangularView() = ref1.template triangularView(); + matc.template triangularView() = sqc * matc * sqc.adjoint(); + VERIFY_IS_APPROX(matc, ref2); } void test_product_mmtr() diff --git a/external/eigen3/test/product_notemporary.cpp b/external/eigen3/test/product_notemporary.cpp index 5599d26..30592b7 100644 --- a/external/eigen3/test/product_notemporary.cpp +++ b/external/eigen3/test/product_notemporary.cpp @@ -7,25 +7,10 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -static int nb_temporaries; - -inline void on_temporary_creation(int size) { - // here's a great place to set a breakpoint when debugging failures in this test! - if(size!=0) nb_temporaries++; -} - - -#define EIGEN_DENSE_STORAGE_CTOR_PLUGIN { on_temporary_creation(size); } +#define TEST_ENABLE_TEMPORARY_TRACKING #include "main.h" -#define VERIFY_EVALUATION_COUNT(XPR,N) {\ - nb_temporaries = 0; \ - XPR; \ - if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ - VERIFY( (#XPR) && nb_temporaries==N ); \ - } - template void product_notemporary(const MatrixType& m) { /* This test checks the number of temporaries created @@ -62,14 +47,19 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( m3.noalias() = m1 * m2.adjoint(), 0); VERIFY_EVALUATION_COUNT( m3 = s1 * (m1 * m2.transpose()), 1); - VERIFY_EVALUATION_COUNT( m3 = m3 + s1 * (m1 * m2.transpose()), 1); +// VERIFY_EVALUATION_COUNT( m3 = m3 + s1 * (m1 * m2.transpose()), 1); VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * (m1 * m2.transpose()), 0); VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()), 1); + VERIFY_EVALUATION_COUNT( m3 = m3 - (m1 * m2.adjoint()), 1); + VERIFY_EVALUATION_COUNT( m3 = m3 + (m1 * m2.adjoint()).transpose(), 1); - VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 1); // 0 in 3.3 - VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 1); // 0 in 3.3 - VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 1); // 0 in 3.3 + VERIFY_EVALUATION_COUNT( m3.noalias() = m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() += m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 + m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() = m3 - m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() += m3 - m1 * m2.transpose(), 0); + VERIFY_EVALUATION_COUNT( m3.noalias() -= m3 - m1 * m2.transpose(), 0); VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * m2.adjoint(), 0); VERIFY_EVALUATION_COUNT( m3.noalias() = s1 * m1 * s2 * (m1*s3+m2*s2).adjoint(), 1); @@ -86,7 +76,7 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( m3.noalias() -= (s1 * m1).template triangularView() * m2, 0); VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView() * (m2+m2), 1); VERIFY_EVALUATION_COUNT( rm3.noalias() = (s1 * m1.adjoint()).template triangularView() * m2.adjoint(), 0); - + VERIFY_EVALUATION_COUNT( m3.template triangularView() = (m1 * m2.adjoint()), 0); VERIFY_EVALUATION_COUNT( m3.template triangularView() -= (m1 * m2.adjoint()), 0); @@ -123,8 +113,7 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( Scalar tmp = 0; tmp += Scalar(RealScalar(1)) / (m3.transpose() * m3).diagonal().array().abs().sum(), 0 ); // Zero temporaries for ... CoeffBasedProductMode - // - does not work with GCC because of the <..>, we'ld need variadic macros ... - //VERIFY_EVALUATION_COUNT( m3.col(0).head<5>() * m3.col(0).transpose() + m3.col(0).head<5>() * m3.col(0).transpose(), 0 ); + VERIFY_EVALUATION_COUNT( m3.col(0).template head<5>() * m3.col(0).transpose() + m3.col(0).template head<5>() * m3.col(0).transpose(), 0 ); // Check matrix * vectors VERIFY_EVALUATION_COUNT( cvres.noalias() = m1 * cv1, 0 ); @@ -132,6 +121,26 @@ template void product_notemporary(const MatrixType& m) VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.col(0), 0 ); VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * rv1.adjoint(), 0 ); VERIFY_EVALUATION_COUNT( cvres.noalias() -= m1 * m2.row(0).transpose(), 0 ); + + VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * cv1, 0 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (m1+m1) * (m1*cv1), 1 ); + VERIFY_EVALUATION_COUNT( cvres.noalias() = (rm3+rm3) * (m1*cv1), 1 ); + + // Check outer products + m3 = cv1 * rv1; + VERIFY_EVALUATION_COUNT( m3.noalias() = cv1 * rv1, 0 ); + VERIFY_EVALUATION_COUNT( m3.noalias() = (cv1+cv1) * (rv1+rv1), 1 ); + VERIFY_EVALUATION_COUNT( m3.noalias() = (m1*cv1) * (rv1), 1 ); + VERIFY_EVALUATION_COUNT( m3.noalias() += (m1*cv1) * (rv1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (cv1) * (rv1 * m1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() -= (cv1) * (rv1 * m1), 1 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() = (m1*cv1) * (rv1 * m1), 2 ); + VERIFY_EVALUATION_COUNT( rm3.noalias() += (m1*cv1) * (rv1 * m1), 2 ); + + // Check nested products + VERIFY_EVALUATION_COUNT( cvres.noalias() = m1.adjoint() * m1 * cv1, 1 ); + VERIFY_EVALUATION_COUNT( rvres.noalias() = rv1 * (m1 * m2.adjoint()), 1 ); } void test_product_notemporary() @@ -140,11 +149,12 @@ void test_product_notemporary() for(int i = 0; i < g_repeat; i++) { s = internal::random(16,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_1( product_notemporary(MatrixXf(s, s)) ); - s = internal::random(16,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_2( product_notemporary(MatrixXd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(16,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_3( product_notemporary(MatrixXcf(s,s)) ); - s = internal::random(16,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_4( product_notemporary(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } } diff --git a/external/eigen3/test/product_selfadjoint.cpp b/external/eigen3/test/product_selfadjoint.cpp index 374e239..3d768aa 100644 --- a/external/eigen3/test/product_selfadjoint.cpp +++ b/external/eigen3/test/product_selfadjoint.cpp @@ -67,14 +67,21 @@ void test_product_selfadjoint() CALL_SUBTEST_1( product_selfadjoint(Matrix()) ); CALL_SUBTEST_2( product_selfadjoint(Matrix()) ); CALL_SUBTEST_3( product_selfadjoint(Matrix3d()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_4( product_selfadjoint(MatrixXcf(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_5( product_selfadjoint(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_6( product_selfadjoint(MatrixXd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_7( product_selfadjoint(Matrix(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } - TEST_SET_BUT_UNUSED_VARIABLE(s) } diff --git a/external/eigen3/test/product_small.cpp b/external/eigen3/test/product_small.cpp index 8b132ab..fdfdd9f 100644 --- a/external/eigen3/test/product_small.cpp +++ b/external/eigen3/test/product_small.cpp @@ -9,8 +9,10 @@ #define EIGEN_NO_STATIC_ASSERT #include "product.h" +#include // regression test for bug 447 +template void product1x1() { Matrix matAstatic; @@ -28,16 +30,237 @@ void product1x1() matAdynamic.cwiseProduct(matBdynamic.transpose()).sum() ); } +template +const TC& ref_prod(TC &C, const TA &A, const TB &B) +{ + for(Index i=0;i +typename internal::enable_if::type +test_lazy_single(int rows, int cols, int depth) +{ + Matrix A(rows,depth); A.setRandom(); + Matrix B(depth,cols); B.setRandom(); + Matrix C(rows,cols); C.setRandom(); + Matrix D(C); + VERIFY_IS_APPROX(C+=A.lazyProduct(B), ref_prod(D,A,B)); +} + +template +typename internal::enable_if< ( (Rows ==1&&Depth!=1&&OA==ColMajor) + || (Depth==1&&Rows !=1&&OA==RowMajor) + || (Cols ==1&&Depth!=1&&OB==RowMajor) + || (Depth==1&&Cols !=1&&OB==ColMajor) + || (Rows ==1&&Cols !=1&&OC==ColMajor) + || (Cols ==1&&Rows !=1&&OC==RowMajor)),void>::type +test_lazy_single(int, int, int) +{ +} + +template +void test_lazy_all_layout(int rows=Rows, int cols=Cols, int depth=Depth) +{ + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); + CALL_SUBTEST(( test_lazy_single(rows,cols,depth) )); +} + +template +void test_lazy_l1() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + + // Inner + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(1,1,depth) )); + + // Outer + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(4,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(7,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,cols) )); +} + +template +void test_lazy_l2() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + + // mat-vec + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(4,1,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,1,depth) )); + + // vec-mat + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(1,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(1,4,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(1,cols,depth) )); +} + +template +void test_lazy_l3() +{ + int rows = internal::random(1,12); + int cols = internal::random(1,12); + int depth = internal::random(1,12); + // mat-mat + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(rows) )); + CALL_SUBTEST(( test_lazy_all_layout(4,3,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(rows,6,depth) )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout() )); + CALL_SUBTEST(( test_lazy_all_layout(8,cols) )); + CALL_SUBTEST(( test_lazy_all_layout(3,4,depth) )); + CALL_SUBTEST(( test_lazy_all_layout(4,cols,depth) )); +} + +template +void test_linear_but_not_vectorizable() +{ + // Check tricky cases for which the result of the product is a vector and thus must exhibit the LinearBit flag, + // but is not vectorizable along the linear dimension. + Index n = N==Dynamic ? internal::random(1,32) : N; + Index m = M==Dynamic ? internal::random(1,32) : M; + Index k = K==Dynamic ? internal::random(1,32) : K; + + { + Matrix A; A.setRandom(n,m+1); + Matrix B; B.setRandom(m*2,k); + Matrix C; + Matrix R; + + C.noalias() = A.template topLeftCorner<1,M>() * (B.template topRows()+B.template bottomRows()); + R.noalias() = A.template topLeftCorner<1,M>() * (B.template topRows()+B.template bottomRows()).eval(); + VERIFY_IS_APPROX(C,R); + } + + { + Matrix A; A.setRandom(m+1,n); + Matrix B; B.setRandom(k,m*2); + Matrix C; + Matrix R; + + C.noalias() = (B.template leftCols()+B.template rightCols()) * A.template topLeftCorner(); + R.noalias() = (B.template leftCols()+B.template rightCols()).eval() * A.template topLeftCorner(); + VERIFY_IS_APPROX(C,R); + } +} + +template +void bug_1311() +{ + Matrix< double, Rows, 2 > A; A.setRandom(); + Vector2d b = Vector2d::Random() ; + Matrix res; + res.noalias() = 1. * (A * b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = 1.*A * b; + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (1.*A).lazyProduct(b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (1.*A).lazyProduct(1.*b); + VERIFY_IS_APPROX(res, A*b); + res.noalias() = (A).lazyProduct(1.*b); + VERIFY_IS_APPROX(res, A*b); +} void test_product_small() { for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( product(Matrix()) ); - CALL_SUBTEST_2( product(Matrix()) ); + CALL_SUBTEST_2( product(Matrix()) ); + CALL_SUBTEST_8( product(Matrix()) ); CALL_SUBTEST_3( product(Matrix3d()) ); CALL_SUBTEST_4( product(Matrix4d()) ); CALL_SUBTEST_5( product(Matrix4f()) ); - CALL_SUBTEST_6( product1x1() ); + CALL_SUBTEST_6( product1x1<0>() ); + + CALL_SUBTEST_11( test_lazy_l1() ); + CALL_SUBTEST_12( test_lazy_l2() ); + CALL_SUBTEST_13( test_lazy_l3() ); + + CALL_SUBTEST_21( test_lazy_l1() ); + CALL_SUBTEST_22( test_lazy_l2() ); + CALL_SUBTEST_23( test_lazy_l3() ); + + CALL_SUBTEST_31( test_lazy_l1 >() ); + CALL_SUBTEST_32( test_lazy_l2 >() ); + CALL_SUBTEST_33( test_lazy_l3 >() ); + + CALL_SUBTEST_41( test_lazy_l1 >() ); + CALL_SUBTEST_42( test_lazy_l2 >() ); + CALL_SUBTEST_43( test_lazy_l3 >() ); + + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + + CALL_SUBTEST_6( bug_1311<3>() ); + CALL_SUBTEST_6( bug_1311<5>() ); } #ifdef EIGEN_TEST_PART_6 @@ -46,5 +269,25 @@ void test_product_small() Vector3f v = Vector3f::Random(); VERIFY_IS_APPROX( (v * v.transpose()) * v, (v * v.transpose()).eval() * v); } + + { + // regression test for pull-request #93 + Eigen::Matrix A; A.setRandom(); + Eigen::Matrix B; B.setRandom(); + Eigen::Matrix C; C.setRandom(); + VERIFY_IS_APPROX(B * A.inverse(), B * A.inverse()[0]); + VERIFY_IS_APPROX(A.inverse() * C, A.inverse()[0] * C); + } + + { + Eigen::Matrix A, B, C; + A.setRandom(); + C = A; + for(int k=0; k<79; ++k) + C = C * A; + B.noalias() = (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))) + * (((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A)) * ((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))*((A*A)*(A*A))); + VERIFY_IS_APPROX(B,C); + } #endif } diff --git a/external/eigen3/test/product_symm.cpp b/external/eigen3/test/product_symm.cpp index 74d7329..8c44383 100644 --- a/external/eigen3/test/product_symm.cpp +++ b/external/eigen3/test/product_symm.cpp @@ -39,6 +39,24 @@ template void symm(int size = Size, in VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView() * (s2*rhs1), rhs13 = (s1*m1) * (s2*rhs1)); + VERIFY_IS_APPROX(rhs12 = (s1*m2).transpose().template selfadjointView() * (s2*rhs1), + rhs13 = (s1*m1.transpose()) * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView().transpose() * (s2*rhs1), + rhs13 = (s1*m1.transpose()) * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).conjugate().template selfadjointView() * (s2*rhs1), + rhs13 = (s1*m1).conjugate() * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView().conjugate() * (s2*rhs1), + rhs13 = (s1*m1).conjugate() * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).adjoint().template selfadjointView() * (s2*rhs1), + rhs13 = (s1*m1).adjoint() * (s2*rhs1)); + + VERIFY_IS_APPROX(rhs12 = (s1*m2).template selfadjointView().adjoint() * (s2*rhs1), + rhs13 = (s1*m1).adjoint() * (s2*rhs1)); + m2 = m1.template triangularView(); rhs12.setRandom(); rhs13 = rhs12; m3 = m2.template selfadjointView(); VERIFY_IS_EQUAL(m1, m3); diff --git a/external/eigen3/test/product_syrk.cpp b/external/eigen3/test/product_syrk.cpp index 73c9500..e10f0f2 100644 --- a/external/eigen3/test/product_syrk.cpp +++ b/external/eigen3/test/product_syrk.cpp @@ -125,11 +125,12 @@ void test_product_syrk() int s; s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_1( syrk(MatrixXf(s, s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_2( syrk(MatrixXd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_3( syrk(MatrixXcf(s, s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_4( syrk(MatrixXcd(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } } diff --git a/external/eigen3/test/product_trmm.cpp b/external/eigen3/test/product_trmm.cpp index d715b9a..12e5544 100644 --- a/external/eigen3/test/product_trmm.cpp +++ b/external/eigen3/test/product_trmm.cpp @@ -9,10 +9,18 @@ #include "main.h" +template +int get_random_size() +{ + const int factor = NumTraits::ReadCost; + const int max_test_size = EIGEN_TEST_MAX_SIZE>2*factor ? EIGEN_TEST_MAX_SIZE/factor : EIGEN_TEST_MAX_SIZE; + return internal::random(1,max_test_size); +} + template -void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), - int cols=internal::random(1,EIGEN_TEST_MAX_SIZE), - int otherCols = OtherCols==Dynamic?internal::random(1,EIGEN_TEST_MAX_SIZE):OtherCols) +void trmm(int rows=get_random_size(), + int cols=get_random_size(), + int otherCols = OtherCols==Dynamic?get_random_size():OtherCols) { typedef Matrix TriMatrix; typedef Matrix OnTheRight; @@ -42,13 +50,13 @@ void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), VERIFY_IS_APPROX( ge_xs.noalias() = mat.template triangularView() * ge_right, tri * ge_right); VERIFY_IS_APPROX( ge_sx.noalias() = ge_left * mat.template triangularView(), ge_left * tri); - + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView() * (s2*ge_left.transpose()), s1*triTr.conjugate() * (s2*ge_left.transpose())); VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.transpose() * mat.adjoint().template triangularView(), ge_right.transpose() * triTr.conjugate()); VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView() * (s2*ge_left.adjoint()), s1*triTr.conjugate() * (s2*ge_left.adjoint())); VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.adjoint() * mat.adjoint().template triangularView(), ge_right.adjoint() * triTr.conjugate()); - + ge_xs_save = ge_xs; VERIFY_IS_APPROX( (ge_xs_save + s1*triTr.conjugate() * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.adjoint()).template triangularView() * (s2*ge_left.adjoint()) ); ge_sx.setRandom(); @@ -61,13 +69,13 @@ void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), } template -void trmv(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), int cols=internal::random(1,EIGEN_TEST_MAX_SIZE)) +void trmv(int rows=get_random_size(), int cols=get_random_size()) { trmm(rows,cols,1); } template -void trmm(int rows=internal::random(1,EIGEN_TEST_MAX_SIZE), int cols=internal::random(1,EIGEN_TEST_MAX_SIZE), int otherCols = internal::random(1,EIGEN_TEST_MAX_SIZE)) +void trmm(int rows=get_random_size(), int cols=get_random_size(), int otherCols = get_random_size()) { trmm(rows,cols,otherCols); } diff --git a/external/eigen3/test/product_trmv.cpp b/external/eigen3/test/product_trmv.cpp index 4c3c435..57a202a 100644 --- a/external/eigen3/test/product_trmv.cpp +++ b/external/eigen3/test/product_trmv.cpp @@ -78,12 +78,14 @@ void test_product_trmv() CALL_SUBTEST_1( trmv(Matrix()) ); CALL_SUBTEST_2( trmv(Matrix()) ); CALL_SUBTEST_3( trmv(Matrix3d()) ); + s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_4( trmv(MatrixXcf(s,s)) ); - s = internal::random(1,EIGEN_TEST_MAX_SIZE/2); CALL_SUBTEST_5( trmv(MatrixXcd(s,s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) + s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_6( trmv(Matrix(s, s)) ); + TEST_SET_BUT_UNUSED_VARIABLE(s) } - TEST_SET_BUT_UNUSED_VARIABLE(s); } diff --git a/external/eigen3/test/product_trsolve.cpp b/external/eigen3/test/product_trsolve.cpp index 69892b3..4b97fa9 100644 --- a/external/eigen3/test/product_trsolve.cpp +++ b/external/eigen3/test/product_trsolve.cpp @@ -84,10 +84,18 @@ void test_product_trsolve() CALL_SUBTEST_4((trsolve,Dynamic,Dynamic>(internal::random(1,EIGEN_TEST_MAX_SIZE/2),internal::random(1,EIGEN_TEST_MAX_SIZE/2)))); // vectors - CALL_SUBTEST_1((trsolve(internal::random(1,EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_5((trsolve,Dynamic,1>(internal::random(1,EIGEN_TEST_MAX_SIZE)))); - CALL_SUBTEST_6((trsolve())); - CALL_SUBTEST_7((trsolve())); - CALL_SUBTEST_8((trsolve,4,1>())); + CALL_SUBTEST_5((trsolve(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_6((trsolve(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_7((trsolve,Dynamic,1>(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + CALL_SUBTEST_8((trsolve,Dynamic,1>(internal::random(1,EIGEN_TEST_MAX_SIZE)))); + + // meta-unrollers + CALL_SUBTEST_9((trsolve())); + CALL_SUBTEST_10((trsolve())); + CALL_SUBTEST_11((trsolve,4,1>())); + CALL_SUBTEST_12((trsolve())); + CALL_SUBTEST_13((trsolve())); + CALL_SUBTEST_14((trsolve())); + } } diff --git a/external/eigen3/test/qr.cpp b/external/eigen3/test/qr.cpp index a79e0dd..dfcc1e8 100644 --- a/external/eigen3/test/qr.cpp +++ b/external/eigen3/test/qr.cpp @@ -54,6 +54,8 @@ template void qr_invertible() { using std::log; using std::abs; + using std::pow; + using std::max; typedef typename NumTraits::Real RealScalar; typedef typename MatrixType::Scalar Scalar; @@ -65,7 +67,7 @@ template void qr_invertible() if (internal::is_same::value) { // let's build a matrix more stable to inverse - MatrixType a = MatrixType::Random(size,size*2); + MatrixType a = MatrixType::Random(size,size*4); m1 += a * a.adjoint(); } @@ -81,8 +83,11 @@ template void qr_invertible() m3 = qr.householderQ(); // get a unitary m1 = m3 * m1 * m3; qr.compute(m1); - VERIFY_IS_APPROX(absdet, qr.absDeterminant()); VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant()); + // This test is tricky if the determinant becomes too small. + // Since we generate random numbers with magnitude rrange [0,1], the average determinant is 0.5^size + VERIFY_IS_MUCH_SMALLER_THAN( abs(absdet-qr.absDeterminant()), numext::maxi(RealScalar(pow(0.5,size)),numext::maxi(abs(absdet),abs(qr.absDeterminant()))) ); + } template void qr_verify_assert() diff --git a/external/eigen3/test/qr_colpivoting.cpp b/external/eigen3/test/qr_colpivoting.cpp index eb3feac..26ed27f 100644 --- a/external/eigen3/test/qr_colpivoting.cpp +++ b/external/eigen3/test/qr_colpivoting.cpp @@ -10,21 +10,103 @@ #include "main.h" #include +#include + +template +void cod() { + typedef typename MatrixType::Index Index; + + Index rows = internal::random(2, EIGEN_TEST_MAX_SIZE); + Index cols = internal::random(2, EIGEN_TEST_MAX_SIZE); + Index cols2 = internal::random(2, EIGEN_TEST_MAX_SIZE); + Index rank = internal::random(1, (std::min)(rows, cols) - 1); + + typedef typename MatrixType::Scalar Scalar; + typedef Matrix + MatrixQType; + MatrixType matrix; + createRandomPIMatrixOfRank(rank, rows, cols, matrix); + CompleteOrthogonalDecomposition cod(matrix); + VERIFY(rank == cod.rank()); + VERIFY(cols - cod.rank() == cod.dimensionOfKernel()); + VERIFY(!cod.isInjective()); + VERIFY(!cod.isInvertible()); + VERIFY(!cod.isSurjective()); + + MatrixQType q = cod.householderQ(); + VERIFY_IS_UNITARY(q); + + MatrixType z = cod.matrixZ(); + VERIFY_IS_UNITARY(z); + + MatrixType t; + t.setZero(rows, cols); + t.topLeftCorner(rank, rank) = + cod.matrixT().topLeftCorner(rank, rank).template triangularView(); + + MatrixType c = q * t * z * cod.colsPermutation().inverse(); + VERIFY_IS_APPROX(matrix, c); + + MatrixType exact_solution = MatrixType::Random(cols, cols2); + MatrixType rhs = matrix * exact_solution; + MatrixType cod_solution = cod.solve(rhs); + VERIFY_IS_APPROX(rhs, matrix * cod_solution); + + // Verify that we get the same minimum-norm solution as the SVD. + JacobiSVD svd(matrix, ComputeThinU | ComputeThinV); + MatrixType svd_solution = svd.solve(rhs); + VERIFY_IS_APPROX(cod_solution, svd_solution); + + MatrixType pinv = cod.pseudoInverse(); + VERIFY_IS_APPROX(cod_solution, pinv * rhs); +} + +template +void cod_fixedsize() { + enum { + Rows = MatrixType::RowsAtCompileTime, + Cols = MatrixType::ColsAtCompileTime + }; + typedef typename MatrixType::Scalar Scalar; + int rank = internal::random(1, (std::min)(int(Rows), int(Cols)) - 1); + Matrix matrix; + createRandomPIMatrixOfRank(rank, Rows, Cols, matrix); + CompleteOrthogonalDecomposition > cod(matrix); + VERIFY(rank == cod.rank()); + VERIFY(Cols - cod.rank() == cod.dimensionOfKernel()); + VERIFY(cod.isInjective() == (rank == Rows)); + VERIFY(cod.isSurjective() == (rank == Cols)); + VERIFY(cod.isInvertible() == (cod.isInjective() && cod.isSurjective())); + + Matrix exact_solution; + exact_solution.setRandom(Cols, Cols2); + Matrix rhs = matrix * exact_solution; + Matrix cod_solution = cod.solve(rhs); + VERIFY_IS_APPROX(rhs, matrix * cod_solution); + + // Verify that we get the same minimum-norm solution as the SVD. + JacobiSVD svd(matrix, ComputeFullU | ComputeFullV); + Matrix svd_solution = svd.solve(rhs); + VERIFY_IS_APPROX(cod_solution, svd_solution); +} template void qr() { + using std::sqrt; typedef typename MatrixType::Index Index; Index rows = internal::random(2,EIGEN_TEST_MAX_SIZE), cols = internal::random(2,EIGEN_TEST_MAX_SIZE), cols2 = internal::random(2,EIGEN_TEST_MAX_SIZE); Index rank = internal::random(1, (std::min)(rows, cols)-1); typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; typedef Matrix MatrixQType; MatrixType m1; createRandomPIMatrixOfRank(rank,rows,cols,m1); ColPivHouseholderQR qr(m1); - VERIFY(rank == qr.rank()); - VERIFY(cols - qr.rank() == qr.dimensionOfKernel()); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel()); VERIFY(!qr.isInjective()); VERIFY(!qr.isInvertible()); VERIFY(!qr.isSurjective()); @@ -36,26 +118,59 @@ template void qr() MatrixType c = q * r * qr.colsPermutation().inverse(); VERIFY_IS_APPROX(m1, c); + // Verify that the absolute value of the diagonal elements in R are + // non-increasing until they reach the singularity threshold. + RealScalar threshold = + sqrt(RealScalar(rows)) * numext::abs(r(0, 0)) * NumTraits::epsilon(); + for (Index i = 0; i < (std::min)(rows, cols) - 1; ++i) { + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); + if (x < threshold && y < threshold) continue; + if (!test_isApproxOrLessThan(y, x)) { + for (Index j = 0; j < (std::min)(rows, cols); ++j) { + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; + } + std::cout << "Failure at i=" << i << ", rank=" << rank + << ", threshold=" << threshold << std::endl; + } + VERIFY_IS_APPROX_OR_LESS_THAN(y, x); + } + MatrixType m2 = MatrixType::Random(cols,cols2); MatrixType m3 = m1*m2; m2 = MatrixType::Random(cols,cols2); m2 = qr.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); + + { + Index size = rows; + do { + m1 = MatrixType::Random(size,size); + qr.compute(m1); + } while(!qr.isInvertible()); + MatrixType m1_inv = qr.inverse(); + m3 = m1 * MatrixType::Random(size,cols2); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m2, m1_inv*m3); + } } template void qr_fixedsize() { + using std::sqrt; + using std::abs; enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime }; typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; int rank = internal::random(1, (std::min)(int(Rows), int(Cols))-1); Matrix m1; createRandomPIMatrixOfRank(rank,Rows,Cols,m1); ColPivHouseholderQR > qr(m1); - VERIFY(rank == qr.rank()); - VERIFY(Cols - qr.rank() == qr.dimensionOfKernel()); - VERIFY(qr.isInjective() == (rank == Rows)); - VERIFY(qr.isSurjective() == (rank == Cols)); - VERIFY(qr.isInvertible() == (qr.isInjective() && qr.isSurjective())); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(Cols - qr.rank(), qr.dimensionOfKernel()); + VERIFY_IS_EQUAL(qr.isInjective(), (rank == Rows)); + VERIFY_IS_EQUAL(qr.isSurjective(), (rank == Cols)); + VERIFY_IS_EQUAL(qr.isInvertible(), (qr.isInjective() && qr.isSurjective())); Matrix r = qr.matrixQR().template triangularView(); Matrix c = qr.householderQ() * r * qr.colsPermutation().inverse(); @@ -66,6 +181,71 @@ template void qr_fixedsize() m2 = Matrix::Random(Cols,Cols2); m2 = qr.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); + // Verify that the absolute value of the diagonal elements in R are + // non-increasing until they reache the singularity threshold. + RealScalar threshold = + sqrt(RealScalar(Rows)) * (std::abs)(r(0, 0)) * NumTraits::epsilon(); + for (Index i = 0; i < (std::min)(int(Rows), int(Cols)) - 1; ++i) { + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); + if (x < threshold && y < threshold) continue; + if (!test_isApproxOrLessThan(y, x)) { + for (Index j = 0; j < (std::min)(int(Rows), int(Cols)); ++j) { + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; + } + std::cout << "Failure at i=" << i << ", rank=" << rank + << ", threshold=" << threshold << std::endl; + } + VERIFY_IS_APPROX_OR_LESS_THAN(y, x); + } +} + +// This test is meant to verify that pivots are chosen such that +// even for a graded matrix, the diagonal of R falls of roughly +// monotonically until it reaches the threshold for singularity. +// We use the so-called Kahan matrix, which is a famous counter-example +// for rank-revealing QR. See +// http://www.netlib.org/lapack/lawnspdf/lawn176.pdf +// page 3 for more detail. +template void qr_kahan_matrix() +{ + using std::sqrt; + using std::abs; + typedef typename MatrixType::Index Index; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + Index rows = 300, cols = rows; + + MatrixType m1; + m1.setZero(rows,cols); + RealScalar s = std::pow(NumTraits::epsilon(), 1.0 / rows); + RealScalar c = std::sqrt(1 - s*s); + RealScalar pow_s_i(1.0); // pow(s,i) + for (Index i = 0; i < rows; ++i) { + m1(i, i) = pow_s_i; + m1.row(i).tail(rows - i - 1) = -pow_s_i * c * MatrixType::Ones(1, rows - i - 1); + pow_s_i *= s; + } + m1 = (m1 + m1.transpose()).eval(); + ColPivHouseholderQR qr(m1); + MatrixType r = qr.matrixQR().template triangularView(); + + RealScalar threshold = + std::sqrt(RealScalar(rows)) * numext::abs(r(0, 0)) * NumTraits::epsilon(); + for (Index i = 0; i < (std::min)(rows, cols) - 1; ++i) { + RealScalar x = numext::abs(r(i, i)); + RealScalar y = numext::abs(r(i + 1, i + 1)); + if (x < threshold && y < threshold) continue; + if (!test_isApproxOrLessThan(y, x)) { + for (Index j = 0; j < (std::min)(rows, cols); ++j) { + std::cout << "i = " << j << ", |r_ii| = " << numext::abs(r(j, j)) << std::endl; + } + std::cout << "Failure at i=" << i << ", rank=" << qr.rank() + << ", threshold=" << threshold << std::endl; + } + VERIFY_IS_APPROX_OR_LESS_THAN(y, x); + } } template void qr_invertible() @@ -131,6 +311,15 @@ void test_qr_colpivoting() CALL_SUBTEST_5(( qr_fixedsize, 1 >() )); } + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( cod() ); + CALL_SUBTEST_2( cod() ); + CALL_SUBTEST_3( cod() ); + CALL_SUBTEST_4(( cod_fixedsize, 4 >() )); + CALL_SUBTEST_5(( cod_fixedsize, 3 >() )); + CALL_SUBTEST_5(( cod_fixedsize, 1 >() )); + } + for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( qr_invertible() ); CALL_SUBTEST_2( qr_invertible() ); @@ -147,4 +336,7 @@ void test_qr_colpivoting() // Test problem size constructors CALL_SUBTEST_9(ColPivHouseholderQR(10, 20)); + + CALL_SUBTEST_1( qr_kahan_matrix() ); + CALL_SUBTEST_2( qr_kahan_matrix() ); } diff --git a/external/eigen3/test/qr_fullpivoting.cpp b/external/eigen3/test/qr_fullpivoting.cpp index 511f247..70e89c1 100644 --- a/external/eigen3/test/qr_fullpivoting.cpp +++ b/external/eigen3/test/qr_fullpivoting.cpp @@ -15,16 +15,20 @@ template void qr() { typedef typename MatrixType::Index Index; - Index rows = internal::random(20,200), cols = internal::random(20,200), cols2 = internal::random(20,200); - Index rank = internal::random(1, (std::min)(rows, cols)-1); + Index max_size = EIGEN_TEST_MAX_SIZE; + Index min_size = numext::maxi(1,EIGEN_TEST_MAX_SIZE/10); + Index rows = internal::random(min_size,max_size), + cols = internal::random(min_size,max_size), + cols2 = internal::random(min_size,max_size), + rank = internal::random(1, (std::min)(rows, cols)-1); typedef typename MatrixType::Scalar Scalar; typedef Matrix MatrixQType; MatrixType m1; createRandomPIMatrixOfRank(rank,rows,cols,m1); FullPivHouseholderQR qr(m1); - VERIFY(rank == qr.rank()); - VERIFY(cols - qr.rank() == qr.dimensionOfKernel()); + VERIFY_IS_EQUAL(rank, qr.rank()); + VERIFY_IS_EQUAL(cols - qr.rank(), qr.dimensionOfKernel()); VERIFY(!qr.isInjective()); VERIFY(!qr.isInvertible()); VERIFY(!qr.isSurjective()); @@ -40,12 +44,28 @@ template void qr() MatrixType c = qr.matrixQ() * r * qr.colsPermutation().inverse(); VERIFY_IS_APPROX(m1, c); - + + // stress the ReturnByValue mechanism + MatrixType tmp; + VERIFY_IS_APPROX(tmp.noalias() = qr.matrixQ() * r, (qr.matrixQ() * r).eval()); + MatrixType m2 = MatrixType::Random(cols,cols2); MatrixType m3 = m1*m2; m2 = MatrixType::Random(cols,cols2); m2 = qr.solve(m3); VERIFY_IS_APPROX(m3, m1*m2); + + { + Index size = rows; + do { + m1 = MatrixType::Random(size,size); + qr.compute(m1); + } while(!qr.isInvertible()); + MatrixType m1_inv = qr.inverse(); + m3 = m1 * MatrixType::Random(size,cols2); + m2 = qr.solve(m3); + VERIFY_IS_APPROX(m2, m1_inv*m3); + } } template void qr_invertible() @@ -55,7 +75,9 @@ template void qr_invertible() typedef typename NumTraits::Real RealScalar; typedef typename MatrixType::Scalar Scalar; - int size = internal::random(10,50); + Index max_size = numext::mini(50,EIGEN_TEST_MAX_SIZE); + Index min_size = numext::maxi(1,EIGEN_TEST_MAX_SIZE/10); + Index size = internal::random(min_size,max_size); MatrixType m1(size, size), m2(size, size), m3(size, size); m1 = MatrixType::Random(size,size); diff --git a/external/eigen3/test/rand.cpp b/external/eigen3/test/rand.cpp index 421f8bc..51cf017 100644 --- a/external/eigen3/test/rand.cpp +++ b/external/eigen3/test/rand.cpp @@ -31,7 +31,7 @@ template void check_all_in_range(Scalar x, Scalar y) { mask( check_in_range(x,y)-x )++; } - for(DenseIndex i=0; i0).all() ); diff --git a/external/eigen3/test/real_qz.cpp b/external/eigen3/test/real_qz.cpp index a1766c6..99ac312 100644 --- a/external/eigen3/test/real_qz.cpp +++ b/external/eigen3/test/real_qz.cpp @@ -7,6 +7,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define EIGEN_RUNTIME_NO_MALLOC #include "main.h" #include #include @@ -41,7 +42,11 @@ template void real_qz(const MatrixType& m) break; } - RealQZ qz(A,B); + RealQZ qz(dim); + // TODO enable full-prealocation of required memory, this probably requires an in-place mode for HessenbergDecomposition + //Eigen::internal::set_is_malloc_allowed(false); + qz.compute(A,B); + //Eigen::internal::set_is_malloc_allowed(true); VERIFY_IS_EQUAL(qz.info(), Success); // check for zeros @@ -49,11 +54,20 @@ template void real_qz(const MatrixType& m) for (Index i=0; i void matrixRedux(const MatrixType& m) @@ -21,7 +24,7 @@ template void matrixRedux(const MatrixType& m) MatrixType m1 = MatrixType::Random(rows, cols); // The entries of m1 are uniformly distributed in [0,1], so m1.prod() is very small. This may lead to test - // failures if we underflow into denormals. Thus, we scale so that entires are close to 1. + // failures if we underflow into denormals. Thus, we scale so that entries are close to 1. MatrixType m1_for_prod = MatrixType::Ones(rows, cols) + RealScalar(0.2) * m1; VERIFY_IS_MUCH_SMALLER_THAN(MatrixType::Zero(rows, cols).sum(), Scalar(1)); @@ -65,6 +68,12 @@ template void matrixRedux(const MatrixType& m) // test empty objects VERIFY_IS_APPROX(m1.block(r0,c0,0,0).sum(), Scalar(0)); VERIFY_IS_APPROX(m1.block(r0,c0,0,0).prod(), Scalar(1)); + + // test nesting complex expression + VERIFY_EVALUATION_COUNT( (m1.matrix()*m1.matrix().transpose()).sum(), (MatrixType::IsVectorAtCompileTime && MatrixType::SizeAtCompileTime!=1 ? 0 : 1) ); + Matrix m2(rows,rows); + m2.setRandom(); + VERIFY_EVALUATION_COUNT( ((m1.matrix()*m1.matrix().transpose())+m2).sum(),(MatrixType::IsVectorAtCompileTime && MatrixType::SizeAtCompileTime!=1 ? 0 : 1)); } template void vectorRedux(const VectorType& w) @@ -147,8 +156,10 @@ void test_redux() CALL_SUBTEST_1( matrixRedux(Array()) ); CALL_SUBTEST_2( matrixRedux(Matrix2f()) ); CALL_SUBTEST_2( matrixRedux(Array2f()) ); + CALL_SUBTEST_2( matrixRedux(Array22f()) ); CALL_SUBTEST_3( matrixRedux(Matrix4d()) ); CALL_SUBTEST_3( matrixRedux(Array4d()) ); + CALL_SUBTEST_3( matrixRedux(Array44d()) ); CALL_SUBTEST_4( matrixRedux(MatrixXcf(internal::random(1,maxsize), internal::random(1,maxsize))) ); CALL_SUBTEST_4( matrixRedux(ArrayXXcf(internal::random(1,maxsize), internal::random(1,maxsize))) ); CALL_SUBTEST_5( matrixRedux(MatrixXd (internal::random(1,maxsize), internal::random(1,maxsize))) ); diff --git a/external/eigen3/test/ref.cpp b/external/eigen3/test/ref.cpp index 8297e26..769db04 100644 --- a/external/eigen3/test/ref.cpp +++ b/external/eigen3/test/ref.cpp @@ -12,26 +12,10 @@ #undef EIGEN_DEFAULT_TO_ROW_MAJOR #endif -static int nb_temporaries; - -inline void on_temporary_creation(int) { - // here's a great place to set a breakpoint when debugging failures in this test! - nb_temporaries++; -} - - -#define EIGEN_DENSE_STORAGE_CTOR_PLUGIN { on_temporary_creation(size); } +#define TEST_ENABLE_TEMPORARY_TRACKING #include "main.h" -#define VERIFY_EVALUATION_COUNT(XPR,N) {\ - nb_temporaries = 0; \ - XPR; \ - if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ - VERIFY( (#XPR) && nb_temporaries==N ); \ - } - - // test Ref.h // Deal with i387 extended precision @@ -248,6 +232,12 @@ int test_ref_overload_fun1(Ref ) { return 3; } int test_ref_overload_fun2(Ref ) { return 4; } int test_ref_overload_fun2(Ref ) { return 5; } +void test_ref_ambiguous(const Ref &A, Ref B) +{ + B = A; + B = A - A; +} + // See also bug 969 void test_ref_overloads() { @@ -260,6 +250,9 @@ void test_ref_overloads() VERIFY( test_ref_overload_fun2(Ad)==4 ); VERIFY( test_ref_overload_fun2(Ad+Bd)==4 ); VERIFY( test_ref_overload_fun2(Af+Bf)==5 ); + + ArrayXd A, B; + test_ref_ambiguous(A, B); } void test_ref() diff --git a/external/eigen3/test/runtest.sh b/external/eigen3/test/runtest.sh deleted file mode 100755 index 2be2508..0000000 --- a/external/eigen3/test/runtest.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -black='\E[30m' -red='\E[31m' -green='\E[32m' -yellow='\E[33m' -blue='\E[34m' -magenta='\E[35m' -cyan='\E[36m' -white='\E[37m' - -if ! ./$1 > /dev/null 2> .runtest.log ; then - echo -e $red Test $1 failed: $black - echo -e $blue - cat .runtest.log - echo -e $black - exit 1 -else -echo -e $green Test $1 passed$black -fi diff --git a/external/eigen3/test/rvalue_types.cpp b/external/eigen3/test/rvalue_types.cpp index b3c8565..8887f1b 100644 --- a/external/eigen3/test/rvalue_types.cpp +++ b/external/eigen3/test/rvalue_types.cpp @@ -11,20 +11,22 @@ #include -#ifdef EIGEN_HAVE_RVALUE_REFERENCES +using internal::UIntPtr; + +#if EIGEN_HAS_RVALUE_REFERENCES template void rvalue_copyassign(const MatrixType& m) { typedef typename internal::traits::Scalar Scalar; - + // create a temporary which we are about to destroy by moving MatrixType tmp = m; - long src_address = reinterpret_cast(tmp.data()); - + UIntPtr src_address = reinterpret_cast(tmp.data()); + // move the temporary to n MatrixType n = std::move(tmp); - long dst_address = reinterpret_cast(n.data()); + UIntPtr dst_address = reinterpret_cast(n.data()); if (MatrixType::RowsAtCompileTime==Dynamic|| MatrixType::ColsAtCompileTime==Dynamic) { @@ -51,7 +53,7 @@ void test_rvalue_types() CALL_SUBTEST_1(rvalue_copyassign( Matrix::Random(50).eval() )); CALL_SUBTEST_1(rvalue_copyassign( Array::Random(50).eval() )); - + CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); CALL_SUBTEST_2(rvalue_copyassign( Array::Random().eval() )); diff --git a/external/eigen3/test/schur_complex.cpp b/external/eigen3/test/schur_complex.cpp index 5e86979..deb78e4 100644 --- a/external/eigen3/test/schur_complex.cpp +++ b/external/eigen3/test/schur_complex.cpp @@ -25,7 +25,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim ComplexMatrixType T = schurOfA.matrixT(); for(int row = 1; row < size; ++row) { for(int col = 0; col < row; ++col) { - VERIFY(T(row,col) == (typename MatrixType::Scalar)0); + VERIFY(T(row,col) == (typename MatrixType::Scalar)0); } } VERIFY_IS_APPROX(A.template cast(), U * T * U.adjoint()); @@ -70,7 +70,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim VERIFY_IS_EQUAL(cs1.matrixT(), csOnlyT.matrixT()); VERIFY_RAISES_ASSERT(csOnlyT.matrixU()); - if (size > 1) + if (size > 1 && size < 20) { // Test matrix with NaN A(0,0) = std::numeric_limits::quiet_NaN(); diff --git a/external/eigen3/test/schur_real.cpp b/external/eigen3/test/schur_real.cpp index 36b9c24..4aede87 100644 --- a/external/eigen3/test/schur_real.cpp +++ b/external/eigen3/test/schur_real.cpp @@ -82,7 +82,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim Atriangular.template triangularView().setZero(); rs3.setMaxIterations(1).compute(Atriangular); // triangular matrices do not need any iterations VERIFY_IS_EQUAL(rs3.info(), Success); - VERIFY_IS_EQUAL(rs3.matrixT(), Atriangular); + VERIFY_IS_APPROX(rs3.matrixT(), Atriangular); // approx because of scaling... VERIFY_IS_EQUAL(rs3.matrixU(), MatrixType::Identity(size, size)); // Test computation of only T, not U @@ -91,7 +91,7 @@ template void schur(int size = MatrixType::ColsAtCompileTim VERIFY_IS_EQUAL(rs1.matrixT(), rsOnlyT.matrixT()); VERIFY_RAISES_ASSERT(rsOnlyT.matrixU()); - if (size > 2) + if (size > 2 && size < 20) { // Test matrix with NaN A(0,0) = std::numeric_limits::quiet_NaN(); diff --git a/external/eigen3/test/selfadjoint.cpp b/external/eigen3/test/selfadjoint.cpp index 76dab6d..92401e5 100644 --- a/external/eigen3/test/selfadjoint.cpp +++ b/external/eigen3/test/selfadjoint.cpp @@ -21,7 +21,9 @@ template void selfadjoint(const MatrixType& m) Index cols = m.cols(); MatrixType m1 = MatrixType::Random(rows, cols), - m3(rows, cols); + m2 = MatrixType::Random(rows, cols), + m3(rows, cols), + m4(rows, cols); m1.diagonal() = m1.diagonal().real().template cast(); @@ -30,10 +32,19 @@ template void selfadjoint(const MatrixType& m) VERIFY_IS_APPROX(MatrixType(m3.template triangularView()), MatrixType(m1.template triangularView())); VERIFY_IS_APPROX(m3, m3.adjoint()); - m3 = m1.template selfadjointView(); VERIFY_IS_APPROX(MatrixType(m3.template triangularView()), MatrixType(m1.template triangularView())); VERIFY_IS_APPROX(m3, m3.adjoint()); + + m3 = m1.template selfadjointView(); + m4 = m2; + m4 += m1.template selfadjointView(); + VERIFY_IS_APPROX(m4, m2+m3); + + m3 = m1.template selfadjointView(); + m4 = m2; + m4 -= m1.template selfadjointView(); + VERIFY_IS_APPROX(m4, m2-m3); } void bug_159() diff --git a/external/eigen3/test/simplicial_cholesky.cpp b/external/eigen3/test/simplicial_cholesky.cpp index 7864684..649c817 100644 --- a/external/eigen3/test/simplicial_cholesky.cpp +++ b/external/eigen3/test/simplicial_cholesky.cpp @@ -9,16 +9,17 @@ #include "sparse_solver.h" -template void test_simplicial_cholesky_T() +template void test_simplicial_cholesky_T() { - SimplicialCholesky, Lower> chol_colmajor_lower_amd; - SimplicialCholesky, Upper> chol_colmajor_upper_amd; - SimplicialLLT, Lower> llt_colmajor_lower_amd; - SimplicialLLT, Upper> llt_colmajor_upper_amd; - SimplicialLDLT, Lower> ldlt_colmajor_lower_amd; - SimplicialLDLT, Upper> ldlt_colmajor_upper_amd; - SimplicialLDLT, Lower, NaturalOrdering > ldlt_colmajor_lower_nat; - SimplicialLDLT, Upper, NaturalOrdering > ldlt_colmajor_upper_nat; + typedef SparseMatrix SparseMatrixType; + SimplicialCholesky chol_colmajor_lower_amd; + SimplicialCholesky chol_colmajor_upper_amd; + SimplicialLLT< SparseMatrixType, Lower> llt_colmajor_lower_amd; + SimplicialLLT< SparseMatrixType, Upper> llt_colmajor_upper_amd; + SimplicialLDLT< SparseMatrixType, Lower> ldlt_colmajor_lower_amd; + SimplicialLDLT< SparseMatrixType, Upper> ldlt_colmajor_upper_amd; + SimplicialLDLT< SparseMatrixType, Lower, NaturalOrdering > ldlt_colmajor_lower_nat; + SimplicialLDLT< SparseMatrixType, Upper, NaturalOrdering > ldlt_colmajor_upper_nat; check_sparse_spd_solving(chol_colmajor_lower_amd); check_sparse_spd_solving(chol_colmajor_upper_amd); @@ -34,12 +35,13 @@ template void test_simplicial_cholesky_T() check_sparse_spd_determinant(ldlt_colmajor_lower_amd); check_sparse_spd_determinant(ldlt_colmajor_upper_amd); - check_sparse_spd_solving(ldlt_colmajor_lower_nat); - check_sparse_spd_solving(ldlt_colmajor_upper_nat); + check_sparse_spd_solving(ldlt_colmajor_lower_nat, 300, 1000); + check_sparse_spd_solving(ldlt_colmajor_upper_nat, 300, 1000); } void test_simplicial_cholesky() { - CALL_SUBTEST_1(test_simplicial_cholesky_T()); - CALL_SUBTEST_2(test_simplicial_cholesky_T >()); + CALL_SUBTEST_1(( test_simplicial_cholesky_T() )); + CALL_SUBTEST_2(( test_simplicial_cholesky_T, int>() )); + CALL_SUBTEST_3(( test_simplicial_cholesky_T() )); } diff --git a/external/eigen3/test/sizeof.cpp b/external/eigen3/test/sizeof.cpp index d9ad356..03ad204 100644 --- a/external/eigen3/test/sizeof.cpp +++ b/external/eigen3/test/sizeof.cpp @@ -13,14 +13,27 @@ template void verifySizeOf(const MatrixType&) { typedef typename MatrixType::Scalar Scalar; if (MatrixType::RowsAtCompileTime!=Dynamic && MatrixType::ColsAtCompileTime!=Dynamic) - VERIFY(std::ptrdiff_t(sizeof(MatrixType))==std::ptrdiff_t(sizeof(Scalar))*std::ptrdiff_t(MatrixType::SizeAtCompileTime)); + VERIFY_IS_EQUAL(std::ptrdiff_t(sizeof(MatrixType)),std::ptrdiff_t(sizeof(Scalar))*std::ptrdiff_t(MatrixType::SizeAtCompileTime)); else - VERIFY(sizeof(MatrixType)==sizeof(Scalar*) + 2 * sizeof(typename MatrixType::Index)); + VERIFY_IS_EQUAL(sizeof(MatrixType),sizeof(Scalar*) + 2 * sizeof(typename MatrixType::Index)); } void test_sizeof() { CALL_SUBTEST(verifySizeOf(Matrix()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Array()) ); + CALL_SUBTEST(verifySizeOf(Vector2d()) ); + CALL_SUBTEST(verifySizeOf(Vector4f()) ); CALL_SUBTEST(verifySizeOf(Matrix4d()) ); CALL_SUBTEST(verifySizeOf(Matrix()) ); CALL_SUBTEST(verifySizeOf(Matrix()) ); diff --git a/external/eigen3/test/sizeoverflow.cpp b/external/eigen3/test/sizeoverflow.cpp index 16d6f8d..240d222 100644 --- a/external/eigen3/test/sizeoverflow.cpp +++ b/external/eigen3/test/sizeoverflow.cpp @@ -18,8 +18,6 @@ VERIFY(threw && "should have thrown bad_alloc: " #a); \ } -typedef DenseIndex Index; - template void triggerMatrixBadAlloc(Index rows, Index cols) { diff --git a/external/eigen3/test/sparse.h b/external/eigen3/test/sparse.h index e19a763..9912e1e 100644 --- a/external/eigen3/test/sparse.h +++ b/external/eigen3/test/sparse.h @@ -53,15 +53,15 @@ enum { * \param zeroCoords and nonzeroCoords allows to get the coordinate lists of the non zero, * and zero coefficients respectively. */ -template void +template void initSparse(double density, Matrix& refMat, - SparseMatrix& sparseMat, + SparseMatrix& sparseMat, int flags = 0, - std::vector >* zeroCoords = 0, - std::vector >* nonzeroCoords = 0) + std::vector >* zeroCoords = 0, + std::vector >* nonzeroCoords = 0) { - enum { IsRowMajor = SparseMatrix::IsRowMajor }; + enum { IsRowMajor = SparseMatrix::IsRowMajor }; sparseMat.setZero(); //sparseMat.reserve(int(refMat.rows()*refMat.cols()*density)); sparseMat.reserve(VectorXi::Constant(IsRowMajor ? refMat.rows() : refMat.cols(), int((1.5*density)*(IsRowMajor?refMat.cols():refMat.rows())))); @@ -71,14 +71,17 @@ initSparse(double density, //sparseMat.startVec(j); for(Index i=0; i(0,1) < density) ? internal::random() : Scalar(0); if ((flags&ForceNonZeroDiag) && (i==j)) { + // FIXME: the following is too conservative v = internal::random()*Scalar(3.); - v = v*v + Scalar(5.); + v = v*v; + if(numext::real(v)>0) v += Scalar(5); + else v -= Scalar(5); } if ((flags & MakeLowerTriangular) && aj>ai) v = Scalar(0); @@ -93,11 +96,11 @@ initSparse(double density, //sparseMat.insertBackByOuterInner(j,i) = v; sparseMat.insertByOuterInner(j,i) = v; if (nonzeroCoords) - nonzeroCoords->push_back(Matrix (ai,aj)); + nonzeroCoords->push_back(Matrix (ai,aj)); } else if (zeroCoords) { - zeroCoords->push_back(Matrix (ai,aj)); + zeroCoords->push_back(Matrix (ai,aj)); } refMat(ai,aj) = v; } @@ -163,7 +166,7 @@ initSparse(double density, { sparseVec.reserve(int(refVec.size()*density)); sparseVec.setZero(); - for(Index i=0; i(0,1) < density) ? internal::random() : Scalar(0); if (v!=Scalar(0)) diff --git a/external/eigen3/test/sparse_basic.cpp b/external/eigen3/test/sparse_basic.cpp index 2df7b63..3849850 100644 --- a/external/eigen3/test/sparse_basic.cpp +++ b/external/eigen3/test/sparse_basic.cpp @@ -9,22 +9,28 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +static long g_realloc_count = 0; +#define EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN g_realloc_count++; + #include "sparse.h" template void sparse_basic(const SparseMatrixType& ref) { - typedef typename SparseMatrixType::Index Index; - typedef Matrix Vector2; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + typedef Matrix Vector2; const Index rows = ref.rows(); const Index cols = ref.cols(); + //const Index inner = ref.innerSize(); + //const Index outer = ref.outerSize(); + typedef typename SparseMatrixType::Scalar Scalar; + typedef typename SparseMatrixType::RealScalar RealScalar; enum { Flags = SparseMatrixType::Flags }; double density = (std::max)(8./(rows*cols), 0.01); typedef Matrix DenseMatrix; typedef Matrix DenseVector; - typedef Matrix RowDenseVector; Scalar eps = 1e-6; Scalar s1 = internal::random(); @@ -37,94 +43,22 @@ template void sparse_basic(const SparseMatrixType& re std::vector nonzeroCoords; initSparse(density, refMat, m, 0, &zeroCoords, &nonzeroCoords); - if (zeroCoords.size()==0 || nonzeroCoords.size()==0) - return; - // test coeff and coeffRef - for (int i=0; i<(int)zeroCoords.size(); ++i) + for (std::size_t i=0; i >::value) - VERIFY_RAISES_ASSERT( m.coeffRef(zeroCoords[0].x(),zeroCoords[0].y()) = 5 ); + VERIFY_RAISES_ASSERT( m.coeffRef(zeroCoords[i].x(),zeroCoords[i].y()) = 5 ); } VERIFY_IS_APPROX(m, refMat); - m.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5); - refMat.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5); + if(!nonzeroCoords.empty()) { + m.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5); + refMat.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5); + } VERIFY_IS_APPROX(m, refMat); - - // test InnerIterators and Block expressions - for (int t=0; t<10; ++t) - { - int j = internal::random(0,cols-1); - int i = internal::random(0,rows-1); - int w = internal::random(1,cols-j-1); - int h = internal::random(1,rows-i-1); - VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); - for(int c=0; c void sparse_basic(const SparseMatrixType& re DenseMatrix m1(rows,cols); m1.setZero(); SparseMatrixType m2(rows,cols); - if(internal::random()%2) - m2.reserve(VectorXi::Constant(m2.outerSize(), 2)); + bool call_reserve = internal::random()%2; + Index nnz = internal::random(1,int(rows)/2); + if(call_reserve) + { + if(internal::random()%2) + m2.reserve(VectorXi::Constant(m2.outerSize(), int(nnz))); + else + m2.reserve(m2.outerSize() * nnz); + } + g_realloc_count = 0; for (Index j=0; j(0,rows-1); if (m1.coeff(i,j)==Scalar(0)) m2.insert(i,j) = m1(i,j) = internal::random(); } } + + if(call_reserve && !SparseMatrixType::IsRowMajor) + { + VERIFY(g_realloc_count==0); + } + m2.finalize(); VERIFY_IS_APPROX(m2,m1); } @@ -179,9 +127,9 @@ template void sparse_basic(const SparseMatrixType& re DenseMatrix m1(rows,cols); m1.setZero(); SparseMatrixType m2(rows,cols); - VectorXi r(VectorXi::Constant(m2.outerSize(), ((mode%2)==0) ? m2.innerSize() : std::max(1,m2.innerSize()/8))); + VectorXi r(VectorXi::Constant(m2.outerSize(), ((mode%2)==0) ? int(m2.innerSize()) : std::max(1,int(m2.innerSize())/8))); m2.reserve(r); - for (int k=0; k(0,rows-1); Index j = internal::random(0,cols-1); @@ -195,111 +143,39 @@ template void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(m2,m1); } - // test innerVector() - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m2(rows, rows); - initSparse(density, refMat2, m2); - Index j0 = internal::random(0,rows-1); - Index j1 = internal::random(0,rows-1); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.row(j0)); - else - VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0)); - - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.row(j0)+refMat2.row(j1)); - else - VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1)); - - SparseMatrixType m3(rows,rows); - m3.reserve(VectorXi::Constant(rows,rows/2)); - for(Index j=0; j0) - VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); - } - m3.makeCompressed(); - for(Index j=0; j0) - VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); - } - - //m2.innerVector(j0) = 2*m2.innerVector(j1); - //refMat2.col(j0) = 2*refMat2.col(j1); - //VERIFY_IS_APPROX(m2, refMat2); - } - - // test innerVectors() - { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m2(rows, rows); - initSparse(density, refMat2, m2); - if(internal::random(0,1)>0.5) m2.makeCompressed(); - - Index j0 = internal::random(0,rows-2); - Index j1 = internal::random(0,rows-2); - Index n0 = internal::random(1,rows-(std::max)(j0,j1)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), - refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0)); - else - VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), - refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); - - VERIFY_IS_APPROX(m2, refMat2); - - m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0); - if(SparseMatrixType::IsRowMajor) - refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval(); - else - refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval(); - - VERIFY_IS_APPROX(m2, refMat2); - - } - // test basic computations { - DenseMatrix refM1 = DenseMatrix::Zero(rows, rows); - DenseMatrix refM2 = DenseMatrix::Zero(rows, rows); - DenseMatrix refM3 = DenseMatrix::Zero(rows, rows); - DenseMatrix refM4 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m1(rows, rows); - SparseMatrixType m2(rows, rows); - SparseMatrixType m3(rows, rows); - SparseMatrixType m4(rows, rows); + DenseMatrix refM1 = DenseMatrix::Zero(rows, cols); + DenseMatrix refM2 = DenseMatrix::Zero(rows, cols); + DenseMatrix refM3 = DenseMatrix::Zero(rows, cols); + DenseMatrix refM4 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m1(rows, cols); + SparseMatrixType m2(rows, cols); + SparseMatrixType m3(rows, cols); + SparseMatrixType m4(rows, cols); initSparse(density, refM1, m1); initSparse(density, refM2, m2); initSparse(density, refM3, m3); initSparse(density, refM4, m4); + if(internal::random()) + m1.makeCompressed(); + + Index m1_nnz = m1.nonZeros(); + + VERIFY_IS_APPROX(m1*s1, refM1*s1); VERIFY_IS_APPROX(m1+m2, refM1+refM2); VERIFY_IS_APPROX(m1+m2+m3, refM1+refM2+refM3); VERIFY_IS_APPROX(m3.cwiseProduct(m1+m2), refM3.cwiseProduct(refM1+refM2)); VERIFY_IS_APPROX(m1*s1-m2, refM1*s1-refM2); - - VERIFY_IS_APPROX(m1*=s1, refM1*=s1); - VERIFY_IS_APPROX(m1/=s1, refM1/=s1); - - VERIFY_IS_APPROX(m1+=m2, refM1+=refM2); - VERIFY_IS_APPROX(m1-=m2, refM1-=refM2); + VERIFY_IS_APPROX(m4=m1/s1, refM1/s1); + VERIFY_IS_EQUAL(m4.nonZeros(), m1_nnz); if(SparseMatrixType::IsRowMajor) VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.row(0).dot(refM2.row(0))); else - VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.col(0).dot(refM2.row(0))); - + VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.col(0)), refM1.col(0).dot(refM2.col(0))); + DenseVector rv = DenseVector::Random(m1.cols()); DenseVector cv = DenseVector::Random(m1.rows()); Index r = internal::random(0,m1.rows()-2); @@ -318,103 +194,163 @@ template void sparse_basic(const SparseMatrixType& re VERIFY_IS_APPROX(refM4.cwiseProduct(m3), refM4.cwiseProduct(refM3)); // VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4); + VERIFY_IS_APPROX(refM4 + m3, refM4 + refM3); + VERIFY_IS_APPROX(m3 + refM4, refM3 + refM4); + VERIFY_IS_APPROX(refM4 - m3, refM4 - refM3); + VERIFY_IS_APPROX(m3 - refM4, refM3 - refM4); + VERIFY_IS_APPROX((RealScalar(0.5)*refM4 + RealScalar(0.5)*m3).eval(), RealScalar(0.5)*refM4 + RealScalar(0.5)*refM3); + VERIFY_IS_APPROX((RealScalar(0.5)*refM4 + m3*RealScalar(0.5)).eval(), RealScalar(0.5)*refM4 + RealScalar(0.5)*refM3); + VERIFY_IS_APPROX((RealScalar(0.5)*refM4 + m3.cwiseProduct(m3)).eval(), RealScalar(0.5)*refM4 + refM3.cwiseProduct(refM3)); + + VERIFY_IS_APPROX((RealScalar(0.5)*refM4 + RealScalar(0.5)*m3).eval(), RealScalar(0.5)*refM4 + RealScalar(0.5)*refM3); + VERIFY_IS_APPROX((RealScalar(0.5)*refM4 + m3*RealScalar(0.5)).eval(), RealScalar(0.5)*refM4 + RealScalar(0.5)*refM3); + VERIFY_IS_APPROX((RealScalar(0.5)*refM4 + (m3+m3)).eval(), RealScalar(0.5)*refM4 + (refM3+refM3)); + VERIFY_IS_APPROX(((refM3+m3)+RealScalar(0.5)*m3).eval(), RealScalar(0.5)*refM3 + (refM3+refM3)); + VERIFY_IS_APPROX((RealScalar(0.5)*refM4 + (refM3+m3)).eval(), RealScalar(0.5)*refM4 + (refM3+refM3)); + VERIFY_IS_APPROX((RealScalar(0.5)*refM4 + (m3+refM3)).eval(), RealScalar(0.5)*refM4 + (refM3+refM3)); + + + VERIFY_IS_APPROX(m1.sum(), refM1.sum()); + + m4 = m1; refM4 = m4; + + VERIFY_IS_APPROX(m1*=s1, refM1*=s1); + VERIFY_IS_EQUAL(m1.nonZeros(), m1_nnz); + VERIFY_IS_APPROX(m1/=s1, refM1/=s1); + VERIFY_IS_EQUAL(m1.nonZeros(), m1_nnz); + + VERIFY_IS_APPROX(m1+=m2, refM1+=refM2); + VERIFY_IS_APPROX(m1-=m2, refM1-=refM2); + + if (rows>=2 && cols>=2) + { + VERIFY_RAISES_ASSERT( m1 += m1.innerVector(0) ); + VERIFY_RAISES_ASSERT( m1 -= m1.innerVector(0) ); + VERIFY_RAISES_ASSERT( refM1 -= m1.innerVector(0) ); + VERIFY_RAISES_ASSERT( refM1 += m1.innerVector(0) ); + m1 = m4; refM1 = refM4; + } + // test aliasing VERIFY_IS_APPROX((m1 = -m1), (refM1 = -refM1)); + VERIFY_IS_EQUAL(m1.nonZeros(), m1_nnz); + m1 = m4; refM1 = refM4; VERIFY_IS_APPROX((m1 = m1.transpose()), (refM1 = refM1.transpose().eval())); + VERIFY_IS_EQUAL(m1.nonZeros(), m1_nnz); + m1 = m4; refM1 = refM4; VERIFY_IS_APPROX((m1 = -m1.transpose()), (refM1 = -refM1.transpose().eval())); + VERIFY_IS_EQUAL(m1.nonZeros(), m1_nnz); + m1 = m4; refM1 = refM4; VERIFY_IS_APPROX((m1 += -m1), (refM1 += -refM1)); + VERIFY_IS_EQUAL(m1.nonZeros(), m1_nnz); + m1 = m4; refM1 = refM4; + + if(m1.isCompressed()) + { + VERIFY_IS_APPROX(m1.coeffs().sum(), m1.sum()); + m1.coeffs() += s1; + for(Index j = 0; j SpBool; + SpBool mb1 = m1.real().template cast(); + SpBool mb2 = m2.real().template cast(); + VERIFY_IS_EQUAL(mb1.template cast().sum(), refM1.real().template cast().count()); + VERIFY_IS_EQUAL((mb1 && mb2).template cast().sum(), (refM1.real().template cast() && refM2.real().template cast()).count()); + VERIFY_IS_EQUAL((mb1 || mb2).template cast().sum(), (refM1.real().template cast() || refM2.real().template cast()).count()); + SpBool mb3 = mb1 && mb2; + if(mb1.coeffs().all() && mb2.coeffs().all()) + { + VERIFY_IS_EQUAL(mb3.nonZeros(), (refM1.real().template cast() && refM2.real().template cast()).count()); + } + } } - // test transpose + // test reverse iterators { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m2(rows, rows); + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); initSparse(density, refMat2, m2); - VERIFY_IS_APPROX(m2.transpose().eval(), refMat2.transpose().eval()); - VERIFY_IS_APPROX(m2.transpose(), refMat2.transpose()); + std::vector ref_value(m2.innerSize()); + std::vector ref_index(m2.innerSize()); + if(internal::random()) + m2.makeCompressed(); + for(Index j = 0; j(density, refMat2, m2); - Index j0 = internal::random(0,rows-2); - Index j1 = internal::random(0,rows-2); - Index n0 = internal::random(1,rows-(std::max)(j0,j1)); - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0)); - - if(SparseMatrixType::IsRowMajor) - VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols), - refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols)); - else - VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0), - refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); - - Index i = internal::random(0,m2.outerSize()-1); - if(SparseMatrixType::IsRowMajor) { - m2.innerVector(i) = m2.innerVector(i) * s1; - refMat2.row(i) = refMat2.row(i) * s1; - VERIFY_IS_APPROX(m2,refMat2); - } else { - m2.innerVector(i) = m2.innerVector(i) * s1; - refMat2.col(i) = refMat2.col(i) * s1; - VERIFY_IS_APPROX(m2,refMat2); - } - - VERIFY_IS_APPROX(DenseVector(m2.col(j0)), refMat2.col(j0)); - VERIFY_IS_APPROX(m2.col(j0), refMat2.col(j0)); - - VERIFY_IS_APPROX(RowDenseVector(m2.row(j0)), refMat2.row(j0)); - VERIFY_IS_APPROX(m2.row(j0), refMat2.row(j0)); + VERIFY_IS_APPROX(m2.transpose().eval(), refMat2.transpose().eval()); + VERIFY_IS_APPROX(m2.transpose(), refMat2.transpose()); + + VERIFY_IS_APPROX(SparseMatrixType(m2.adjoint()), refMat2.adjoint()); - VERIFY_IS_APPROX(m2.block(j0,j1,n0,n0), refMat2.block(j0,j1,n0,n0)); - VERIFY_IS_APPROX((2*m2).block(j0,j1,n0,n0), (2*refMat2).block(j0,j1,n0,n0)); + // check isApprox handles opposite storage order + typename Transpose::PlainObject m3(m2); + VERIFY(m2.isApprox(m3)); } // test prune { - SparseMatrixType m2(rows, rows); - DenseMatrix refM2(rows, rows); + SparseMatrixType m2(rows, cols); + DenseMatrix refM2(rows, cols); refM2.setZero(); int countFalseNonZero = 0; int countTrueNonZero = 0; - for (Index j=0; j(0,1); - if (x<0.1) + if (x<0.1f) { // do nothing } - else if (x<0.5) + else if (x<0.5f) { countFalseNonZero++; - m2.insertBackByOuterInner(j,i) = Scalar(0); + m2.insert(i,j) = Scalar(0); } else { countTrueNonZero++; - m2.insertBackByOuterInner(j,i) = Scalar(1); - if(SparseMatrixType::IsRowMajor) - refM2(j,i) = Scalar(1); - else - refM2(i,j) = Scalar(1); + m2.insert(i,j) = Scalar(1); + refM2(i,j) = Scalar(1); } } } - m2.finalize(); + if(internal::random()) + m2.makeCompressed(); VERIFY(countFalseNonZero+countTrueNonZero == m2.nonZeros()); - VERIFY_IS_APPROX(m2, refM2); + if(countTrueNonZero>0) + VERIFY_IS_APPROX(m2, refM2); m2.prune(Scalar(1)); VERIFY(countTrueNonZero==m2.nonZeros()); VERIFY_IS_APPROX(m2, refM2); @@ -422,29 +358,74 @@ template void sparse_basic(const SparseMatrixType& re // test setFromTriplets { - typedef Triplet TripletType; + typedef Triplet TripletType; std::vector triplets; - int ntriplets = rows*cols; + Index ntriplets = rows*cols; triplets.reserve(ntriplets); - DenseMatrix refMat(rows,cols); - refMat.setZero(); - for(int i=0;i(0,rows-1); - Index c = internal::random(0,cols-1); + StorageIndex r = internal::random(0,StorageIndex(rows-1)); + StorageIndex c = internal::random(0,StorageIndex(cols-1)); Scalar v = internal::random(); triplets.push_back(TripletType(r,c,v)); - refMat(r,c) += v; + refMat_sum(r,c) += v; + if(std::abs(refMat_prod(r,c))==0) + refMat_prod(r,c) = v; + else + refMat_prod(r,c) *= v; + refMat_last(r,c) = v; } SparseMatrixType m(rows,cols); m.setFromTriplets(triplets.begin(), triplets.end()); - VERIFY_IS_APPROX(m, refMat); + VERIFY_IS_APPROX(m, refMat_sum); + + m.setFromTriplets(triplets.begin(), triplets.end(), std::multiplies()); + VERIFY_IS_APPROX(m, refMat_prod); +#if (defined(__cplusplus) && __cplusplus >= 201103L) + m.setFromTriplets(triplets.begin(), triplets.end(), [] (Scalar,Scalar b) { return b; }); + VERIFY_IS_APPROX(m, refMat_last); +#endif + } + + // test Map + { + DenseMatrix refMat2(rows, cols), refMat3(rows, cols); + SparseMatrixType m2(rows, cols), m3(rows, cols); + initSparse(density, refMat2, m2); + initSparse(density, refMat3, m3); + { + Map mapMat2(m2.rows(), m2.cols(), m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(), m2.innerNonZeroPtr()); + Map mapMat3(m3.rows(), m3.cols(), m3.nonZeros(), m3.outerIndexPtr(), m3.innerIndexPtr(), m3.valuePtr(), m3.innerNonZeroPtr()); + VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3); + VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3); + } + { + MappedSparseMatrix mapMat2(m2.rows(), m2.cols(), m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(), m2.innerNonZeroPtr()); + MappedSparseMatrix mapMat3(m3.rows(), m3.cols(), m3.nonZeros(), m3.outerIndexPtr(), m3.innerIndexPtr(), m3.valuePtr(), m3.innerNonZeroPtr()); + VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3); + VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3); + } + + Index i = internal::random(0,rows-1); + Index j = internal::random(0,cols-1); + m2.coeffRef(i,j) = 123; + if(internal::random()) + m2.makeCompressed(); + Map mapMat2(rows, cols, m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(), m2.innerNonZeroPtr()); + VERIFY_IS_EQUAL(m2.coeff(i,j),Scalar(123)); + VERIFY_IS_EQUAL(mapMat2.coeff(i,j),Scalar(123)); + mapMat2.coeffRef(i,j) = -123; + VERIFY_IS_EQUAL(m2.coeff(i,j),Scalar(-123)); } // test triangularView { - DenseMatrix refMat2(rows, rows), refMat3(rows, rows); - SparseMatrixType m2(rows, rows), m3(rows, rows); + DenseMatrix refMat2(rows, cols), refMat3(rows, cols); + SparseMatrixType m2(rows, cols), m3(rows, cols); initSparse(density, refMat2, m2); refMat3 = refMat2.template triangularView(); m3 = m2.template triangularView(); @@ -454,13 +435,15 @@ template void sparse_basic(const SparseMatrixType& re m3 = m2.template triangularView(); VERIFY_IS_APPROX(m3, refMat3); - refMat3 = refMat2.template triangularView(); - m3 = m2.template triangularView(); - VERIFY_IS_APPROX(m3, refMat3); + { + refMat3 = refMat2.template triangularView(); + m3 = m2.template triangularView(); + VERIFY_IS_APPROX(m3, refMat3); - refMat3 = refMat2.template triangularView(); - m3 = m2.template triangularView(); - VERIFY_IS_APPROX(m3, refMat3); + refMat3 = refMat2.template triangularView(); + m3 = m2.template triangularView(); + VERIFY_IS_APPROX(m3, refMat3); + } refMat3 = refMat2.template triangularView(); m3 = m2.template triangularView(); @@ -469,6 +452,10 @@ template void sparse_basic(const SparseMatrixType& re refMat3 = refMat2.template triangularView(); m3 = m2.template triangularView(); VERIFY_IS_APPROX(m3, refMat3); + + // check sparse-triangular to dense + refMat3 = m2.template triangularView(); + VERIFY_IS_APPROX(refMat3, DenseMatrix(refMat2.template triangularView())); } // test selfadjointView @@ -480,6 +467,19 @@ template void sparse_basic(const SparseMatrixType& re refMat3 = refMat2.template selfadjointView(); m3 = m2.template selfadjointView(); VERIFY_IS_APPROX(m3, refMat3); + + refMat3 += refMat2.template selfadjointView(); + m3 += m2.template selfadjointView(); + VERIFY_IS_APPROX(m3, refMat3); + + refMat3 -= refMat2.template selfadjointView(); + m3 -= m2.template selfadjointView(); + VERIFY_IS_APPROX(m3, refMat3); + + // selfadjointView only works for square matrices: + SparseMatrixType m4(rows, rows+1); + VERIFY_RAISES_ASSERT(m4.template selfadjointView()); + VERIFY_RAISES_ASSERT(m4.template selfadjointView()); } // test sparseView @@ -488,28 +488,59 @@ template void sparse_basic(const SparseMatrixType& re SparseMatrixType m2(rows, rows); initSparse(density, refMat2, m2); VERIFY_IS_APPROX(m2.eval(), refMat2.sparseView().eval()); + + // sparse view on expressions: + VERIFY_IS_APPROX((s1*m2).eval(), (s1*refMat2).sparseView().eval()); + VERIFY_IS_APPROX((m2+m2).eval(), (refMat2+refMat2).sparseView().eval()); + VERIFY_IS_APPROX((m2*m2).eval(), (refMat2.lazyProduct(refMat2)).sparseView().eval()); + VERIFY_IS_APPROX((m2*m2).eval(), (refMat2*refMat2).sparseView().eval()); } // test diagonal { - DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows); - SparseMatrixType m2(rows, rows); + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); initSparse(density, refMat2, m2); VERIFY_IS_APPROX(m2.diagonal(), refMat2.diagonal().eval()); + DenseVector d = m2.diagonal(); + VERIFY_IS_APPROX(d, refMat2.diagonal().eval()); + d = m2.diagonal().array(); + VERIFY_IS_APPROX(d, refMat2.diagonal().eval()); + VERIFY_IS_APPROX(const_cast(m2).diagonal(), refMat2.diagonal().eval()); + + initSparse(density, refMat2, m2, ForceNonZeroDiag); + m2.diagonal() += refMat2.diagonal(); + refMat2.diagonal() += refMat2.diagonal(); + VERIFY_IS_APPROX(m2, refMat2); + } + + // test diagonal to sparse + { + DenseVector d = DenseVector::Random(rows); + DenseMatrix refMat2 = d.asDiagonal(); + SparseMatrixType m2(rows, rows); + m2 = d.asDiagonal(); + VERIFY_IS_APPROX(m2, refMat2); + SparseMatrixType m3(d.asDiagonal()); + VERIFY_IS_APPROX(m3, refMat2); + refMat2 += d.asDiagonal(); + m2 += d.asDiagonal(); + VERIFY_IS_APPROX(m2, refMat2); } // test conservative resize { - std::vector< std::pair > inc; - inc.push_back(std::pair(-3,-2)); - inc.push_back(std::pair(0,0)); - inc.push_back(std::pair(3,2)); - inc.push_back(std::pair(3,0)); - inc.push_back(std::pair(0,3)); + std::vector< std::pair > inc; + if(rows > 3 && cols > 2) + inc.push_back(std::pair(-3,-2)); + inc.push_back(std::pair(0,0)); + inc.push_back(std::pair(3,2)); + inc.push_back(std::pair(3,0)); + inc.push_back(std::pair(0,3)); for(size_t i = 0; i< inc.size(); i++) { - Index incRows = inc[i].first; - Index incCols = inc[i].second; + StorageIndex incRows = inc[i].first; + StorageIndex incCols = inc[i].second; SparseMatrixType m1(rows, cols); DenseMatrix refMat1 = DenseMatrix::Zero(rows, cols); initSparse(density, refMat1, m1); @@ -554,21 +585,104 @@ template void sparse_basic(const SparseMatrixType& re refMat1.setIdentity(); VERIFY_IS_APPROX(m1, refMat1); } + + // test array/vector of InnerIterator + { + typedef typename SparseMatrixType::InnerIterator IteratorType; + + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + IteratorType static_array[2]; + static_array[0] = IteratorType(m2,0); + static_array[1] = IteratorType(m2,m2.outerSize()-1); + VERIFY( static_array[0] || m2.innerVector(static_array[0].outer()).nonZeros() == 0 ); + VERIFY( static_array[1] || m2.innerVector(static_array[1].outer()).nonZeros() == 0 ); + if(static_array[0] && static_array[1]) + { + ++(static_array[1]); + static_array[1] = IteratorType(m2,0); + VERIFY( static_array[1] ); + VERIFY( static_array[1].index() == static_array[0].index() ); + VERIFY( static_array[1].outer() == static_array[0].outer() ); + VERIFY( static_array[1].value() == static_array[0].value() ); + } + + std::vector iters(2); + iters[0] = IteratorType(m2,0); + iters[1] = IteratorType(m2,m2.outerSize()-1); + } +} + + +template +void big_sparse_triplet(Index rows, Index cols, double density) { + typedef typename SparseMatrixType::StorageIndex StorageIndex; + typedef typename SparseMatrixType::Scalar Scalar; + typedef Triplet TripletType; + std::vector triplets; + double nelements = density * rows*cols; + VERIFY(nelements>=0 && nelements < NumTraits::highest()); + Index ntriplets = Index(nelements); + triplets.reserve(ntriplets); + Scalar sum = Scalar(0); + for(Index i=0;i(0,rows-1); + Index c = internal::random(0,cols-1); + Scalar v = internal::random(); + triplets.push_back(TripletType(r,c,v)); + sum += v; + } + SparseMatrixType m(rows,cols); + m.setFromTriplets(triplets.begin(), triplets.end()); + VERIFY(m.nonZeros() <= ntriplets); + VERIFY_IS_APPROX(sum, m.sum()); } + void test_sparse_basic() { for(int i = 0; i < g_repeat; i++) { - int s = Eigen::internal::random(1,50); - EIGEN_UNUSED_VARIABLE(s); + int r = Eigen::internal::random(1,200), c = Eigen::internal::random(1,200); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + EIGEN_UNUSED_VARIABLE(r+c); + CALL_SUBTEST_1(( sparse_basic(SparseMatrix(1, 1)) )); CALL_SUBTEST_1(( sparse_basic(SparseMatrix(8, 8)) )); - CALL_SUBTEST_2(( sparse_basic(SparseMatrix, ColMajor>(s, s)) )); - CALL_SUBTEST_2(( sparse_basic(SparseMatrix, RowMajor>(s, s)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(s, s)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(s, s)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(s, s)) )); + CALL_SUBTEST_2(( sparse_basic(SparseMatrix, ColMajor>(r, c)) )); + CALL_SUBTEST_2(( sparse_basic(SparseMatrix, RowMajor>(r, c)) )); + CALL_SUBTEST_1(( sparse_basic(SparseMatrix(r, c)) )); + CALL_SUBTEST_5(( sparse_basic(SparseMatrix(r, c)) )); + CALL_SUBTEST_5(( sparse_basic(SparseMatrix(r, c)) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(short(s), short(s))) )); - CALL_SUBTEST_1(( sparse_basic(SparseMatrix(short(s), short(s))) )); + r = Eigen::internal::random(1,100); + c = Eigen::internal::random(1,100); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + + CALL_SUBTEST_6(( sparse_basic(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_6(( sparse_basic(SparseMatrix(short(r), short(c))) )); + } + + // Regression test for bug 900: (manually insert higher values here, if you have enough RAM): + CALL_SUBTEST_3((big_sparse_triplet >(10000, 10000, 0.125))); + CALL_SUBTEST_4((big_sparse_triplet >(10000, 10000, 0.125))); + + // Regression test for bug 1105 +#ifdef EIGEN_TEST_PART_7 + { + int n = Eigen::internal::random(200,600); + SparseMatrix,0, long> mat(n, n); + std::complex val; + + for(int i=0; i +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse.h" + +template +typename Eigen::internal::enable_if<(T::Flags&RowMajorBit)==RowMajorBit, typename T::RowXpr>::type +innervec(T& A, Index i) +{ + return A.row(i); +} + +template +typename Eigen::internal::enable_if<(T::Flags&RowMajorBit)==0, typename T::ColXpr>::type +innervec(T& A, Index i) +{ + return A.col(i); +} + +template void sparse_block(const SparseMatrixType& ref) +{ + const Index rows = ref.rows(); + const Index cols = ref.cols(); + const Index inner = ref.innerSize(); + const Index outer = ref.outerSize(); + + typedef typename SparseMatrixType::Scalar Scalar; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + + double density = (std::max)(8./(rows*cols), 0.01); + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + typedef Matrix RowDenseVector; + typedef SparseVector SparseVectorType; + + Scalar s1 = internal::random(); + { + SparseMatrixType m(rows, cols); + DenseMatrix refMat = DenseMatrix::Zero(rows, cols); + initSparse(density, refMat, m); + + VERIFY_IS_APPROX(m, refMat); + + // test InnerIterators and Block expressions + for (int t=0; t<10; ++t) + { + Index j = internal::random(0,cols-2); + Index i = internal::random(0,rows-2); + Index w = internal::random(1,cols-j); + Index h = internal::random(1,rows-i); + + VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w)); + for(Index c=0; c(density, refMat2, m2); + Index j0 = internal::random(0,outer-1); + Index j1 = internal::random(0,outer-1); + Index r0 = internal::random(0,rows-1); + Index c0 = internal::random(0,cols-1); + + VERIFY_IS_APPROX(m2.innerVector(j0), innervec(refMat2,j0)); + VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), innervec(refMat2,j0)+innervec(refMat2,j1)); + + m2.innerVector(j0) *= Scalar(2); + innervec(refMat2,j0) *= Scalar(2); + VERIFY_IS_APPROX(m2, refMat2); + + m2.row(r0) *= Scalar(3); + refMat2.row(r0) *= Scalar(3); + VERIFY_IS_APPROX(m2, refMat2); + + m2.col(c0) *= Scalar(4); + refMat2.col(c0) *= Scalar(4); + VERIFY_IS_APPROX(m2, refMat2); + + m2.row(r0) /= Scalar(3); + refMat2.row(r0) /= Scalar(3); + VERIFY_IS_APPROX(m2, refMat2); + + m2.col(c0) /= Scalar(4); + refMat2.col(c0) /= Scalar(4); + VERIFY_IS_APPROX(m2, refMat2); + + SparseVectorType v1; + VERIFY_IS_APPROX(v1 = m2.col(c0) * 4, refMat2.col(c0)*4); + VERIFY_IS_APPROX(v1 = m2.row(r0) * 4, refMat2.row(r0).transpose()*4); + + SparseMatrixType m3(rows,cols); + m3.reserve(VectorXi::Constant(outer,int(inner/2))); + for(Index j=0; j(k+1); + for(Index j=0; j<(std::min)(outer, inner); ++j) + { + VERIFY(j==numext::real(m3.innerVector(j).nonZeros())); + if(j>0) + VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); + } + m3.makeCompressed(); + for(Index j=0; j<(std::min)(outer, inner); ++j) + { + VERIFY(j==numext::real(m3.innerVector(j).nonZeros())); + if(j>0) + VERIFY(j==numext::real(m3.innerVector(j).lastCoeff())); + } + + VERIFY(m3.innerVector(j0).nonZeros() == m3.transpose().innerVector(j0).nonZeros()); + +// m2.innerVector(j0) = 2*m2.innerVector(j1); +// refMat2.col(j0) = 2*refMat2.col(j1); +// VERIFY_IS_APPROX(m2, refMat2); + } + + // test innerVectors() + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + if(internal::random(0,1)>0.5f) m2.makeCompressed(); + Index j0 = internal::random(0,outer-2); + Index j1 = internal::random(0,outer-2); + Index n0 = internal::random(1,outer-(std::max)(j0,j1)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(j0,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), + refMat2.middleRows(j0,n0)+refMat2.middleRows(j1,n0)); + else + VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0), + refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); + + VERIFY_IS_APPROX(m2, refMat2); + + VERIFY(m2.innerVectors(j0,n0).nonZeros() == m2.transpose().innerVectors(j0,n0).nonZeros()); + + m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0); + if(SparseMatrixType::IsRowMajor) + refMat2.middleRows(j0,n0) = (refMat2.middleRows(j0,n0) + refMat2.middleRows(j1,n0)).eval(); + else + refMat2.middleCols(j0,n0) = (refMat2.middleCols(j0,n0) + refMat2.middleCols(j1,n0)).eval(); + + VERIFY_IS_APPROX(m2, refMat2); + } + + // test generic blocks + { + DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols); + SparseMatrixType m2(rows, cols); + initSparse(density, refMat2, m2); + Index j0 = internal::random(0,outer-2); + Index j1 = internal::random(0,outer-2); + Index n0 = internal::random(1,outer-(std::max)(j0,j1)); + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.block(j0,0,n0,cols), refMat2.block(j0,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.block(0,j0,rows,n0), refMat2.block(0,j0,rows,n0)); + + if(SparseMatrixType::IsRowMajor) + VERIFY_IS_APPROX(m2.block(j0,0,n0,cols)+m2.block(j1,0,n0,cols), + refMat2.block(j0,0,n0,cols)+refMat2.block(j1,0,n0,cols)); + else + VERIFY_IS_APPROX(m2.block(0,j0,rows,n0)+m2.block(0,j1,rows,n0), + refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0)); + + Index i = internal::random(0,m2.outerSize()-1); + if(SparseMatrixType::IsRowMajor) { + m2.innerVector(i) = m2.innerVector(i) * s1; + refMat2.row(i) = refMat2.row(i) * s1; + VERIFY_IS_APPROX(m2,refMat2); + } else { + m2.innerVector(i) = m2.innerVector(i) * s1; + refMat2.col(i) = refMat2.col(i) * s1; + VERIFY_IS_APPROX(m2,refMat2); + } + + Index r0 = internal::random(0,rows-2); + Index c0 = internal::random(0,cols-2); + Index r1 = internal::random(1,rows-r0); + Index c1 = internal::random(1,cols-c0); + + VERIFY_IS_APPROX(DenseVector(m2.col(c0)), refMat2.col(c0)); + VERIFY_IS_APPROX(m2.col(c0), refMat2.col(c0)); + + VERIFY_IS_APPROX(RowDenseVector(m2.row(r0)), refMat2.row(r0)); + VERIFY_IS_APPROX(m2.row(r0), refMat2.row(r0)); + + VERIFY_IS_APPROX(m2.block(r0,c0,r1,c1), refMat2.block(r0,c0,r1,c1)); + VERIFY_IS_APPROX((2*m2).block(r0,c0,r1,c1), (2*refMat2).block(r0,c0,r1,c1)); + + if(m2.nonZeros()>0) + { + VERIFY_IS_APPROX(m2, refMat2); + SparseMatrixType m3(rows, cols); + DenseMatrix refMat3(rows, cols); refMat3.setZero(); + Index n = internal::random(1,10); + for(Index k=0; k(0,outer-1); + Index o2 = internal::random(0,outer-1); + if(SparseMatrixType::IsRowMajor) + { + m3.innerVector(o1) = m2.row(o2); + refMat3.row(o1) = refMat2.row(o2); + } + else + { + m3.innerVector(o1) = m2.col(o2); + refMat3.col(o1) = refMat2.col(o2); + } + if(internal::random()) + m3.makeCompressed(); + } + if(m3.nonZeros()>0) + VERIFY_IS_APPROX(m3, refMat3); + } + } +} + +void test_sparse_block() +{ + for(int i = 0; i < g_repeat; i++) { + int r = Eigen::internal::random(1,200), c = Eigen::internal::random(1,200); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + EIGEN_UNUSED_VARIABLE(r+c); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(1, 1)) )); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(8, 8)) )); + CALL_SUBTEST_1(( sparse_block(SparseMatrix(r, c)) )); + CALL_SUBTEST_2(( sparse_block(SparseMatrix, ColMajor>(r, c)) )); + CALL_SUBTEST_2(( sparse_block(SparseMatrix, RowMajor>(r, c)) )); + + CALL_SUBTEST_3(( sparse_block(SparseMatrix(r, c)) )); + CALL_SUBTEST_3(( sparse_block(SparseMatrix(r, c)) )); + + r = Eigen::internal::random(1,100); + c = Eigen::internal::random(1,100); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + + CALL_SUBTEST_4(( sparse_block(SparseMatrix(short(r), short(c))) )); + CALL_SUBTEST_4(( sparse_block(SparseMatrix(short(r), short(c))) )); + } +} diff --git a/external/eigen3/test/sparse_permutations.cpp b/external/eigen3/test/sparse_permutations.cpp index e4ce1d6..b82ccef 100644 --- a/external/eigen3/test/sparse_permutations.cpp +++ b/external/eigen3/test/sparse_permutations.cpp @@ -1,25 +1,57 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011-2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +static long int nb_transposed_copies; +#define EIGEN_SPARSE_TRANSPOSED_COPY_PLUGIN {nb_transposed_copies++;} +#define VERIFY_TRANSPOSITION_COUNT(XPR,N) {\ + nb_transposed_copies = 0; \ + XPR; \ + if(nb_transposed_copies!=N) std::cerr << "nb_transposed_copies == " << nb_transposed_copies << "\n"; \ + VERIFY( (#XPR) && nb_transposed_copies==N ); \ + } + #include "sparse.h" -template void sparse_permutations(const SparseMatrixType& ref) +template +bool is_sorted(const T& mat) { + for(Index k = 0; k=it.index()) + return false; + prev = it.index(); + } + } + return true; +} + +template +typename internal::nested_eval::type eval(const T &xpr) { - typedef typename SparseMatrixType::Index Index; + VERIFY( int(internal::nested_eval::type::Flags&RowMajorBit) == int(internal::evaluator::Flags&RowMajorBit) ); + return xpr; +} +template void sparse_permutations(const SparseMatrixType& ref) +{ const Index rows = ref.rows(); const Index cols = ref.cols(); typedef typename SparseMatrixType::Scalar Scalar; - typedef typename SparseMatrixType::Index Index; - typedef SparseMatrix OtherSparseMatrixType; + typedef typename SparseMatrixType::StorageIndex StorageIndex; + typedef SparseMatrix OtherSparseMatrixType; typedef Matrix DenseMatrix; - typedef Matrix VectorI; + typedef Matrix VectorI; +// bool IsRowMajor1 = SparseMatrixType::IsRowMajor; +// bool IsRowMajor2 = OtherSparseMatrixType::IsRowMajor; double density = (std::max)(8./(rows*cols), 0.01); @@ -44,58 +76,69 @@ template void sparse_permutations(c randomPermutationVector(pi, cols); p.indices() = pi; - res = mat*p; + VERIFY( is_sorted( ::eval(mat*p) )); + VERIFY( is_sorted( res = mat*p )); + VERIFY_TRANSPOSITION_COUNT( ::eval(mat*p), 0); + //VERIFY_TRANSPOSITION_COUNT( res = mat*p, IsRowMajor ? 1 : 0 ); res_d = mat_d*p; VERIFY(res.isApprox(res_d) && "mat*p"); - res = p*mat; + VERIFY( is_sorted( ::eval(p*mat) )); + VERIFY( is_sorted( res = p*mat )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p*mat), 0); res_d = p*mat_d; VERIFY(res.isApprox(res_d) && "p*mat"); - res = mat*p.inverse(); + VERIFY( is_sorted( (mat*p).eval() )); + VERIFY( is_sorted( res = mat*p.inverse() )); + VERIFY_TRANSPOSITION_COUNT( ::eval(mat*p.inverse()), 0); res_d = mat*p.inverse(); VERIFY(res.isApprox(res_d) && "mat*inv(p)"); - res = p.inverse()*mat; + VERIFY( is_sorted( (p*mat+p*mat).eval() )); + VERIFY( is_sorted( res = p.inverse()*mat )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p.inverse()*mat), 0); res_d = p.inverse()*mat_d; VERIFY(res.isApprox(res_d) && "inv(p)*mat"); - res = mat.twistedBy(p); + VERIFY( is_sorted( (p * mat * p.inverse()).eval() )); + VERIFY( is_sorted( res = mat.twistedBy(p) )); + VERIFY_TRANSPOSITION_COUNT( ::eval(p * mat * p.inverse()), 0); res_d = (p * mat_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "p*mat*inv(p)"); - res = mat.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p_null) )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint upper to full"); - res = mat.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p_null) )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint lower to full"); - res = up.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = up.template selfadjointView().twistedBy(p_null) )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "upper selfadjoint to full"); - res = lo.template selfadjointView().twistedBy(p_null); + VERIFY( is_sorted( res = lo.template selfadjointView().twistedBy(p_null) )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "lower selfadjoint full"); - res = mat.template selfadjointView(); + VERIFY( is_sorted( res = mat.template selfadjointView() )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint upper to full"); - res = mat.template selfadjointView(); + VERIFY( is_sorted( res = mat.template selfadjointView() )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "full selfadjoint lower to full"); - res = up.template selfadjointView(); + VERIFY( is_sorted( res = up.template selfadjointView() )); res_d = up_sym_d; VERIFY(res.isApprox(res_d) && "upper selfadjoint to full"); - res = lo.template selfadjointView(); + VERIFY( is_sorted( res = lo.template selfadjointView() )); res_d = lo_sym_d; VERIFY(res.isApprox(res_d) && "lower selfadjoint full"); @@ -152,19 +195,19 @@ template void sparse_permutations(c VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to lower"); - res = mat.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p) )); res_d = (p * up_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "full selfadjoint upper twisted to full"); - res = mat.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = mat.template selfadjointView().twistedBy(p) )); res_d = (p * lo_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "full selfadjoint lower twisted to full"); - res = up.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = up.template selfadjointView().twistedBy(p) )); res_d = (p * up_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "upper selfadjoint twisted to full"); - res = lo.template selfadjointView().twistedBy(p); + VERIFY( is_sorted( res = lo.template selfadjointView().twistedBy(p) )); res_d = (p * lo_sym_d) * p.inverse(); VERIFY(res.isApprox(res_d) && "lower selfadjoint twisted to full"); } @@ -184,4 +227,10 @@ void test_sparse_permutations() CALL_SUBTEST_1(( sparse_permutations_all(s) )); CALL_SUBTEST_2(( sparse_permutations_all >(s) )); } + + VERIFY((internal::is_same,OnTheRight,false,SparseShape>::ReturnType, + internal::nested_eval,PermutationMatrix,AliasFreeProduct>,1>::type>::value)); + + VERIFY((internal::is_same,OnTheLeft,false,SparseShape>::ReturnType, + internal::nested_eval,SparseMatrix,AliasFreeProduct>,1>::type>::value)); } diff --git a/external/eigen3/test/sparse_product.cpp b/external/eigen3/test/sparse_product.cpp index a2ea9d5..1975867 100644 --- a/external/eigen3/test/sparse_product.cpp +++ b/external/eigen3/test/sparse_product.cpp @@ -7,37 +7,29 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#include "sparse.h" +static long int nb_temporaries; -template struct test_outer; +inline void on_temporary_creation() { + // here's a great place to set a breakpoint when debugging failures in this test! + nb_temporaries++; +} -template struct test_outer { - static void run(SparseMatrixType& m2, SparseMatrixType& m4, DenseMatrix& refMat2, DenseMatrix& refMat4) { - typedef typename SparseMatrixType::Index Index; - Index c = internal::random(0,m2.cols()-1); - Index c1 = internal::random(0,m2.cols()-1); - VERIFY_IS_APPROX(m4=m2.col(c)*refMat2.col(c1).transpose(), refMat4=refMat2.col(c)*refMat2.col(c1).transpose()); - VERIFY_IS_APPROX(m4=refMat2.col(c1)*m2.col(c).transpose(), refMat4=refMat2.col(c1)*refMat2.col(c).transpose()); - } -}; - -template struct test_outer { - static void run(SparseMatrixType& m2, SparseMatrixType& m4, DenseMatrix& refMat2, DenseMatrix& refMat4) { - typedef typename SparseMatrixType::Index Index; - Index r = internal::random(0,m2.rows()-1); - Index c1 = internal::random(0,m2.cols()-1); - VERIFY_IS_APPROX(m4=m2.row(r).transpose()*refMat2.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*refMat2.col(c1).transpose()); - VERIFY_IS_APPROX(m4=refMat2.col(c1)*m2.row(r), refMat4=refMat2.col(c1)*refMat2.row(r)); +#define EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN { on_temporary_creation(); } + +#include "sparse.h" + +#define VERIFY_EVALUATION_COUNT(XPR,N) {\ + nb_temporaries = 0; \ + CALL_SUBTEST( XPR ); \ + if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ + VERIFY( (#XPR) && nb_temporaries==N ); \ } -}; -// (m2,m4,refMat2,refMat4,dv1); -// VERIFY_IS_APPROX(m4=m2.innerVector(c)*dv1.transpose(), refMat4=refMat2.colVector(c)*dv1.transpose()); -// VERIFY_IS_APPROX(m4=dv1*mcm.col(c).transpose(), refMat4=dv1*refMat2.col(c).transpose()); + template void sparse_product() { - typedef typename SparseMatrixType::Index Index; + typedef typename SparseMatrixType::StorageIndex StorageIndex; Index n = 100; const Index rows = internal::random(1,n); const Index cols = internal::random(1,n); @@ -45,12 +37,12 @@ template void sparse_product() typedef typename SparseMatrixType::Scalar Scalar; enum { Flags = SparseMatrixType::Flags }; - double density = (std::max)(8./(rows*cols), 0.1); + double density = (std::max)(8./(rows*cols), 0.2); typedef Matrix DenseMatrix; typedef Matrix DenseVector; typedef Matrix RowDenseVector; - typedef SparseVector ColSpVector; - typedef SparseVector RowSpVector; + typedef SparseVector ColSpVector; + typedef SparseVector RowSpVector; Scalar s1 = internal::random(); Scalar s2 = internal::random(); @@ -93,33 +85,124 @@ template void sparse_product() VERIFY_IS_APPROX(m4 = m2*m3/s1, refMat4 = refMat2*refMat3/s1); VERIFY_IS_APPROX(m4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); VERIFY_IS_APPROX(m4 = s2*m2*m3*s1, refMat4 = s2*refMat2*refMat3*s1); + VERIFY_IS_APPROX(m4 = (m2+m2)*m3, refMat4 = (refMat2+refMat2)*refMat3); + VERIFY_IS_APPROX(m4 = m2*m3.leftCols(cols/2), refMat4 = refMat2*refMat3.leftCols(cols/2)); + VERIFY_IS_APPROX(m4 = m2*(m3+m3).leftCols(cols/2), refMat4 = refMat2*(refMat3+refMat3).leftCols(cols/2)); VERIFY_IS_APPROX(m4=(m2*m3).pruned(0), refMat4=refMat2*refMat3); VERIFY_IS_APPROX(m4=(m2t.transpose()*m3).pruned(0), refMat4=refMat2t.transpose()*refMat3); VERIFY_IS_APPROX(m4=(m2t.transpose()*m3t.transpose()).pruned(0), refMat4=refMat2t.transpose()*refMat3t.transpose()); VERIFY_IS_APPROX(m4=(m2*m3t.transpose()).pruned(0), refMat4=refMat2*refMat3t.transpose()); + // make sure the right product implementation is called: + if((!SparseMatrixType::IsRowMajor) && m2.rows()<=m3.cols()) + { + VERIFY_EVALUATION_COUNT(m4 = m2*m3, 3); // 1 temp for the result + 2 for transposing and get a sorted result. + VERIFY_EVALUATION_COUNT(m4 = (m2*m3).pruned(0), 1); + VERIFY_EVALUATION_COUNT(m4 = (m2*m3).eval().pruned(0), 4); + } + + // and that pruning is effective: + { + DenseMatrix Ad(2,2); + Ad << -1, 1, 1, 1; + SparseMatrixType As(Ad.sparseView()), B(2,2); + VERIFY_IS_EQUAL( (As*As.transpose()).eval().nonZeros(), 4); + VERIFY_IS_EQUAL( (Ad*Ad.transpose()).eval().sparseView().eval().nonZeros(), 2); + VERIFY_IS_EQUAL( (As*As.transpose()).pruned(1e-6).eval().nonZeros(), 2); + } + + // dense ?= sparse * sparse + VERIFY_IS_APPROX(dm4 =m2*m3, refMat4 =refMat2*refMat3); + VERIFY_IS_APPROX(dm4+=m2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=m2*m3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3, refMat4 =refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3, refMat4+=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3, refMat4-=refMat2t.transpose()*refMat3); + VERIFY_IS_APPROX(dm4 =m2t.transpose()*m3t.transpose(), refMat4 =refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4+=m2t.transpose()*m3t.transpose(), refMat4+=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2t.transpose()*m3t.transpose(), refMat4-=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4 =m2*m3t.transpose(), refMat4 =refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4+=m2*m3t.transpose(), refMat4+=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4-=m2*m3t.transpose(), refMat4-=refMat2*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1); + // test aliasing m4 = m2; refMat4 = refMat2; VERIFY_IS_APPROX(m4=m4*m3, refMat4=refMat4*refMat3); - // sparse * dense + // sparse * dense matrix VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3); VERIFY_IS_APPROX(dm4=m2*refMat3t.transpose(), refMat4=refMat2*refMat3t.transpose()); VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3, refMat4=refMat2t.transpose()*refMat3); VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose()); + VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3); + VERIFY_IS_APPROX(dm4=dm4+m2*refMat3, refMat4=refMat4+refMat2*refMat3); + VERIFY_IS_APPROX(dm4+=m2*refMat3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=m2*refMat3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4.noalias()+=m2*refMat3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4.noalias()-=m2*refMat3, refMat4-=refMat2*refMat3); VERIFY_IS_APPROX(dm4=m2*(refMat3+refMat3), refMat4=refMat2*(refMat3+refMat3)); VERIFY_IS_APPROX(dm4=m2t.transpose()*(refMat3+refMat5)*0.5, refMat4=refMat2t.transpose()*(refMat3+refMat5)*0.5); + + // sparse * dense vector + VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3.col(0), refMat4.col(0)=refMat2*refMat3.col(0)); + VERIFY_IS_APPROX(dm4.col(0)=m2*refMat3t.transpose().col(0), refMat4.col(0)=refMat2*refMat3t.transpose().col(0)); + VERIFY_IS_APPROX(dm4.col(0)=m2t.transpose()*refMat3.col(0), refMat4.col(0)=refMat2t.transpose()*refMat3.col(0)); + VERIFY_IS_APPROX(dm4.col(0)=m2t.transpose()*refMat3t.transpose().col(0), refMat4.col(0)=refMat2t.transpose()*refMat3t.transpose().col(0)); // dense * sparse VERIFY_IS_APPROX(dm4=refMat2*m3, refMat4=refMat2*refMat3); + VERIFY_IS_APPROX(dm4=dm4+refMat2*m3, refMat4=refMat4+refMat2*refMat3); + VERIFY_IS_APPROX(dm4+=refMat2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4-=refMat2*m3, refMat4-=refMat2*refMat3); + VERIFY_IS_APPROX(dm4.noalias()+=refMat2*m3, refMat4+=refMat2*refMat3); + VERIFY_IS_APPROX(dm4.noalias()-=refMat2*m3, refMat4-=refMat2*refMat3); VERIFY_IS_APPROX(dm4=refMat2*m3t.transpose(), refMat4=refMat2*refMat3t.transpose()); VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3, refMat4=refMat2t.transpose()*refMat3); VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose()); // sparse * dense and dense * sparse outer product - test_outer::run(m2,m4,refMat2,refMat4); + { + Index c = internal::random(0,depth-1); + Index r = internal::random(0,rows-1); + Index c1 = internal::random(0,cols-1); + Index r1 = internal::random(0,depth-1); + DenseMatrix dm5 = DenseMatrix::Random(depth, cols); + + VERIFY_IS_APPROX( m4=m2.col(c)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX( m4=m2.middleCols(c,1)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=m2.col(c)*dm5.col(c1).transpose(), refMat4=refMat2.col(c)*dm5.col(c1).transpose()); + + VERIFY_IS_APPROX(m4=dm5.col(c1)*m2.col(c).transpose(), refMat4=dm5.col(c1)*refMat2.col(c).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(m4=dm5.col(c1)*m2.middleCols(c,1).transpose(), refMat4=dm5.col(c1)*refMat2.col(c).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=dm5.col(c1)*m2.col(c).transpose(), refMat4=dm5.col(c1)*refMat2.col(c).transpose()); + + VERIFY_IS_APPROX( m4=dm5.row(r1).transpose()*m2.col(c).transpose(), refMat4=dm5.row(r1).transpose()*refMat2.col(c).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=dm5.row(r1).transpose()*m2.col(c).transpose(), refMat4=dm5.row(r1).transpose()*refMat2.col(c).transpose()); + + VERIFY_IS_APPROX( m4=m2.row(r).transpose()*dm5.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*dm5.col(c1).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX( m4=m2.middleRows(r,1).transpose()*dm5.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*dm5.col(c1).transpose()); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=m2.row(r).transpose()*dm5.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*dm5.col(c1).transpose()); + + VERIFY_IS_APPROX( m4=dm5.col(c1)*m2.row(r), refMat4=dm5.col(c1)*refMat2.row(r)); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX( m4=dm5.col(c1)*m2.middleRows(r,1), refMat4=dm5.col(c1)*refMat2.row(r)); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=dm5.col(c1)*m2.row(r), refMat4=dm5.col(c1)*refMat2.row(r)); + + VERIFY_IS_APPROX( m4=dm5.row(r1).transpose()*m2.row(r), refMat4=dm5.row(r1).transpose()*refMat2.row(r)); + VERIFY_IS_EQUAL(m4.nonZeros(), (refMat4.array()!=0).count()); + VERIFY_IS_APPROX(dm4=dm5.row(r1).transpose()*m2.row(r), refMat4=dm5.row(r1).transpose()*refMat2.row(r)); + } VERIFY_IS_APPROX(m6=m6*m6, refMat6=refMat6*refMat6); @@ -131,11 +214,11 @@ template void sparse_product() RowSpVector rv0(depth), rv1; RowDenseVector drv0(depth), drv1(rv1); initSparse(2*density,drv0, rv0); - - VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3); + + VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0); VERIFY_IS_APPROX(rv1=rv0*m3, drv1=drv0*refMat3); - VERIFY_IS_APPROX(cv1=m3*cv0, dcv1=refMat3*dcv0); VERIFY_IS_APPROX(cv1=m3t.adjoint()*cv0, dcv1=refMat3t.adjoint()*dcv0); + VERIFY_IS_APPROX(cv1=rv0*m3, dcv1=drv0*refMat3); VERIFY_IS_APPROX(rv1=m3*cv0, drv1=refMat3*dcv0); } @@ -158,12 +241,16 @@ template void sparse_product() // also check with a SparseWrapper: DenseVector v1 = DenseVector::Random(cols); DenseVector v2 = DenseVector::Random(rows); + DenseVector v3 = DenseVector::Random(rows); VERIFY_IS_APPROX(m3=m2*v1.asDiagonal(), refM3=refM2*v1.asDiagonal()); VERIFY_IS_APPROX(m3=m2.transpose()*v2.asDiagonal(), refM3=refM2.transpose()*v2.asDiagonal()); VERIFY_IS_APPROX(m3=v2.asDiagonal()*m2, refM3=v2.asDiagonal()*refM2); VERIFY_IS_APPROX(m3=v1.asDiagonal()*m2.transpose(), refM3=v1.asDiagonal()*refM2.transpose()); VERIFY_IS_APPROX(m3=v2.asDiagonal()*m2*v1.asDiagonal(), refM3=v2.asDiagonal()*refM2*v1.asDiagonal()); + + VERIFY_IS_APPROX(v2=m2*v1.asDiagonal()*v1, refM2*v1.asDiagonal()*v1); + VERIFY_IS_APPROX(v3=v2.asDiagonal()*m2*v1, v2.asDiagonal()*refM2*v1); // evaluate to a dense matrix to check the .row() and .col() iterator functions VERIFY_IS_APPROX(d3=m2*d1, refM3=refM2*d1); @@ -172,7 +259,7 @@ template void sparse_product() VERIFY_IS_APPROX(d3=d1*m2.transpose(), refM3=d1*refM2.transpose()); } - // test self adjoint products + // test self-adjoint and triangular-view products { DenseMatrix b = DenseMatrix::Random(rows, rows); DenseMatrix x = DenseMatrix::Random(rows, rows); @@ -180,9 +267,12 @@ template void sparse_product() DenseMatrix refUp = DenseMatrix::Zero(rows, rows); DenseMatrix refLo = DenseMatrix::Zero(rows, rows); DenseMatrix refS = DenseMatrix::Zero(rows, rows); + DenseMatrix refA = DenseMatrix::Zero(rows, rows); SparseMatrixType mUp(rows, rows); SparseMatrixType mLo(rows, rows); SparseMatrixType mS(rows, rows); + SparseMatrixType mA(rows, rows); + initSparse(density, refA, mA); do { initSparse(density, refUp, mUp, ForceRealDiag|/*ForceNonZeroDiag|*/MakeUpperTriangular); } while (refUp.isZero()); @@ -195,26 +285,45 @@ template void sparse_product() for (int k=0; k()*b, refX=refS*b); VERIFY_IS_APPROX(x=mLo.template selfadjointView()*b, refX=refS*b); VERIFY_IS_APPROX(x=mS.template selfadjointView()*b, refX=refS*b); + + VERIFY_IS_APPROX(x=b * mUp.template selfadjointView(), refX=b*refS); + VERIFY_IS_APPROX(x=b * mLo.template selfadjointView(), refX=b*refS); + VERIFY_IS_APPROX(x=b * mS.template selfadjointView(), refX=b*refS); + + VERIFY_IS_APPROX(x.noalias()+=mUp.template selfadjointView()*b, refX+=refS*b); + VERIFY_IS_APPROX(x.noalias()-=mLo.template selfadjointView()*b, refX-=refS*b); + VERIFY_IS_APPROX(x.noalias()+=mS.template selfadjointView()*b, refX+=refS*b); - // sparse selfadjointView * sparse + // sparse selfadjointView with sparse matrices SparseMatrixType mSres(rows,rows); VERIFY_IS_APPROX(mSres = mLo.template selfadjointView()*mS, refX = refLo.template selfadjointView()*refS); - // sparse * sparse selfadjointview VERIFY_IS_APPROX(mSres = mS * mLo.template selfadjointView(), refX = refS * refLo.template selfadjointView()); + + // sparse triangularView with dense matrices + VERIFY_IS_APPROX(x=mA.template triangularView()*b, refX=refA.template triangularView()*b); + VERIFY_IS_APPROX(x=mA.template triangularView()*b, refX=refA.template triangularView()*b); + VERIFY_IS_APPROX(x=b*mA.template triangularView(), refX=b*refA.template triangularView()); + VERIFY_IS_APPROX(x=b*mA.template triangularView(), refX=b*refA.template triangularView()); + + // sparse triangularView with sparse matrices + VERIFY_IS_APPROX(mSres = mA.template triangularView()*mS, refX = refA.template triangularView()*refS); + VERIFY_IS_APPROX(mSres = mS * mA.template triangularView(), refX = refS * refA.template triangularView()); + VERIFY_IS_APPROX(mSres = mA.template triangularView()*mS, refX = refA.template triangularView()*refS); + VERIFY_IS_APPROX(mSres = mS * mA.template triangularView(), refX = refS * refA.template triangularView()); } - } // New test for Bug in SparseTimeDenseProduct @@ -239,11 +348,35 @@ template void sparse_produc VERIFY_IS_APPROX( m4(0,0), 0.0 ); } +template +void bug_942() +{ + typedef Matrix Vector; + typedef SparseMatrix ColSpMat; + typedef SparseMatrix RowSpMat; + ColSpMat cmA(1,1); + cmA.insert(0,0) = 1; + + RowSpMat rmA(1,1); + rmA.insert(0,0) = 1; + + Vector d(1); + d[0] = 2; + + double res = 2; + + VERIFY_IS_APPROX( ( cmA*d.asDiagonal() ).eval().coeff(0,0), res ); + VERIFY_IS_APPROX( ( d.asDiagonal()*rmA ).eval().coeff(0,0), res ); + VERIFY_IS_APPROX( ( rmA*d.asDiagonal() ).eval().coeff(0,0), res ); + VERIFY_IS_APPROX( ( d.asDiagonal()*cmA ).eval().coeff(0,0), res ); +} + void test_sparse_product() { for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( (sparse_product >()) ); CALL_SUBTEST_1( (sparse_product >()) ); + CALL_SUBTEST_1( (bug_942()) ); CALL_SUBTEST_2( (sparse_product, ColMajor > >()) ); CALL_SUBTEST_2( (sparse_product, RowMajor > >()) ); CALL_SUBTEST_3( (sparse_product >()) ); diff --git a/external/eigen3/test/sparse_ref.cpp b/external/eigen3/test/sparse_ref.cpp new file mode 100644 index 0000000..5e96072 --- /dev/null +++ b/external/eigen3/test/sparse_ref.cpp @@ -0,0 +1,139 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 20015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// This unit test cannot be easily written to work with EIGEN_DEFAULT_TO_ROW_MAJOR +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#undef EIGEN_DEFAULT_TO_ROW_MAJOR +#endif + +static long int nb_temporaries; + +inline void on_temporary_creation() { + // here's a great place to set a breakpoint when debugging failures in this test! + nb_temporaries++; +} + +#define EIGEN_SPARSE_CREATE_TEMPORARY_PLUGIN { on_temporary_creation(); } + +#include "main.h" +#include + +#define VERIFY_EVALUATION_COUNT(XPR,N) {\ + nb_temporaries = 0; \ + CALL_SUBTEST( XPR ); \ + if(nb_temporaries!=N) std::cerr << "nb_temporaries == " << nb_temporaries << "\n"; \ + VERIFY( (#XPR) && nb_temporaries==N ); \ + } + +template void check_const_correctness(const PlainObjectType&) +{ + // verify that ref-to-const don't have LvalueBit + typedef typename internal::add_const::type ConstPlainObjectType; + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(internal::traits >::Flags & LvalueBit) ); + VERIFY( !(Ref::Flags & LvalueBit) ); + VERIFY( !(Ref::Flags & LvalueBit) ); +} + +template +EIGEN_DONT_INLINE void call_ref_1(Ref > a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +template +EIGEN_DONT_INLINE void call_ref_2(const Ref >& a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +template +EIGEN_DONT_INLINE void call_ref_3(const Ref, StandardCompressedFormat>& a, const B &b) { + VERIFY(a.isCompressed()); + VERIFY_IS_EQUAL(a.toDense(),b.toDense()); +} + +template +EIGEN_DONT_INLINE void call_ref_4(Ref > a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +template +EIGEN_DONT_INLINE void call_ref_5(const Ref >& a, const B &b) { VERIFY_IS_EQUAL(a.toDense(),b.toDense()); } + +void call_ref() +{ + SparseMatrix A = MatrixXf::Random(10,10).sparseView(0.5,1); + SparseMatrix B = MatrixXf::Random(10,10).sparseView(0.5,1); + SparseMatrix C = MatrixXf::Random(10,10).sparseView(0.5,1); + C.reserve(VectorXi::Constant(C.outerSize(), 2)); + const SparseMatrix& Ac(A); + Block > Ab(A,0,1, 3,3); + const Block > Abc(A,0,1,3,3); + SparseVector vc = VectorXf::Random(10).sparseView(0.5,1); + SparseVector vr = VectorXf::Random(10).sparseView(0.5,1); + SparseMatrix AA = A*A; + + + VERIFY_EVALUATION_COUNT( call_ref_1(A, A), 0); +// VERIFY_EVALUATION_COUNT( call_ref_1(Ac, Ac), 0); // does not compile on purpose + VERIFY_EVALUATION_COUNT( call_ref_2(A, A), 0); + VERIFY_EVALUATION_COUNT( call_ref_3(A, A), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(A.transpose(), A.transpose()), 1); + VERIFY_EVALUATION_COUNT( call_ref_3(A.transpose(), A.transpose()), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(Ac,Ac), 0); + VERIFY_EVALUATION_COUNT( call_ref_3(Ac,Ac), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(A+A,2*Ac), 1); + VERIFY_EVALUATION_COUNT( call_ref_3(A+A,2*Ac), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(B, B), 1); + VERIFY_EVALUATION_COUNT( call_ref_3(B, B), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(B.transpose(), B.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_3(B.transpose(), B.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(A*A, AA), 3); + VERIFY_EVALUATION_COUNT( call_ref_3(A*A, AA), 3); + + VERIFY(!C.isCompressed()); + VERIFY_EVALUATION_COUNT( call_ref_3(C, C), 1); + + Ref > Ar(A); + VERIFY_IS_APPROX(Ar+Ar, A+A); + VERIFY_EVALUATION_COUNT( call_ref_1(Ar, A), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(Ar, A), 0); + + Ref > Br(B); + VERIFY_EVALUATION_COUNT( call_ref_1(Br.transpose(), Br.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(Br, Br), 1); + VERIFY_EVALUATION_COUNT( call_ref_2(Br.transpose(), Br.transpose()), 0); + + Ref > Arc(A); +// VERIFY_EVALUATION_COUNT( call_ref_1(Arc, Arc), 0); // does not compile on purpose + VERIFY_EVALUATION_COUNT( call_ref_2(Arc, Arc), 0); + + VERIFY_EVALUATION_COUNT( call_ref_2(A.middleCols(1,3), A.middleCols(1,3)), 0); + + VERIFY_EVALUATION_COUNT( call_ref_2(A.col(2), A.col(2)), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vr.transpose(), vr.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_2(vr, vr.transpose()), 0); + + VERIFY_EVALUATION_COUNT( call_ref_2(A.block(1,1,3,3), A.block(1,1,3,3)), 1); // should be 0 (allocate starts/nnz only) + + VERIFY_EVALUATION_COUNT( call_ref_4(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_4(vr, vr.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_5(vc, vc), 0); + VERIFY_EVALUATION_COUNT( call_ref_5(vr, vr.transpose()), 0); + VERIFY_EVALUATION_COUNT( call_ref_4(A.col(2), A.col(2)), 0); + VERIFY_EVALUATION_COUNT( call_ref_5(A.col(2), A.col(2)), 0); + // VERIFY_EVALUATION_COUNT( call_ref_4(A.row(2), A.row(2).transpose()), 1); // does not compile on purpose + VERIFY_EVALUATION_COUNT( call_ref_5(A.row(2), A.row(2).transpose()), 1); +} + +void test_sparse_ref() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( check_const_correctness(SparseMatrix()) ); + CALL_SUBTEST_1( check_const_correctness(SparseMatrix()) ); + CALL_SUBTEST_2( call_ref() ); + + CALL_SUBTEST_3( check_const_correctness(SparseVector()) ); + CALL_SUBTEST_3( check_const_correctness(SparseVector()) ); + } +} diff --git a/external/eigen3/test/sparse_solver.h b/external/eigen3/test/sparse_solver.h index e1619d6..5145bc3 100644 --- a/external/eigen3/test/sparse_solver.h +++ b/external/eigen3/test/sparse_solver.h @@ -9,68 +9,123 @@ #include "sparse.h" #include +#include + +template +void solve_with_guess(IterativeSolverBase& solver, const MatrixBase& b, const Guess& g, Result &x) { + if(internal::random()) + { + // With a temporary through evaluator + x = solver.derived().solveWithGuess(b,g) + Result::Zero(x.rows(), x.cols()); + } + else + { + // direct evaluation within x through Assignment + x = solver.derived().solveWithGuess(b.derived(),g); + } +} + +template +void solve_with_guess(SparseSolverBase& solver, const MatrixBase& b, const Guess& , Result& x) { + if(internal::random()) + x = solver.derived().solve(b) + Result::Zero(x.rows(), x.cols()); + else + x = solver.derived().solve(b); +} + +template +void solve_with_guess(SparseSolverBase& solver, const SparseMatrixBase& b, const Guess& , Result& x) { + x = solver.derived().solve(b); +} template void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, const Rhs& b, const DenseMat& dA, const DenseRhs& db) { typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; + typedef typename Mat::StorageIndex StorageIndex; - DenseRhs refX = dA.lu().solve(db); + DenseRhs refX = dA.householderQr().solve(db); { - Rhs x(b.rows(), b.cols()); + Rhs x(A.cols(), b.cols()); Rhs oldb = b; solver.compute(A); if (solver.info() != Success) { - std::cerr << "sparse solver testing: factorization failed (check_sparse_solving)\n"; - exit(0); - return; + std::cerr << "ERROR | sparse solver testing, factorization failed (" << typeid(Solver).name() << ")\n"; + VERIFY(solver.info() == Success); } x = solver.solve(b); if (solver.info() != Success) { - std::cerr << "sparse solver testing: solving failed\n"; + std::cerr << "WARNING | sparse solver testing: solving failed (" << typeid(Solver).name() << ")\n"; return; } VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(x.isApprox(refX,test_precision())); + x.setZero(); + solve_with_guess(solver, b, x, x); + VERIFY(solver.info() == Success && "solving failed when using analyzePattern/factorize API"); + VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!"); VERIFY(x.isApprox(refX,test_precision())); + x.setZero(); // test the analyze/factorize API solver.analyzePattern(A); solver.factorize(A); - if (solver.info() != Success) - { - std::cerr << "sparse solver testing: factorization failed (check_sparse_solving)\n"; - exit(0); - return; - } + VERIFY(solver.info() == Success && "factorization failed when using analyzePattern/factorize API"); x = solver.solve(b); - if (solver.info() != Success) - { - std::cerr << "sparse solver testing: solving failed\n"; - return; - } + VERIFY(solver.info() == Success && "solving failed when using analyzePattern/factorize API"); VERIFY(oldb.isApprox(b) && "sparse solver testing: the rhs should not be modified!"); - VERIFY(x.isApprox(refX,test_precision())); - } - - // test dense Block as the result and rhs: - { - DenseRhs x(db.rows(), db.cols()); - DenseRhs oldb(db); + x.setZero(); - x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols())); - VERIFY(oldb.isApprox(db) && "sparse solver testing: the rhs should not be modified!"); - VERIFY(x.isApprox(refX,test_precision())); + // test with Map + MappedSparseMatrix Am(A.rows(), A.cols(), A.nonZeros(), const_cast(A.outerIndexPtr()), const_cast(A.innerIndexPtr()), const_cast(A.valuePtr())); + solver.compute(Am); + VERIFY(solver.info() == Success && "factorization failed when using Map"); + DenseRhs dx(refX); + dx.setZero(); + Map xm(dx.data(), dx.rows(), dx.cols()); + Map bm(db.data(), db.rows(), db.cols()); + xm = solver.solve(bm); + VERIFY(solver.info() == Success && "solving failed when using Map"); + VERIFY(oldb.isApprox(bm) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(xm.isApprox(refX,test_precision())); } - + // if not too large, do some extra check: if(A.rows()<2000) { + // test initialization ctor + { + Rhs x(b.rows(), b.cols()); + Solver solver2(A); + VERIFY(solver2.info() == Success); + x = solver2.solve(b); + VERIFY(x.isApprox(refX,test_precision())); + } + + // test dense Block as the result and rhs: + { + DenseRhs x(refX.rows(), refX.cols()); + DenseRhs oldb(db); + x.setZero(); + x.block(0,0,x.rows(),x.cols()) = solver.solve(db.block(0,0,db.rows(),db.cols())); + VERIFY(oldb.isApprox(db) && "sparse solver testing: the rhs should not be modified!"); + VERIFY(x.isApprox(refX,test_precision())); + } + + // test uncompressed inputs + { + Mat A2 = A; + A2.reserve((ArrayXf::Random(A.outerSize())+2).template cast().eval()); + solver.compute(A2); + Rhs x = solver.solve(b); + VERIFY(x.isApprox(refX,test_precision())); + } // test expression as input { @@ -86,41 +141,35 @@ void check_sparse_solving(Solver& solver, const typename Solver::MatrixType& A, } template -void check_sparse_solving_real_cases(Solver& solver, const typename Solver::MatrixType& A, const Rhs& b, const Rhs& refX) +void check_sparse_solving_real_cases(Solver& solver, const typename Solver::MatrixType& A, const Rhs& b, const typename Solver::MatrixType& fullA, const Rhs& refX) { typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; typedef typename Mat::RealScalar RealScalar; - Rhs x(b.rows(), b.cols()); - + Rhs x(A.cols(), b.cols()); + solver.compute(A); if (solver.info() != Success) { - std::cerr << "sparse solver testing: factorization failed (check_sparse_solving_real_cases)\n"; - exit(0); - return; + std::cerr << "ERROR | sparse solver testing, factorization failed (" << typeid(Solver).name() << ")\n"; + VERIFY(solver.info() == Success); } x = solver.solve(b); + if (solver.info() != Success) { - std::cerr << "sparse solver testing: solving failed\n"; + std::cerr << "WARNING | sparse solver testing, solving failed (" << typeid(Solver).name() << ")\n"; return; } - RealScalar res_error; - // Compute the norm of the relative error - if(refX.size() != 0) - res_error = (refX - x).norm()/refX.norm(); - else - { - // Compute the relative residual norm - res_error = (b - A * x).norm()/b.norm(); - } - if (res_error > test_precision() ){ - std::cerr << "Test " << g_test_stack.back() << " failed in "EI_PP_MAKE_STRING(__FILE__) - << " (" << EI_PP_MAKE_STRING(__LINE__) << ")" << std::endl << std::endl; - abort(); + RealScalar res_error = (fullA*x-b).norm()/b.norm(); + VERIFY( (res_error <= test_precision() ) && "sparse solver failed without noticing it"); + + + if(refX.size() != 0 && (refX - x).norm()/refX.norm() > test_precision()) + { + std::cerr << "WARNING | found solution is different from the provided reference one\n"; } } @@ -133,7 +182,7 @@ void check_sparse_determinant(Solver& solver, const typename Solver::MatrixType& solver.compute(A); if (solver.info() != Success) { - std::cerr << "sparse solver testing: factorization failed (check_sparse_determinant)\n"; + std::cerr << "WARNING | sparse solver testing: factorization failed (check_sparse_determinant)\n"; return; } @@ -150,7 +199,7 @@ void check_sparse_abs_determinant(Solver& solver, const typename Solver::MatrixT solver.compute(A); if (solver.info() != Success) { - std::cerr << "sparse solver testing: factorization failed (check_sparse_abs_determinant)\n"; + std::cerr << "WARNING | sparse solver testing: factorization failed (check_sparse_abs_determinant)\n"; return; } @@ -197,13 +246,33 @@ inline std::string get_matrixfolder() mat_folder = mat_folder + static_cast("/real/"); return mat_folder; } +std::string sym_to_string(int sym) +{ + if(sym==Symmetric) return "Symmetric "; + if(sym==SPD) return "SPD "; + return ""; +} +template +std::string solver_stats(const IterativeSolverBase &solver) +{ + std::stringstream ss; + ss << solver.iterations() << " iters, error: " << solver.error(); + return ss.str(); +} +template +std::string solver_stats(const SparseSolverBase &/*solver*/) +{ + return ""; +} #endif -template void check_sparse_spd_solving(Solver& solver) +template void check_sparse_spd_solving(Solver& solver, int maxSize = 300, int maxRealWorldSize = 100000) { typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; - typedef SparseMatrix SpMat; + typedef typename Mat::StorageIndex StorageIndex; + typedef SparseMatrix SpMat; + typedef SparseVector SpVec; typedef Matrix DenseMatrix; typedef Matrix DenseVector; @@ -211,7 +280,7 @@ template void check_sparse_spd_solving(Solver& solver) Mat A, halfA; DenseMatrix dA; for (int i = 0; i < g_repeat; i++) { - int size = generate_sparse_spd_problem(solver, A, halfA, dA); + int size = generate_sparse_spd_problem(solver, A, halfA, dA, maxSize); // generate the right hand sides int rhsCols = internal::random(1,16); @@ -220,13 +289,17 @@ template void check_sparse_spd_solving(Solver& solver) DenseVector b = DenseVector::Random(size); DenseMatrix dB(size,rhsCols); initSparse(density, dB, B, ForceNonZeroDiag); + SpVec c = B.col(0); + DenseVector dc = dB.col(0); - check_sparse_solving(solver, A, b, dA, b); - check_sparse_solving(solver, halfA, b, dA, b); - check_sparse_solving(solver, A, dB, dA, dB); - check_sparse_solving(solver, halfA, dB, dA, dB); - check_sparse_solving(solver, A, B, dA, dB); - check_sparse_solving(solver, halfA, B, dA, dB); + CALL_SUBTEST( check_sparse_solving(solver, A, b, dA, b) ); + CALL_SUBTEST( check_sparse_solving(solver, halfA, b, dA, b) ); + CALL_SUBTEST( check_sparse_solving(solver, A, dB, dA, dB) ); + CALL_SUBTEST( check_sparse_solving(solver, halfA, dB, dA, dB) ); + CALL_SUBTEST( check_sparse_solving(solver, A, B, dA, dB) ); + CALL_SUBTEST( check_sparse_solving(solver, halfA, B, dA, dB) ); + CALL_SUBTEST( check_sparse_solving(solver, A, c, dA, dc) ); + CALL_SUBTEST( check_sparse_solving(solver, halfA, c, dA, dc) ); // check only once if(i==0) @@ -237,25 +310,44 @@ template void check_sparse_spd_solving(Solver& solver) } // First, get the folder -#ifdef TEST_REAL_CASES - if (internal::is_same::value - || internal::is_same >::value) - return ; - - std::string mat_folder = get_matrixfolder(); - MatrixMarketIterator it(mat_folder); - for (; it; ++it) +#ifdef TEST_REAL_CASES + // Test real problems with double precision only + if (internal::is_same::Real, double>::value) { - if (it.sym() == SPD){ - Mat halfA; - PermutationMatrix pnull; - halfA.template selfadjointView() = it.matrix().template triangularView().twistedBy(pnull); - - std::cout<< " ==== SOLVING WITH MATRIX " << it.matname() << " ==== \n"; - check_sparse_solving_real_cases(solver, it.matrix(), it.rhs(), it.refX()); - check_sparse_solving_real_cases(solver, halfA, it.rhs(), it.refX()); + std::string mat_folder = get_matrixfolder(); + MatrixMarketIterator it(mat_folder); + for (; it; ++it) + { + if (it.sym() == SPD){ + A = it.matrix(); + if(A.diagonal().size() <= maxRealWorldSize) + { + DenseVector b = it.rhs(); + DenseVector refX = it.refX(); + PermutationMatrix pnull; + halfA.resize(A.rows(), A.cols()); + if(Solver::UpLo == (Lower|Upper)) + halfA = A; + else + halfA.template selfadjointView() = A.template triangularView().twistedBy(pnull); + + std::cout << "INFO | Testing " << sym_to_string(it.sym()) << "sparse problem " << it.matname() + << " (" << A.rows() << "x" << A.cols() << ") using " << typeid(Solver).name() << "..." << std::endl; + CALL_SUBTEST( check_sparse_solving_real_cases(solver, A, b, A, refX) ); + std::string stats = solver_stats(solver); + if(stats.size()>0) + std::cout << "INFO | " << stats << std::endl; + CALL_SUBTEST( check_sparse_solving_real_cases(solver, halfA, b, A, refX) ); + } + else + { + std::cout << "INFO | Skip sparse problem \"" << it.matname() << "\" (too large)" << std::endl; + } + } } } +#else + EIGEN_UNUSED_VARIABLE(maxRealWorldSize); #endif } @@ -277,37 +369,39 @@ template void check_sparse_spd_determinant(Solver& solver) } template -int generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300) +Index generate_sparse_square_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag) { typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; - int size = internal::random(1,maxSize); + Index size = internal::random(1,maxSize); double density = (std::max)(8./(size*size), 0.01); A.resize(size,size); dA.resize(size,size); - initSparse(density, dA, A, ForceNonZeroDiag); + initSparse(density, dA, A, options); return size; } struct prune_column { - int m_col; - prune_column(int col) : m_col(col) {} + Index m_col; + prune_column(Index col) : m_col(col) {} template - bool operator()(int, int col, const Scalar&) const { + bool operator()(Index, Index col, const Scalar&) const { return col != m_col; } }; -template void check_sparse_square_solving(Solver& solver, bool checkDeficient = false) + +template void check_sparse_square_solving(Solver& solver, int maxSize = 300, int maxRealWorldSize = 100000, bool checkDeficient = false) { typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; - typedef SparseMatrix SpMat; + typedef SparseMatrix SpMat; + typedef SparseVector SpVec; typedef Matrix DenseMatrix; typedef Matrix DenseVector; @@ -316,7 +410,7 @@ template void check_sparse_square_solving(Solver& solver, bool Mat A; DenseMatrix dA; for (int i = 0; i < g_repeat; i++) { - int size = generate_sparse_square_problem(solver, A, dA); + Index size = generate_sparse_square_problem(solver, A, dA, maxSize); A.makeCompressed(); DenseVector b = DenseVector::Random(size); @@ -325,9 +419,12 @@ template void check_sparse_square_solving(Solver& solver, bool double density = (std::max)(8./(size*rhsCols), 0.1); initSparse(density, dB, B, ForceNonZeroDiag); B.makeCompressed(); - check_sparse_solving(solver, A, b, dA, b); - check_sparse_solving(solver, A, dB, dA, dB); - check_sparse_solving(solver, A, B, dA, dB); + SpVec c = B.col(0); + DenseVector dc = dB.col(0); + CALL_SUBTEST(check_sparse_solving(solver, A, b, dA, b)); + CALL_SUBTEST(check_sparse_solving(solver, A, dB, dA, dB)); + CALL_SUBTEST(check_sparse_solving(solver, A, B, dA, dB)); + CALL_SUBTEST(check_sparse_solving(solver, A, c, dA, dc)); // check only once if(i==0) @@ -337,7 +434,7 @@ template void check_sparse_square_solving(Solver& solver, bool } // regression test for Bug 792 (structurally rank deficient matrices): if(checkDeficient && size>1) { - int col = internal::random(0,size-1); + Index col = internal::random(0,int(size-1)); A.prune(prune_column(col)); solver.compute(A); VERIFY_IS_EQUAL(solver.info(), NumericalIssue); @@ -346,17 +443,33 @@ template void check_sparse_square_solving(Solver& solver, bool // First, get the folder #ifdef TEST_REAL_CASES - if (internal::is_same::value - || internal::is_same >::value) - return ; - - std::string mat_folder = get_matrixfolder(); - MatrixMarketIterator it(mat_folder); - for (; it; ++it) + // Test real problems with double precision only + if (internal::is_same::Real, double>::value) { - std::cout<< " ==== SOLVING WITH MATRIX " << it.matname() << " ==== \n"; - check_sparse_solving_real_cases(solver, it.matrix(), it.rhs(), it.refX()); + std::string mat_folder = get_matrixfolder(); + MatrixMarketIterator it(mat_folder); + for (; it; ++it) + { + A = it.matrix(); + if(A.diagonal().size() <= maxRealWorldSize) + { + DenseVector b = it.rhs(); + DenseVector refX = it.refX(); + std::cout << "INFO | Testing " << sym_to_string(it.sym()) << "sparse problem " << it.matname() + << " (" << A.rows() << "x" << A.cols() << ") using " << typeid(Solver).name() << "..." << std::endl; + CALL_SUBTEST(check_sparse_solving_real_cases(solver, A, b, A, refX)); + std::string stats = solver_stats(solver); + if(stats.size()>0) + std::cout << "INFO | " << stats << std::endl; + } + else + { + std::cout << "INFO | SKIP sparse problem \"" << it.matname() << "\" (too large)" << std::endl; + } + } } +#else + EIGEN_UNUSED_VARIABLE(maxRealWorldSize); #endif } @@ -366,13 +479,20 @@ template void check_sparse_square_determinant(Solver& solver) typedef typename Solver::MatrixType Mat; typedef typename Mat::Scalar Scalar; typedef Matrix DenseMatrix; - - // generate the problem - Mat A; - DenseMatrix dA; - generate_sparse_square_problem(solver, A, dA, 30); - A.makeCompressed(); + for (int i = 0; i < g_repeat; i++) { + // generate the problem + Mat A; + DenseMatrix dA; + + int size = internal::random(1,30); + dA.setRandom(size,size); + + dA = (dA.array().abs()<0.3).select(0,dA); + dA.diagonal() = (dA.diagonal().array()==0).select(1,dA.diagonal()); + A = dA.sparseView(); + A.makeCompressed(); + check_sparse_determinant(solver, A, dA); } } @@ -383,13 +503,63 @@ template void check_sparse_square_abs_determinant(Solver& solve typedef typename Mat::Scalar Scalar; typedef Matrix DenseMatrix; - // generate the problem - Mat A; - DenseMatrix dA; - generate_sparse_square_problem(solver, A, dA, 30); - A.makeCompressed(); for (int i = 0; i < g_repeat; i++) { + // generate the problem + Mat A; + DenseMatrix dA; + generate_sparse_square_problem(solver, A, dA, 30); + A.makeCompressed(); check_sparse_abs_determinant(solver, A, dA); } } +template +void generate_sparse_leastsquare_problem(Solver&, typename Solver::MatrixType& A, DenseMat& dA, int maxSize = 300, int options = ForceNonZeroDiag) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + + int rows = internal::random(1,maxSize); + int cols = internal::random(1,rows); + double density = (std::max)(8./(rows*cols), 0.01); + + A.resize(rows,cols); + dA.resize(rows,cols); + + initSparse(density, dA, A, options); +} + +template void check_sparse_leastsquare_solving(Solver& solver) +{ + typedef typename Solver::MatrixType Mat; + typedef typename Mat::Scalar Scalar; + typedef SparseMatrix SpMat; + typedef Matrix DenseMatrix; + typedef Matrix DenseVector; + + int rhsCols = internal::random(1,16); + + Mat A; + DenseMatrix dA; + for (int i = 0; i < g_repeat; i++) { + generate_sparse_leastsquare_problem(solver, A, dA); + + A.makeCompressed(); + DenseVector b = DenseVector::Random(A.rows()); + DenseMatrix dB(A.rows(),rhsCols); + SpMat B(A.rows(),rhsCols); + double density = (std::max)(8./(A.rows()*rhsCols), 0.1); + initSparse(density, dB, B, ForceNonZeroDiag); + B.makeCompressed(); + check_sparse_solving(solver, A, b, dA, b); + check_sparse_solving(solver, A, dB, dA, dB); + check_sparse_solving(solver, A, B, dA, dB); + + // check only once + if(i==0) + { + b = DenseVector::Zero(A.rows()); + check_sparse_solving(solver, A, b, dA, b); + } + } +} diff --git a/external/eigen3/test/sparse_vector.cpp b/external/eigen3/test/sparse_vector.cpp index 0c94768..b3e1dda 100644 --- a/external/eigen3/test/sparse_vector.cpp +++ b/external/eigen3/test/sparse_vector.cpp @@ -9,22 +9,22 @@ #include "sparse.h" -template void sparse_vector(int rows, int cols) +template void sparse_vector(int rows, int cols) { double densityMat = (std::max)(8./(rows*cols), 0.01); - double densityVec = (std::max)(8./float(rows), 0.1); + double densityVec = (std::max)(8./(rows), 0.1); typedef Matrix DenseMatrix; typedef Matrix DenseVector; - typedef SparseVector SparseVectorType; - typedef SparseMatrix SparseMatrixType; + typedef SparseVector SparseVectorType; + typedef SparseMatrix SparseMatrixType; Scalar eps = 1e-6; SparseMatrixType m1(rows,rows); SparseVectorType v1(rows), v2(rows), v3(rows); DenseMatrix refM1 = DenseMatrix::Zero(rows, rows); DenseVector refV1 = DenseVector::Random(rows), - refV2 = DenseVector::Random(rows), - refV3 = DenseVector::Random(rows); + refV2 = DenseVector::Random(rows), + refV3 = DenseVector::Random(rows); std::vector zerocoords, nonzerocoords; initSparse(densityVec, refV1, v1, &zerocoords, &nonzerocoords); @@ -52,6 +52,20 @@ template void sparse_vector(int rows, int cols) } } VERIFY_IS_APPROX(v1, refV1); + + // test coeffRef with reallocation + { + SparseVectorType v4(rows); + DenseVector v5 = DenseVector::Zero(rows); + for(int k=0; k(0,rows-1); + Scalar v = internal::random(); + v4.coeffRef(i) += v; + v5.coeffRef(i) += v; + } + VERIFY_IS_APPROX(v4,v5); + } v1.coeffRef(nonzerocoords[0]) = Scalar(5); refV1.coeffRef(nonzerocoords[0]) = Scalar(5); @@ -71,9 +85,12 @@ template void sparse_vector(int rows, int cols) VERIFY_IS_APPROX(v1.dot(v2), refV1.dot(refV2)); VERIFY_IS_APPROX(v1.dot(refV2), refV1.dot(refV2)); + VERIFY_IS_APPROX(m1*v2, refM1*refV2); VERIFY_IS_APPROX(v1.dot(m1*v2), refV1.dot(refM1*refV2)); - int i = internal::random(0,rows-1); - VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i))); + { + int i = internal::random(0,rows-1); + VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i))); + } VERIFY_IS_APPROX(v1.squaredNorm(), refV1.squaredNorm()); @@ -96,15 +113,51 @@ template void sparse_vector(int rows, int cols) VERIFY_IS_APPROX(refV3 = v1.transpose(),v1.toDense()); VERIFY_IS_APPROX(DenseVector(v1),v1.toDense()); + // test conservative resize + { + std::vector inc; + if(rows > 3) + inc.push_back(-3); + inc.push_back(0); + inc.push_back(3); + inc.push_back(1); + inc.push_back(10); + + for(std::size_t i = 0; i< inc.size(); i++) { + StorageIndex incRows = inc[i]; + SparseVectorType vec1(rows); + DenseVector refVec1 = DenseVector::Zero(rows); + initSparse(densityVec, refVec1, vec1); + + vec1.conservativeResize(rows+incRows); + refVec1.conservativeResize(rows+incRows); + if (incRows > 0) refVec1.tail(incRows).setZero(); + + VERIFY_IS_APPROX(vec1, refVec1); + + // Insert new values + if (incRows > 0) + vec1.insert(vec1.rows()-1) = refVec1(refVec1.rows()-1) = 1; + + VERIFY_IS_APPROX(vec1, refVec1); + } + } + } void test_sparse_vector() { for(int i = 0; i < g_repeat; i++) { + int r = Eigen::internal::random(1,500), c = Eigen::internal::random(1,500); + if(Eigen::internal::random(0,4) == 0) { + r = c; // check square matrices in 25% of tries + } + EIGEN_UNUSED_VARIABLE(r+c); + CALL_SUBTEST_1(( sparse_vector(8, 8) )); - CALL_SUBTEST_2(( sparse_vector, int>(16, 16) )); - CALL_SUBTEST_1(( sparse_vector(299, 535) )); - CALL_SUBTEST_1(( sparse_vector(299, 535) )); + CALL_SUBTEST_2(( sparse_vector, int>(r, c) )); + CALL_SUBTEST_1(( sparse_vector(r, c) )); + CALL_SUBTEST_1(( sparse_vector(r, c) )); } } diff --git a/external/eigen3/test/sparselu.cpp b/external/eigen3/test/sparselu.cpp index b3d67ae..bd000ba 100644 --- a/external/eigen3/test/sparselu.cpp +++ b/external/eigen3/test/sparselu.cpp @@ -3,25 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // SparseLU solve does not accept column major matrices for the destination. // However, as expected, the generic check_sparse_square_solving routines produces row-major @@ -41,9 +25,9 @@ template void test_sparselu_T() SparseLU, AMDOrdering > sparselu_amd; SparseLU, NaturalOrdering > sparselu_natural; - check_sparse_square_solving(sparselu_colamd, true); - check_sparse_square_solving(sparselu_amd, true); - check_sparse_square_solving(sparselu_natural,true); + check_sparse_square_solving(sparselu_colamd, 300, 100000, true); + check_sparse_square_solving(sparselu_amd, 300, 10000, true); + check_sparse_square_solving(sparselu_natural, 300, 2000, true); check_sparse_square_abs_determinant(sparselu_colamd); check_sparse_square_abs_determinant(sparselu_amd); diff --git a/external/eigen3/test/sparseqr.cpp b/external/eigen3/test/sparseqr.cpp index 451c0e7..e8605fd 100644 --- a/external/eigen3/test/sparseqr.cpp +++ b/external/eigen3/test/sparseqr.cpp @@ -10,11 +10,12 @@ #include template -int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows = 300) +int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows = 300, int maxCols = 150) { + eigen_assert(maxRows >= maxCols); typedef typename MatrixType::Scalar Scalar; int rows = internal::random(1,maxRows); - int cols = internal::random(1,rows); + int cols = internal::random(1,maxCols); double density = (std::max)(8./(rows*cols), 0.01); A.resize(rows,cols); @@ -53,7 +54,7 @@ template void test_sparseqr_scalar() b = dA * DenseVector::Random(A.cols()); solver.compute(A); - if(internal::random(0,1)>0.5) + if(internal::random(0,1)>0.5f) solver.factorize(A); // this checks that calling analyzePattern is not needed if the pattern do not change. if (solver.info() != Success) { @@ -88,6 +89,11 @@ template void test_sparseqr_scalar() QtQ = Q * Q.adjoint(); idM.resize(Q.rows(), Q.rows()); idM.setIdentity(); VERIFY(idM.isApprox(QtQ)); + + // Q to dense + DenseMat dQ; + dQ = solver.matrixQ(); + VERIFY_IS_APPROX(Q, dQ); } void test_sparseqr() { diff --git a/external/eigen3/test/spqr_support.cpp b/external/eigen3/test/spqr_support.cpp index b8980e0..81e63b6 100644 --- a/external/eigen3/test/spqr_support.cpp +++ b/external/eigen3/test/spqr_support.cpp @@ -5,6 +5,8 @@ // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed + +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS #include "sparse.h" #include @@ -18,8 +20,8 @@ int generate_sparse_rectangular_problem(MatrixType& A, DenseMat& dA, int maxRows int cols = internal::random(1,rows); double density = (std::max)(8./(rows*cols), 0.01); - A.resize(rows,rows); - dA.resize(rows,rows); + A.resize(rows,cols); + dA.resize(rows,cols); initSparse(density, dA, A,ForceNonZeroDiag); A.makeCompressed(); return rows; @@ -35,7 +37,7 @@ template void test_spqr_scalar() SPQR solver; generate_sparse_rectangular_problem(A,dA); - int m = A.rows(); + Index m = A.rows(); b = DenseVector::Random(m); solver.compute(A); if (solver.info() != Success) diff --git a/external/eigen3/test/stable_norm.cpp b/external/eigen3/test/stable_norm.cpp index 231dd91..c3eb5ff 100644 --- a/external/eigen3/test/stable_norm.cpp +++ b/external/eigen3/test/stable_norm.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2009-2014 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -9,14 +9,6 @@ #include "main.h" -// workaround aggressive optimization in ICC -template EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; } - -template bool isFinite(const T& x) -{ - return isNotNaN(sub(x,x)); -} - template EIGEN_DONT_INLINE T copy(const T& x) { return x; @@ -32,6 +24,8 @@ template void stable_norm(const MatrixType& m) typedef typename MatrixType::Index Index; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits::Real RealScalar; + + bool complex_real_product_ok = true; // Check the basic machine-dependent constants. { @@ -44,6 +38,16 @@ template void stable_norm(const MatrixType& m) VERIFY( (!(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5) || (it<=4 && ibeta <= 3 ) || it<2)) && "the stable norm algorithm cannot be guaranteed on this computer"); + + Scalar inf = std::numeric_limits::infinity(); + if(NumTraits::IsComplex && (numext::isnan)(inf*RealScalar(1)) ) + { + complex_real_product_ok = false; + static bool first = true; + if(first) + std::cerr << "WARNING: compiler mess up complex*real product, " << inf << " * " << 1.0 << " = " << inf*RealScalar(1) << std::endl; + first = false; + } } @@ -76,19 +80,19 @@ template void stable_norm(const MatrixType& m) RealScalar size = static_cast(m.size()); - // test isFinite - VERIFY(!isFinite( std::numeric_limits::infinity())); - VERIFY(!isFinite(sqrt(-abs(big)))); + // test numext::isfinite + VERIFY(!(numext::isfinite)( std::numeric_limits::infinity())); + VERIFY(!(numext::isfinite)(sqrt(-abs(big)))); // test overflow - VERIFY(isFinite(sqrt(size)*abs(big))); + VERIFY((numext::isfinite)(sqrt(size)*abs(big))); VERIFY_IS_NOT_APPROX(sqrt(copy(vbig.squaredNorm())), abs(sqrt(size)*big)); // here the default norm must fail VERIFY_IS_APPROX(vbig.stableNorm(), sqrt(size)*abs(big)); VERIFY_IS_APPROX(vbig.blueNorm(), sqrt(size)*abs(big)); VERIFY_IS_APPROX(vbig.hypotNorm(), sqrt(size)*abs(big)); // test underflow - VERIFY(isFinite(sqrt(size)*abs(small))); + VERIFY((numext::isfinite)(sqrt(size)*abs(small))); VERIFY_IS_NOT_APPROX(sqrt(copy(vsmall.squaredNorm())), abs(sqrt(size)*small)); // here the default norm must fail VERIFY_IS_APPROX(vsmall.stableNorm(), sqrt(size)*abs(small)); VERIFY_IS_APPROX(vsmall.blueNorm(), sqrt(size)*abs(small)); @@ -101,6 +105,79 @@ template void stable_norm(const MatrixType& m) VERIFY_IS_APPROX(vrand.rowwise().stableNorm(), vrand.rowwise().norm()); VERIFY_IS_APPROX(vrand.rowwise().blueNorm(), vrand.rowwise().norm()); VERIFY_IS_APPROX(vrand.rowwise().hypotNorm(), vrand.rowwise().norm()); + + // test NaN, +inf, -inf + MatrixType v; + Index i = internal::random(0,rows-1); + Index j = internal::random(0,cols-1); + + // NaN + { + v = vrand; + v(i,j) = std::numeric_limits::quiet_NaN(); + VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY((numext::isnan)(v.squaredNorm())); + VERIFY(!(numext::isfinite)(v.norm())); VERIFY((numext::isnan)(v.norm())); + VERIFY(!(numext::isfinite)(v.stableNorm())); VERIFY((numext::isnan)(v.stableNorm())); + VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm())); + VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isnan)(v.hypotNorm())); + } + + // +inf + { + v = vrand; + v(i,j) = std::numeric_limits::infinity(); + VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY(isPlusInf(v.squaredNorm())); + VERIFY(!(numext::isfinite)(v.norm())); VERIFY(isPlusInf(v.norm())); + VERIFY(!(numext::isfinite)(v.stableNorm())); + if(complex_real_product_ok){ + VERIFY(isPlusInf(v.stableNorm())); + } + VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY(isPlusInf(v.blueNorm())); + VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY(isPlusInf(v.hypotNorm())); + } + + // -inf + { + v = vrand; + v(i,j) = -std::numeric_limits::infinity(); + VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY(isPlusInf(v.squaredNorm())); + VERIFY(!(numext::isfinite)(v.norm())); VERIFY(isPlusInf(v.norm())); + VERIFY(!(numext::isfinite)(v.stableNorm())); + if(complex_real_product_ok) { + VERIFY(isPlusInf(v.stableNorm())); + } + VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY(isPlusInf(v.blueNorm())); + VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY(isPlusInf(v.hypotNorm())); + } + + // mix + { + Index i2 = internal::random(0,rows-1); + Index j2 = internal::random(0,cols-1); + v = vrand; + v(i,j) = -std::numeric_limits::infinity(); + v(i2,j2) = std::numeric_limits::quiet_NaN(); + VERIFY(!(numext::isfinite)(v.squaredNorm())); VERIFY((numext::isnan)(v.squaredNorm())); + VERIFY(!(numext::isfinite)(v.norm())); VERIFY((numext::isnan)(v.norm())); + VERIFY(!(numext::isfinite)(v.stableNorm())); VERIFY((numext::isnan)(v.stableNorm())); + VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm())); + VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isnan)(v.hypotNorm())); + } + + // stableNormalize[d] + { + VERIFY_IS_APPROX(vrand.stableNormalized(), vrand.normalized()); + MatrixType vcopy(vrand); + vcopy.stableNormalize(); + VERIFY_IS_APPROX(vcopy, vrand.normalized()); + VERIFY_IS_APPROX((vrand.stableNormalized()).norm(), RealScalar(1)); + VERIFY_IS_APPROX(vcopy.norm(), RealScalar(1)); + VERIFY_IS_APPROX((vbig.stableNormalized()).norm(), RealScalar(1)); + VERIFY_IS_APPROX((vsmall.stableNormalized()).norm(), RealScalar(1)); + RealScalar big_scaling = ((std::numeric_limits::max)() * RealScalar(1e-4)); + VERIFY_IS_APPROX(vbig/big_scaling, (vbig.stableNorm() * vbig.stableNormalized()).eval()/big_scaling); + VERIFY_IS_APPROX(vsmall, vsmall.stableNorm() * vsmall.stableNormalized()); + } } void test_stable_norm() diff --git a/external/eigen3/test/stdvector.cpp b/external/eigen3/test/stdvector.cpp index 6e173c6..50cb334 100644 --- a/external/eigen3/test/stdvector.cpp +++ b/external/eigen3/test/stdvector.cpp @@ -34,7 +34,7 @@ void check_stdvector_matrix(const MatrixType& m) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(MatrixType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) @@ -69,7 +69,7 @@ void check_stdvector_transform(const TransformType&) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(TransformType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(TransformType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) @@ -104,7 +104,7 @@ void check_stdvector_quaternion(const QuaternionType&) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(QuaternionType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(QuaternionType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) diff --git a/external/eigen3/test/stdvector_overload.cpp b/external/eigen3/test/stdvector_overload.cpp index 736ff0e..9596659 100644 --- a/external/eigen3/test/stdvector_overload.cpp +++ b/external/eigen3/test/stdvector_overload.cpp @@ -48,7 +48,7 @@ void check_stdvector_matrix(const MatrixType& m) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(MatrixType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(MatrixType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) @@ -83,7 +83,7 @@ void check_stdvector_transform(const TransformType&) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(TransformType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(TransformType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) @@ -118,7 +118,7 @@ void check_stdvector_quaternion(const QuaternionType&) VERIFY_IS_APPROX(v[21], y); v.push_back(x); VERIFY_IS_APPROX(v[22], x); - VERIFY((size_t)&(v[22]) == (size_t)&(v[21]) + sizeof(QuaternionType)); + VERIFY((internal::UIntPtr)&(v[22]) == (internal::UIntPtr)&(v[21]) + sizeof(QuaternionType)); // do a lot of push_back such that the vector gets internally resized // (with memory reallocation) diff --git a/external/eigen3/test/superlu_support.cpp b/external/eigen3/test/superlu_support.cpp index 3b16135..98a7bc5 100644 --- a/external/eigen3/test/superlu_support.cpp +++ b/external/eigen3/test/superlu_support.cpp @@ -7,6 +7,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS #include "sparse_solver.h" #include diff --git a/external/eigen3/test/svd_common.h b/external/eigen3/test/svd_common.h new file mode 100644 index 0000000..605d5df --- /dev/null +++ b/external/eigen3/test/svd_common.h @@ -0,0 +1,483 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef SVD_DEFAULT +#error a macro SVD_DEFAULT(MatrixType) must be defined prior to including svd_common.h +#endif + +#ifndef SVD_FOR_MIN_NORM +#error a macro SVD_FOR_MIN_NORM(MatrixType) must be defined prior to including svd_common.h +#endif + +#include "svd_fill.h" + +// Check that the matrix m is properly reconstructed and that the U and V factors are unitary +// The SVD must have already been computed. +template +void svd_check_full(const MatrixType& m, const SvdType& svd) +{ + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Matrix MatrixUType; + typedef Matrix MatrixVType; + + MatrixType sigma = MatrixType::Zero(rows,cols); + sigma.diagonal() = svd.singularValues().template cast(); + MatrixUType u = svd.matrixU(); + MatrixVType v = svd.matrixV(); + RealScalar scaling = m.cwiseAbs().maxCoeff(); + if(scaling<(std::numeric_limits::min)()) + { + VERIFY(sigma.cwiseAbs().maxCoeff() <= (std::numeric_limits::min)()); + } + else + { + VERIFY_IS_APPROX(m/scaling, u * (sigma/scaling) * v.adjoint()); + } + VERIFY_IS_UNITARY(u); + VERIFY_IS_UNITARY(v); +} + +// Compare partial SVD defined by computationOptions to a full SVD referenceSvd +template +void svd_compare_to_full(const MatrixType& m, + unsigned int computationOptions, + const SvdType& referenceSvd) +{ + typedef typename MatrixType::RealScalar RealScalar; + Index rows = m.rows(); + Index cols = m.cols(); + Index diagSize = (std::min)(rows, cols); + RealScalar prec = test_precision(); + + SvdType svd(m, computationOptions); + + VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues()); + + if(computationOptions & (ComputeFullV|ComputeThinV)) + { + VERIFY( (svd.matrixV().adjoint()*svd.matrixV()).isIdentity(prec) ); + VERIFY_IS_APPROX( svd.matrixV().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint(), + referenceSvd.matrixV().leftCols(diagSize) * referenceSvd.singularValues().asDiagonal() * referenceSvd.matrixV().leftCols(diagSize).adjoint()); + } + + if(computationOptions & (ComputeFullU|ComputeThinU)) + { + VERIFY( (svd.matrixU().adjoint()*svd.matrixU()).isIdentity(prec) ); + VERIFY_IS_APPROX( svd.matrixU().leftCols(diagSize) * svd.singularValues().cwiseAbs2().asDiagonal() * svd.matrixU().leftCols(diagSize).adjoint(), + referenceSvd.matrixU().leftCols(diagSize) * referenceSvd.singularValues().cwiseAbs2().asDiagonal() * referenceSvd.matrixU().leftCols(diagSize).adjoint()); + } + + // The following checks are not critical. + // For instance, with Dived&Conquer SVD, if only the factor 'V' is computedt then different matrix-matrix product implementation will be used + // and the resulting 'V' factor might be significantly different when the SVD decomposition is not unique, especially with single precision float. + ++g_test_level; + if(computationOptions & ComputeFullU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU()); + if(computationOptions & ComputeThinU) VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize)); + if(computationOptions & ComputeFullV) VERIFY_IS_APPROX(svd.matrixV().cwiseAbs(), referenceSvd.matrixV().cwiseAbs()); + if(computationOptions & ComputeThinV) VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize)); + --g_test_level; +} + +// +template +void svd_least_square(const MatrixType& m, unsigned int computationOptions) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef Matrix RhsType; + typedef Matrix SolutionType; + + RhsType rhs = RhsType::Random(rows, internal::random(1, cols)); + SvdType svd(m, computationOptions); + + if(internal::is_same::value) svd.setThreshold(1e-8); + else if(internal::is_same::value) svd.setThreshold(2e-4); + + SolutionType x = svd.solve(rhs); + + RealScalar residual = (m*x-rhs).norm(); + RealScalar rhs_norm = rhs.norm(); + if(!test_isMuchSmallerThan(residual,rhs.norm())) + { + // ^^^ If the residual is very small, then we have an exact solution, so we are already good. + + // evaluate normal equation which works also for least-squares solutions + if(internal::is_same::value || svd.rank()==m.diagonal().size()) + { + using std::sqrt; + // This test is not stable with single precision. + // This is probably because squaring m signicantly affects the precision. + if(internal::is_same::value) ++g_test_level; + + VERIFY_IS_APPROX(m.adjoint()*(m*x),m.adjoint()*rhs); + + if(internal::is_same::value) --g_test_level; + } + + // Check that there is no significantly better solution in the neighborhood of x + for(Index k=0;k::epsilon())*x.row(k); + RealScalar residual_y = (m*y-rhs).norm(); + VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y ); + if(internal::is_same::value) ++g_test_level; + VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); + if(internal::is_same::value) --g_test_level; + + y.row(k) = (RealScalar(1)-2*NumTraits::epsilon())*x.row(k); + residual_y = (m*y-rhs).norm(); + VERIFY( test_isMuchSmallerThan(abs(residual_y-residual), rhs_norm) || residual < residual_y ); + if(internal::is_same::value) ++g_test_level; + VERIFY( test_isApprox(residual_y,residual) || residual < residual_y ); + if(internal::is_same::value) --g_test_level; + } + } +} + +// check minimal norm solutions, the inoput matrix m is only used to recover problem size +template +void svd_min_norm(const MatrixType& m, unsigned int computationOptions) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + Index cols = m.cols(); + + enum { + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef Matrix SolutionType; + + // generate a full-rank m x n problem with m MatrixType2; + typedef Matrix RhsType2; + typedef Matrix MatrixType2T; + Index rank = RankAtCompileTime2==Dynamic ? internal::random(1,cols) : Index(RankAtCompileTime2); + MatrixType2 m2(rank,cols); + int guard = 0; + do { + m2.setRandom(); + } while(SVD_FOR_MIN_NORM(MatrixType2)(m2).setThreshold(test_precision()).rank()!=rank && (++guard)<10); + VERIFY(guard<10); + + RhsType2 rhs2 = RhsType2::Random(rank); + // use QR to find a reference minimal norm solution + HouseholderQR qr(m2.adjoint()); + Matrix tmp = qr.matrixQR().topLeftCorner(rank,rank).template triangularView().adjoint().solve(rhs2); + tmp.conservativeResize(cols); + tmp.tail(cols-rank).setZero(); + SolutionType x21 = qr.householderQ() * tmp; + // now check with SVD + SVD_FOR_MIN_NORM(MatrixType2) svd2(m2, computationOptions); + SolutionType x22 = svd2.solve(rhs2); + VERIFY_IS_APPROX(m2*x21, rhs2); + VERIFY_IS_APPROX(m2*x22, rhs2); + VERIFY_IS_APPROX(x21, x22); + + // Now check with a rank deficient matrix + typedef Matrix MatrixType3; + typedef Matrix RhsType3; + Index rows3 = RowsAtCompileTime3==Dynamic ? internal::random(rank+1,2*cols) : Index(RowsAtCompileTime3); + Matrix C = Matrix::Random(rows3,rank); + MatrixType3 m3 = C * m2; + RhsType3 rhs3 = C * rhs2; + SVD_FOR_MIN_NORM(MatrixType3) svd3(m3, computationOptions); + SolutionType x3 = svd3.solve(rhs3); + VERIFY_IS_APPROX(m3*x3, rhs3); + VERIFY_IS_APPROX(m3*x21, rhs3); + VERIFY_IS_APPROX(m2*x3, rhs2); + VERIFY_IS_APPROX(x21, x3); +} + +// Check full, compare_to_full, least_square, and min_norm for all possible compute-options +template +void svd_test_all_computation_options(const MatrixType& m, bool full_only) +{ +// if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols()) +// return; + SvdType fullSvd(m, ComputeFullU|ComputeFullV); + CALL_SUBTEST(( svd_check_full(m, fullSvd) )); + CALL_SUBTEST(( svd_least_square(m, ComputeFullU | ComputeFullV) )); + CALL_SUBTEST(( svd_min_norm(m, ComputeFullU | ComputeFullV) )); + + #if defined __INTEL_COMPILER + // remark #111: statement is unreachable + #pragma warning disable 111 + #endif + if(full_only) + return; + + CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullU, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullV, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, 0, fullSvd) )); + + if (MatrixType::ColsAtCompileTime == Dynamic) { + // thin U/V are only available with dynamic number of columns + CALL_SUBTEST(( svd_compare_to_full(m, ComputeFullU|ComputeThinV, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinV, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU|ComputeFullV, fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU , fullSvd) )); + CALL_SUBTEST(( svd_compare_to_full(m, ComputeThinU|ComputeThinV, fullSvd) )); + + CALL_SUBTEST(( svd_least_square(m, ComputeFullU | ComputeThinV) )); + CALL_SUBTEST(( svd_least_square(m, ComputeThinU | ComputeFullV) )); + CALL_SUBTEST(( svd_least_square(m, ComputeThinU | ComputeThinV) )); + + CALL_SUBTEST(( svd_min_norm(m, ComputeFullU | ComputeThinV) )); + CALL_SUBTEST(( svd_min_norm(m, ComputeThinU | ComputeFullV) )); + CALL_SUBTEST(( svd_min_norm(m, ComputeThinU | ComputeThinV) )); + + // test reconstruction + typedef typename MatrixType::Index Index; + Index diagSize = (std::min)(m.rows(), m.cols()); + SvdType svd(m, ComputeThinU | ComputeThinV); + VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint()); + } +} + + +// work around stupid msvc error when constructing at compile time an expression that involves +// a division by zero, even if the numeric type has floating point +template +EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); } + +// workaround aggressive optimization in ICC +template EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; } + +// all this function does is verify we don't iterate infinitely on nan/inf values +template +void svd_inf_nan() +{ + SvdType svd; + typedef typename MatrixType::Scalar Scalar; + Scalar some_inf = Scalar(1) / zero(); + VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf)); + svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV); + + Scalar nan = std::numeric_limits::quiet_NaN(); + VERIFY(nan != nan); + svd.compute(MatrixType::Constant(10,10,nan), ComputeFullU | ComputeFullV); + + MatrixType m = MatrixType::Zero(10,10); + m(internal::random(0,9), internal::random(0,9)) = some_inf; + svd.compute(m, ComputeFullU | ComputeFullV); + + m = MatrixType::Zero(10,10); + m(internal::random(0,9), internal::random(0,9)) = nan; + svd.compute(m, ComputeFullU | ComputeFullV); + + // regression test for bug 791 + m.resize(3,3); + m << 0, 2*NumTraits::epsilon(), 0.5, + 0, -0.5, 0, + nan, 0, 0; + svd.compute(m, ComputeFullU | ComputeFullV); + + m.resize(4,4); + m << 1, 0, 0, 0, + 0, 3, 1, 2e-308, + 1, 0, 1, nan, + 0, nan, nan, 0; + svd.compute(m, ComputeFullU | ComputeFullV); +} + +// Regression test for bug 286: JacobiSVD loops indefinitely with some +// matrices containing denormal numbers. +template +void svd_underoverflow() +{ +#if defined __INTEL_COMPILER +// shut up warning #239: floating point underflow +#pragma warning push +#pragma warning disable 239 +#endif + Matrix2d M; + M << -7.90884e-313, -4.94e-324, + 0, 5.60844e-313; + SVD_DEFAULT(Matrix2d) svd; + svd.compute(M,ComputeFullU|ComputeFullV); + CALL_SUBTEST( svd_check_full(M,svd) ); + + // Check all 2x2 matrices made with the following coefficients: + VectorXd value_set(9); + value_set << 0, 1, -1, 5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324, -4.94e-223, 4.94e-223; + Array4i id(0,0,0,0); + int k = 0; + do + { + M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3)); + svd.compute(M,ComputeFullU|ComputeFullV); + CALL_SUBTEST( svd_check_full(M,svd) ); + + id(k)++; + if(id(k)>=value_set.size()) + { + while(k<3 && id(k)>=value_set.size()) id(++k)++; + id.head(k).setZero(); + k=0; + } + + } while((id +void svd_all_trivial_2x2( void (*cb)(const MatrixType&,bool) ) +{ + MatrixType M; + VectorXd value_set(3); + value_set << 0, 1, -1; + Array4i id(0,0,0,0); + int k = 0; + do + { + M << value_set(id(0)), value_set(id(1)), value_set(id(2)), value_set(id(3)); + + cb(M,false); + + id(k)++; + if(id(k)>=value_set.size()) + { + while(k<3 && id(k)>=value_set.size()) id(++k)++; + id.head(k).setZero(); + k=0; + } + + } while((id +void svd_preallocate() +{ + Vector3f v(3.f, 2.f, 1.f); + MatrixXf m = v.asDiagonal(); + + internal::set_is_malloc_allowed(false); + VERIFY_RAISES_ASSERT(VectorXf tmp(10);) + SVD_DEFAULT(MatrixXf) svd; + internal::set_is_malloc_allowed(true); + svd.compute(m); + VERIFY_IS_APPROX(svd.singularValues(), v); + + SVD_DEFAULT(MatrixXf) svd2(3,3); + internal::set_is_malloc_allowed(false); + svd2.compute(m); + internal::set_is_malloc_allowed(true); + VERIFY_IS_APPROX(svd2.singularValues(), v); + VERIFY_RAISES_ASSERT(svd2.matrixU()); + VERIFY_RAISES_ASSERT(svd2.matrixV()); + svd2.compute(m, ComputeFullU | ComputeFullV); + VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity()); + VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity()); + internal::set_is_malloc_allowed(false); + svd2.compute(m); + internal::set_is_malloc_allowed(true); + + SVD_DEFAULT(MatrixXf) svd3(3,3,ComputeFullU|ComputeFullV); + internal::set_is_malloc_allowed(false); + svd2.compute(m); + internal::set_is_malloc_allowed(true); + VERIFY_IS_APPROX(svd2.singularValues(), v); + VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity()); + VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity()); + internal::set_is_malloc_allowed(false); + svd2.compute(m, ComputeFullU|ComputeFullV); + internal::set_is_malloc_allowed(true); +} + +template +void svd_verify_assert(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + Index rows = m.rows(); + Index cols = m.cols(); + + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime + }; + + typedef Matrix RhsType; + RhsType rhs(rows); + SvdType svd; + VERIFY_RAISES_ASSERT(svd.matrixU()) + VERIFY_RAISES_ASSERT(svd.singularValues()) + VERIFY_RAISES_ASSERT(svd.matrixV()) + VERIFY_RAISES_ASSERT(svd.solve(rhs)) + MatrixType a = MatrixType::Zero(rows, cols); + a.setZero(); + svd.compute(a, 0); + VERIFY_RAISES_ASSERT(svd.matrixU()) + VERIFY_RAISES_ASSERT(svd.matrixV()) + svd.singularValues(); + VERIFY_RAISES_ASSERT(svd.solve(rhs)) + + if (ColsAtCompileTime == Dynamic) + { + svd.compute(a, ComputeThinU); + svd.matrixU(); + VERIFY_RAISES_ASSERT(svd.matrixV()) + VERIFY_RAISES_ASSERT(svd.solve(rhs)) + svd.compute(a, ComputeThinV); + svd.matrixV(); + VERIFY_RAISES_ASSERT(svd.matrixU()) + VERIFY_RAISES_ASSERT(svd.solve(rhs)) + } + else + { + VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU)) + VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV)) + } +} + +#undef SVD_DEFAULT +#undef SVD_FOR_MIN_NORM diff --git a/external/eigen3/test/svd_fill.h b/external/eigen3/test/svd_fill.h new file mode 100644 index 0000000..3877c0c --- /dev/null +++ b/external/eigen3/test/svd_fill.h @@ -0,0 +1,119 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014-2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +template +Array four_denorms(); + +template<> +Array4f four_denorms() { return Array4f(5.60844e-39f, -5.60844e-39f, 4.94e-44f, -4.94e-44f); } +template<> +Array4d four_denorms() { return Array4d(5.60844e-313, -5.60844e-313, 4.94e-324, -4.94e-324); } +template +Array four_denorms() { return four_denorms().cast(); } + +template +void svd_fill_random(MatrixType &m, int Option = 0) +{ + using std::pow; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + Index diagSize = (std::min)(m.rows(), m.cols()); + RealScalar s = std::numeric_limits::max_exponent10/4; + s = internal::random(1,s); + Matrix d = Matrix::Random(diagSize); + for(Index k=0; k(-s,s)); + + bool dup = internal::random(0,10) < 3; + bool unit_uv = internal::random(0,10) < (dup?7:3); // if we duplicate some diagonal entries, then increase the chance to preserve them using unitary U and V factors + + // duplicate some singular values + if(dup) + { + Index n = internal::random(0,d.size()-1); + for(Index i=0; i(0,d.size()-1)) = d(internal::random(0,d.size()-1)); + } + + Matrix U(m.rows(),diagSize); + Matrix VT(diagSize,m.cols()); + if(unit_uv) + { + // in very rare cases let's try with a pure diagonal matrix + if(internal::random(0,10) < 1) + { + U.setIdentity(); + VT.setIdentity(); + } + else + { + createRandomPIMatrixOfRank(diagSize,U.rows(), U.cols(), U); + createRandomPIMatrixOfRank(diagSize,VT.rows(), VT.cols(), VT); + } + } + else + { + U.setRandom(); + VT.setRandom(); + } + + Matrix samples(9); + samples << 0, four_denorms(), + -RealScalar(1)/NumTraits::highest(), RealScalar(1)/NumTraits::highest(), (std::numeric_limits::min)(), pow((std::numeric_limits::min)(),0.8); + + if(Option==Symmetric) + { + m = U * d.asDiagonal() * U.transpose(); + + // randomly nullify some rows/columns + { + Index count = internal::random(-diagSize,diagSize); + for(Index k=0; k(0,diagSize-1); + m.row(i).setZero(); + m.col(i).setZero(); + } + if(count<0) + // (partly) cancel some coeffs + if(!(dup && unit_uv)) + { + + Index n = internal::random(0,m.size()-1); + for(Index k=0; k(0,m.rows()-1); + Index j = internal::random(0,m.cols()-1); + m(j,i) = m(i,j) = samples(internal::random(0,samples.size()-1)); + if(NumTraits::IsComplex) + *(&numext::real_ref(m(j,i))+1) = *(&numext::real_ref(m(i,j))+1) = samples.real()(internal::random(0,samples.size()-1)); + } + } + } + } + else + { + m = U * d.asDiagonal() * VT; + // (partly) cancel some coeffs + if(!(dup && unit_uv)) + { + Index n = internal::random(0,m.size()-1); + for(Index k=0; k(0,m.rows()-1); + Index j = internal::random(0,m.cols()-1); + m(i,j) = samples(internal::random(0,samples.size()-1)); + if(NumTraits::IsComplex) + *(&numext::real_ref(m(i,j))+1) = samples.real()(internal::random(0,samples.size()-1)); + } + } + } +} + diff --git a/external/eigen3/test/swap.cpp b/external/eigen3/test/swap.cpp index 36b3531..f76e362 100644 --- a/external/eigen3/test/swap.cpp +++ b/external/eigen3/test/swap.cpp @@ -41,9 +41,15 @@ template void swap(const MatrixType& m) OtherMatrixType m3_copy = m3; // test swapping 2 matrices of same type + Scalar *d1=m1.data(), *d2=m2.data(); m1.swap(m2); VERIFY_IS_APPROX(m1,m2_copy); VERIFY_IS_APPROX(m2,m1_copy); + if(MatrixType::SizeAtCompileTime==Dynamic) + { + VERIFY(m1.data()==d2); + VERIFY(m2.data()==d1); + } m1 = m1_copy; m2 = m2_copy; @@ -68,16 +74,21 @@ template void swap(const MatrixType& m) m1 = m1_copy; m3 = m3_copy; - // test assertion on mismatching size -- matrix case - VERIFY_RAISES_ASSERT(m1.swap(m1.row(0))); - // test assertion on mismatching size -- xpr case - VERIFY_RAISES_ASSERT(m1.row(0).swap(m1)); + if(m1.rows()>1) + { + // test assertion on mismatching size -- matrix case + VERIFY_RAISES_ASSERT(m1.swap(m1.row(0))); + // test assertion on mismatching size -- xpr case + VERIFY_RAISES_ASSERT(m1.row(0).swap(m1)); + } } void test_swap() { + int s = internal::random(1,EIGEN_TEST_MAX_SIZE); CALL_SUBTEST_1( swap(Matrix3f()) ); // fixed size, no vectorization CALL_SUBTEST_2( swap(Matrix4d()) ); // fixed size, possible vectorization - CALL_SUBTEST_3( swap(MatrixXd(3,3)) ); // dyn size, no vectorization - CALL_SUBTEST_4( swap(MatrixXf(30,30)) ); // dyn size, possible vectorization + CALL_SUBTEST_3( swap(MatrixXd(s,s)) ); // dyn size, no vectorization + CALL_SUBTEST_4( swap(MatrixXf(s,s)) ); // dyn size, possible vectorization + TEST_SET_BUT_UNUSED_VARIABLE(s) } diff --git a/external/eigen3/test/testsuite.cmake b/external/eigen3/test/testsuite.cmake deleted file mode 100644 index 3bec56b..0000000 --- a/external/eigen3/test/testsuite.cmake +++ /dev/null @@ -1,229 +0,0 @@ - -#################################################################### -# -# Usage: -# - create a new folder, let's call it cdash -# - in that folder, do: -# ctest -S path/to/eigen/test/testsuite.cmake[,option1=value1[,option2=value2]] -# -# Options: -# - EIGEN_CXX: compiler, eg.: g++-4.2 -# default: default c++ compiler -# - EIGEN_SITE: eg, INRIA-Bdx_pc-gael, or the name of the contributor, etc. -# default: hostname -# - EIGEN_BUILD_STRING: a string which identify the system/compiler. It should be formed like that: -# --- -# with: -# = opensuse, debian, osx, windows, cygwin, freebsd, solaris, etc. -# = 11.1, XP, vista, leopard, etc. -# = i386, x86_64, ia64, powerpc, etc. -# = gcc-4.3.2, icc-11.0, MSVC-2008, etc. -# - EIGEN_EXPLICIT_VECTORIZATION: novec, SSE2, Altivec -# default: SSE2 for x86_64 systems, novec otherwise -# Its value is automatically appended to EIGEN_BUILD_STRING -# - EIGEN_CMAKE_DIR: path to cmake executable -# - EIGEN_MODE: dashboard model, can be Experimental, Nightly, or Continuous -# default: Nightly -# - EIGEN_WORK_DIR: directory used to download the source files and make the builds -# default: folder which contains this script -# - EIGEN_CMAKE_ARGS: additional arguments passed to cmake -# - EIGEN_GENERATOR_TYPE: allows to overwrite the generator type -# default: nmake (windows -# See http://www.cmake.org/cmake/help/cmake2.6docs.html#section_Generators for a complete -# list of supported generators. -# - EIGEN_NO_UPDATE: allows to submit dash boards from local repositories -# This might be interesting in case you want to submit dashboards -# including local changes. -# - CTEST_SOURCE_DIRECTORY: path to eigen's src (use a new and empty folder, not the one you are working on) -# default: /src -# - CTEST_BINARY_DIRECTORY: build directory -# default: /nightly- -# -# Here is an example running several compilers on a linux system: -# #!/bin/bash -# ARCH=`uname -m` -# SITE=`hostname` -# VERSION=opensuse-11.1 -# WORK_DIR=/home/gael/Coding/eigen/cdash -# # get the last version of the script -# wget http://bitbucket.org/eigen/eigen/raw/tip/test/testsuite.cmake -o $WORK_DIR/testsuite.cmake -# COMMON="ctest -S $WORK_DIR/testsuite.cmake,EIGEN_WORK_DIR=$WORK_DIR,EIGEN_SITE=$SITE,EIGEN_MODE=$1,EIGEN_BUILD_STRING=$OS_VERSION-$ARCH" -# $COMMON-gcc-3.4.6,EIGEN_CXX=g++-3.4 -# $COMMON-gcc-4.0.1,EIGEN_CXX=g++-4.0.1 -# $COMMON-gcc-4.3.2,EIGEN_CXX=g++-4.3,EIGEN_EXPLICIT_VECTORIZATION=novec -# $COMMON-gcc-4.3.2,EIGEN_CXX=g++-4.3,EIGEN_EXPLICIT_VECTORIZATION=SSE2 -# $COMMON-icc-11.0,EIGEN_CXX=icpc -# -#################################################################### - -# process the arguments - -set(ARGLIST ${CTEST_SCRIPT_ARG}) -while(${ARGLIST} MATCHES ".+.*") - - # pick first - string(REGEX MATCH "([^,]*)(,.*)?" DUMMY ${ARGLIST}) - SET(TOP ${CMAKE_MATCH_1}) - - # remove first - string(REGEX MATCHALL "[^,]*,(.*)" DUMMY ${ARGLIST}) - SET(ARGLIST ${CMAKE_MATCH_1}) - - # decompose as a pair key=value - string(REGEX MATCH "([^=]*)(=.*)?" DUMMY ${TOP}) - SET(KEY ${CMAKE_MATCH_1}) - - string(REGEX MATCH "[^=]*=(.*)" DUMMY ${TOP}) - SET(VALUE ${CMAKE_MATCH_1}) - - # set the variable to the specified value - if(VALUE) - SET(${KEY} ${VALUE}) - else(VALUE) - SET(${KEY} ON) - endif(VALUE) - -endwhile(${ARGLIST} MATCHES ".+.*") - -#################################################################### -# Automatically set some user variables if they have not been defined manually -#################################################################### -cmake_minimum_required(VERSION 2.6 FATAL_ERROR) - -if(NOT EIGEN_SITE) - site_name(EIGEN_SITE) -endif(NOT EIGEN_SITE) - -if(NOT EIGEN_CMAKE_DIR) - SET(EIGEN_CMAKE_DIR "") -endif(NOT EIGEN_CMAKE_DIR) - -if(NOT EIGEN_BUILD_STRING) - - # let's try to find all information we need to make the build string ourself - - # OS - build_name(EIGEN_OS_VERSION) - - # arch - set(EIGEN_ARCH ${CMAKE_SYSTEM_PROCESSOR}) - if(WIN32) - set(EIGEN_ARCH $ENV{PROCESSOR_ARCHITECTURE}) - else(WIN32) - execute_process(COMMAND uname -m OUTPUT_VARIABLE EIGEN_ARCH OUTPUT_STRIP_TRAILING_WHITESPACE) - endif(WIN32) - - set(EIGEN_BUILD_STRING ${EIGEN_OS_VERSION}${EIGEN_ARCH}-${EIGEN_CXX}) - -endif(NOT EIGEN_BUILD_STRING) - -if(DEFINED EIGEN_EXPLICIT_VECTORIZATION) - set(EIGEN_BUILD_STRING ${EIGEN_BUILD_STRING}-${EIGEN_EXPLICIT_VECTORIZATION}) -endif(DEFINED EIGEN_EXPLICIT_VECTORIZATION) - -if(NOT EIGEN_WORK_DIR) - set(EIGEN_WORK_DIR ${CTEST_SCRIPT_DIRECTORY}) -endif(NOT EIGEN_WORK_DIR) - -if(NOT CTEST_SOURCE_DIRECTORY) - SET (CTEST_SOURCE_DIRECTORY "${EIGEN_WORK_DIR}/src") -endif(NOT CTEST_SOURCE_DIRECTORY) - -if(NOT CTEST_BINARY_DIRECTORY) - SET (CTEST_BINARY_DIRECTORY "${EIGEN_WORK_DIR}/nightly_${EIGEN_CXX}") -endif(NOT CTEST_BINARY_DIRECTORY) - -if(NOT EIGEN_MODE) - set(EIGEN_MODE Nightly) -endif(NOT EIGEN_MODE) - -## mandatory variables (the default should be ok in most cases): - -if(NOT EIGEN_NO_UPDATE) - SET (CTEST_CVS_COMMAND "hg") - SET (CTEST_CVS_CHECKOUT "${CTEST_CVS_COMMAND} clone http://bitbucket.org/eigen/eigen \"${CTEST_SOURCE_DIRECTORY}\"") - SET(CTEST_BACKUP_AND_RESTORE TRUE) # the backup is CVS related ... -endif(NOT EIGEN_NO_UPDATE) - -# which ctest command to use for running the dashboard -SET (CTEST_COMMAND "${EIGEN_CMAKE_DIR}ctest -D ${EIGEN_MODE} --no-compress-output") -if($ENV{EIGEN_CTEST_ARGS}) -SET (CTEST_COMMAND "${CTEST_COMMAND} $ENV{EIGEN_CTEST_ARGS}") -endif($ENV{EIGEN_CTEST_ARGS}) -# what cmake command to use for configuring this dashboard -SET (CTEST_CMAKE_COMMAND "${EIGEN_CMAKE_DIR}cmake -DEIGEN_LEAVE_TEST_IN_ALL_TARGET=ON") - -#################################################################### -# The values in this section are optional you can either -# have them or leave them commented out -#################################################################### - -# this make sure we get consistent outputs -SET($ENV{LC_MESSAGES} "en_EN") - -# should ctest wipe the binary tree before running -SET(CTEST_START_WITH_EMPTY_BINARY_DIRECTORY TRUE) - -# raise the warning/error limit -set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "33331") -set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "33331") - -# this is the initial cache to use for the binary tree, be careful to escape -# any quotes inside of this string if you use it -if(WIN32 AND NOT UNIX) - #message(SEND_ERROR "win32") - if(EIGEN_GENERATOR_TYPE) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -G \"${EIGEN_GENERATOR_TYPE}\"") - SET (CTEST_INITIAL_CACHE " - CMAKE_BUILD_TYPE:STRING=Release - BUILDNAME:STRING=${EIGEN_BUILD_STRING} - SITE:STRING=${EIGEN_SITE} - ") - else(EIGEN_GENERATOR_TYPE) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -G \"NMake Makefiles\" -DCMAKE_MAKE_PROGRAM=nmake") - SET (CTEST_INITIAL_CACHE " - MAKECOMMAND:STRING=nmake /i - CMAKE_MAKE_PROGRAM:FILEPATH=nmake - CMAKE_GENERATOR:INTERNAL=NMake Makefiles - CMAKE_BUILD_TYPE:STRING=Release - BUILDNAME:STRING=${EIGEN_BUILD_STRING} - SITE:STRING=${EIGEN_SITE} - ") - endif(EIGEN_GENERATOR_TYPE) -else(WIN32 AND NOT UNIX) - SET (CTEST_INITIAL_CACHE " - BUILDNAME:STRING=${EIGEN_BUILD_STRING} - SITE:STRING=${EIGEN_SITE} - ") -endif(WIN32 AND NOT UNIX) - -# set any extra environment variables to use during the execution of the script here: -# setting this variable on windows machines causes trouble ... - -if(EIGEN_CXX AND NOT WIN32) - set(CTEST_ENVIRONMENT "CXX=${EIGEN_CXX}") -endif(EIGEN_CXX AND NOT WIN32) - -if(DEFINED EIGEN_EXPLICIT_VECTORIZATION) - if(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE2) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_SSE2=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE3) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_SSE2=ON -DEIGEN_TEST_SSE3=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSSE3) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_SSE2=ON -DEIGEN_TEST_SSE3=ON -DEIGEN_TEST_SSSE3=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE4_1) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_SSE2=ON -DEIGEN_TEST_SSE3=ON -DEIGEN_TEST_SSSE3=ON -DEIGEN_TEST_SSE4_1=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE4_2) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_SSE2=ON -DEIGEN_TEST_SSE3=ON -DEIGEN_TEST_SSSE3=ON -DEIGEN_TEST_SSE4_1=ON -DEIGEN_TEST_SSE4_2=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES Altivec) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_ALTIVEC=ON") - elseif(EIGEN_EXPLICIT_VECTORIZATION MATCHES novec) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} -DEIGEN_TEST_NO_EXPLICIT_VECTORIZATION=ON") - else(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE2) - message(FATAL_ERROR "Invalid value for EIGEN_EXPLICIT_VECTORIZATION (${EIGEN_EXPLICIT_VECTORIZATION}), must be: novec, SSE2, SSE3, Altivec") - endif(EIGEN_EXPLICIT_VECTORIZATION MATCHES SSE2) -endif(DEFINED EIGEN_EXPLICIT_VECTORIZATION) - -if(DEFINED EIGEN_CMAKE_ARGS) - set(CTEST_CMAKE_COMMAND "${CTEST_CMAKE_COMMAND} ${EIGEN_CMAKE_ARGS}") -endif(DEFINED EIGEN_CMAKE_ARGS) diff --git a/external/eigen3/test/triangular.cpp b/external/eigen3/test/triangular.cpp index 5432039..b968564 100644 --- a/external/eigen3/test/triangular.cpp +++ b/external/eigen3/test/triangular.cpp @@ -65,7 +65,7 @@ template void triangular_square(const MatrixType& m) m1 = MatrixType::Random(rows, cols); for (int i=0; i(); + while (numext::abs2(m1(i,i))(); Transpose trm4(m4); // test back and forward subsitution with a vector as the rhs @@ -78,7 +78,7 @@ template void triangular_square(const MatrixType& m) m3 = m1.template triangularView(); VERIFY(v2.isApprox(m3.conjugate() * (m1.conjugate().template triangularView().solve(v2)), largerEps)); - // test back and forward subsitution with a matrix as the rhs + // test back and forward substitution with a matrix as the rhs m3 = m1.template triangularView(); VERIFY(m2.isApprox(m3.adjoint() * (m1.adjoint().template triangularView().solve(m2)), largerEps)); m3 = m1.template triangularView(); @@ -113,6 +113,21 @@ template void triangular_square(const MatrixType& m) m3.setZero(); m3.template triangularView().setOnes(); VERIFY_IS_APPROX(m2,m3); + + m1.setRandom(); + m3 = m1.template triangularView(); + Matrix m5(cols, internal::random(1,20)); m5.setRandom(); + Matrix m6(internal::random(1,20), rows); m6.setRandom(); + VERIFY_IS_APPROX(m1.template triangularView() * m5, m3*m5); + VERIFY_IS_APPROX(m6*m1.template triangularView(), m6*m3); + + m1up = m1.template triangularView(); + VERIFY_IS_APPROX(m1.template selfadjointView().template triangularView().toDenseMatrix(), m1up); + VERIFY_IS_APPROX(m1up.template selfadjointView().template triangularView().toDenseMatrix(), m1up); + VERIFY_IS_APPROX(m1.template selfadjointView().template triangularView().toDenseMatrix(), m1up.adjoint()); + VERIFY_IS_APPROX(m1up.template selfadjointView().template triangularView().toDenseMatrix(), m1up.adjoint()); + + VERIFY_IS_APPROX(m1.template selfadjointView().diagonal(), m1.diagonal()); } diff --git a/external/eigen3/test/umfpack_support.cpp b/external/eigen3/test/umfpack_support.cpp index 9eb84c1..37ab11f 100644 --- a/external/eigen3/test/umfpack_support.cpp +++ b/external/eigen3/test/umfpack_support.cpp @@ -7,6 +7,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define EIGEN_NO_DEBUG_SMALL_PRODUCT_BLOCKS #include "sparse_solver.h" #include diff --git a/external/eigen3/test/unalignedassert.cpp b/external/eigen3/test/unalignedassert.cpp index 601dbf2..731a089 100644 --- a/external/eigen3/test/unalignedassert.cpp +++ b/external/eigen3/test/unalignedassert.cpp @@ -2,13 +2,39 @@ // for linear algebra. // // Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#if defined(EIGEN_TEST_PART_1) + // default +#elif defined(EIGEN_TEST_PART_2) + #define EIGEN_MAX_STATIC_ALIGN_BYTES 16 + #define EIGEN_MAX_ALIGN_BYTES 16 +#elif defined(EIGEN_TEST_PART_3) + #define EIGEN_MAX_STATIC_ALIGN_BYTES 32 + #define EIGEN_MAX_ALIGN_BYTES 32 +#elif defined(EIGEN_TEST_PART_4) + #define EIGEN_MAX_STATIC_ALIGN_BYTES 64 + #define EIGEN_MAX_ALIGN_BYTES 64 +#endif + #include "main.h" +typedef Matrix Vector6f; +typedef Matrix Vector8f; +typedef Matrix Vector12f; + +typedef Matrix Vector5d; +typedef Matrix Vector6d; +typedef Matrix Vector7d; +typedef Matrix Vector8d; +typedef Matrix Vector9d; +typedef Matrix Vector10d; +typedef Matrix Vector12d; + struct TestNew1 { MatrixXd m; // good: m will allocate its own array, taking care of alignment. @@ -36,7 +62,7 @@ struct TestNew4 struct TestNew5 { EIGEN_MAKE_ALIGNED_OPERATOR_NEW - float f; // try the f at first -- the EIGEN_ALIGN16 attribute of m should make that still work + float f; // try the f at first -- the EIGEN_ALIGN_MAX attribute of m should make that still work Matrix4f m; }; @@ -63,13 +89,13 @@ void check_unalignedassert_good() delete[] y; } -#if EIGEN_ALIGN_STATICALLY +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 template void construct_at_boundary(int boundary) { char buf[sizeof(T)+256]; - size_t _buf = reinterpret_cast(buf); - _buf += (16 - (_buf % 16)); // make 16-byte aligned + size_t _buf = reinterpret_cast(buf); + _buf += (EIGEN_MAX_ALIGN_BYTES - (_buf % EIGEN_MAX_ALIGN_BYTES)); // make 16/32/...-byte aligned _buf += boundary; // make exact boundary-aligned T *x = ::new(reinterpret_cast(_buf)) T; x[0].setZero(); // just in order to silence warnings @@ -79,26 +105,36 @@ void construct_at_boundary(int boundary) void unalignedassert() { - #if EIGEN_ALIGN_STATICALLY +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 construct_at_boundary(4); construct_at_boundary(4); construct_at_boundary(16); + construct_at_boundary(4); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(16); construct_at_boundary(16); construct_at_boundary(4); - construct_at_boundary(16); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); construct_at_boundary(16); construct_at_boundary(4); - construct_at_boundary(16); - construct_at_boundary(16); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(4); + construct_at_boundary(16); + construct_at_boundary(4); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(4); + construct_at_boundary(16); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); construct_at_boundary(4); - construct_at_boundary(16); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); construct_at_boundary(16); construct_at_boundary(4); - construct_at_boundary(16); + construct_at_boundary(EIGEN_MAX_ALIGN_BYTES); construct_at_boundary(16); - #endif +#endif check_unalignedassert_good(); check_unalignedassert_good(); @@ -109,15 +145,32 @@ void unalignedassert() check_unalignedassert_good(); check_unalignedassert_good >(); -#if EIGEN_ALIGN_STATICALLY - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); - VERIFY_RAISES_ASSERT(construct_at_boundary(8)); +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 + if(EIGEN_MAX_ALIGN_BYTES>=16) + { + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + // Complexes are disabled because the compiler might aggressively vectorize + // the initialization of complex coeffs to 0 before we can check for alignedness + //VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + VERIFY_RAISES_ASSERT(construct_at_boundary(8)); + } + for(int b=8; b(b)); + if(b<64) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + if(b<128) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + //if(b<32) VERIFY_RAISES_ASSERT(construct_at_boundary(b)); + } #endif } diff --git a/external/eigen3/test/unalignedcount.cpp b/external/eigen3/test/unalignedcount.cpp index ca7e159..d6ffeaf 100644 --- a/external/eigen3/test/unalignedcount.cpp +++ b/external/eigen3/test/unalignedcount.cpp @@ -30,7 +30,14 @@ static int nb_storeu; void test_unalignedcount() { - #ifdef EIGEN_VECTORIZE_SSE + #if defined(EIGEN_VECTORIZE_AVX) + VectorXf a(40), b(40); + VERIFY_ALIGNED_UNALIGNED_COUNT(a += b, 10, 0, 5, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) += b.segment(0,40), 5, 5, 5, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) -= b.segment(0,40), 5, 5, 5, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) *= 3.5, 5, 0, 5, 0); + VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) /= 3.5, 5, 0, 5, 0); + #elif defined(EIGEN_VECTORIZE_SSE) VectorXf a(40), b(40); VERIFY_ALIGNED_UNALIGNED_COUNT(a += b, 20, 0, 10, 0); VERIFY_ALIGNED_UNALIGNED_COUNT(a.segment(0,40) += b.segment(0,40), 10, 10, 10, 0); diff --git a/external/eigen3/test/upperbidiagonalization.cpp b/external/eigen3/test/upperbidiagonalization.cpp index d15bf58..847b34b 100644 --- a/external/eigen3/test/upperbidiagonalization.cpp +++ b/external/eigen3/test/upperbidiagonalization.cpp @@ -35,7 +35,7 @@ void test_upperbidiagonalization() CALL_SUBTEST_1( upperbidiag(MatrixXf(3,3)) ); CALL_SUBTEST_2( upperbidiag(MatrixXd(17,12)) ); CALL_SUBTEST_3( upperbidiag(MatrixXcf(20,20)) ); - CALL_SUBTEST_4( upperbidiag(MatrixXcd(16,15)) ); + CALL_SUBTEST_4( upperbidiag(Matrix,Dynamic,Dynamic,RowMajor>(16,15)) ); CALL_SUBTEST_5( upperbidiag(Matrix()) ); CALL_SUBTEST_6( upperbidiag(Matrix()) ); CALL_SUBTEST_7( upperbidiag(Matrix()) ); diff --git a/external/eigen3/test/vectorization_logic.cpp b/external/eigen3/test/vectorization_logic.cpp index aee68a8..83c1439 100644 --- a/external/eigen3/test/vectorization_logic.cpp +++ b/external/eigen3/test/vectorization_logic.cpp @@ -1,45 +1,51 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifdef EIGEN_TEST_PART_1 +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + +#ifdef EIGEN_TEST_PART_2 +#define EIGEN_UNALIGNED_VECTORIZE 0 +#endif + +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#undef EIGEN_DEFAULT_TO_ROW_MAJOR +#endif #define EIGEN_DEBUG_ASSIGN #include "main.h" #include -std::string demangle_traversal(int t) -{ - if(t==DefaultTraversal) return "DefaultTraversal"; - if(t==LinearTraversal) return "LinearTraversal"; - if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; - if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; - if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; - return "?"; -} -std::string demangle_unrolling(int t) -{ - if(t==NoUnrolling) return "NoUnrolling"; - if(t==InnerUnrolling) return "InnerUnrolling"; - if(t==CompleteUnrolling) return "CompleteUnrolling"; - return "?"; -} +using internal::demangle_flags; +using internal::demangle_traversal; +using internal::demangle_unrolling; template bool test_assign(const Dst&, const Src&, int traversal, int unrolling) { - internal::assign_traits::debug(); - bool res = internal::assign_traits::Traversal==traversal - && internal::assign_traits::Unrolling==unrolling; + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + bool res = traits::Traversal==traversal; + if(unrolling==InnerUnrolling+CompleteUnrolling) + res = res && (int(traits::Unrolling)==InnerUnrolling || int(traits::Unrolling)==CompleteUnrolling); + else + res = res && int(traits::Unrolling)==unrolling; if(!res) { + std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl; + std::cerr << " " << demangle_flags(internal::evaluator::Flags) << std::endl; + std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl; + std::cerr << " " << demangle_flags(internal::evaluator::Flags) << std::endl; + traits::debug(); std::cerr << " Expected Traversal == " << demangle_traversal(traversal) - << " got " << demangle_traversal(internal::assign_traits::Traversal) << "\n"; + << " got " << demangle_traversal(traits::Traversal) << "\n"; std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling) - << " got " << demangle_unrolling(internal::assign_traits::Unrolling) << "\n"; + << " got " << demangle_unrolling(traits::Unrolling) << "\n"; } return res; } @@ -47,15 +53,19 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling) template bool test_assign(int traversal, int unrolling) { - internal::assign_traits::debug(); - bool res = internal::assign_traits::Traversal==traversal - && internal::assign_traits::Unrolling==unrolling; + typedef internal::copy_using_evaluator_traits,internal::evaluator, internal::assign_op > traits; + bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; if(!res) { + std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl; + std::cerr << " " << demangle_flags(internal::evaluator::Flags) << std::endl; + std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl; + std::cerr << " " << demangle_flags(internal::evaluator::Flags) << std::endl; + traits::debug(); std::cerr << " Expected Traversal == " << demangle_traversal(traversal) - << " got " << demangle_traversal(internal::assign_traits::Traversal) << "\n"; + << " got " << demangle_traversal(traits::Traversal) << "\n"; std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling) - << " got " << demangle_unrolling(internal::assign_traits::Unrolling) << "\n"; + << " got " << demangle_unrolling(traits::Unrolling) << "\n"; } return res; } @@ -63,10 +73,16 @@ bool test_assign(int traversal, int unrolling) template bool test_redux(const Xpr&, int traversal, int unrolling) { - typedef internal::redux_traits,Xpr> traits; + typedef typename Xpr::Scalar Scalar; + typedef internal::redux_traits,internal::redux_evaluator > traits; + bool res = traits::Traversal==traversal && traits::Unrolling==unrolling; if(!res) { + std::cerr << demangle_flags(Xpr::Flags) << std::endl; + std::cerr << demangle_flags(internal::evaluator::Flags) << std::endl; + traits::debug(); + std::cerr << " Expected Traversal == " << demangle_traversal(traversal) << " got " << demangle_traversal(traits::Traversal) << "\n"; std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling) @@ -75,10 +91,16 @@ bool test_redux(const Xpr&, int traversal, int unrolling) return res; } -template::Vectorizable> struct vectorization_logic +template::Vectorizable> +struct vectorization_logic { + typedef internal::packet_traits PacketTraits; + + typedef typename internal::packet_traits::type PacketType; + typedef typename internal::unpacket_traits::half HalfPacketType; enum { - PacketSize = internal::packet_traits::size + PacketSize = internal::unpacket_traits::size, + HalfPacketSize = internal::unpacket_traits::size }; static void run() { @@ -90,8 +112,8 @@ template::Vectori typedef Matrix Matrix22; typedef Matrix Matrix44; typedef Matrix Matrix44u; - typedef Matrix Matrix44c; - typedef Matrix Matrix44r; + typedef Matrix Matrix44c; + typedef Matrix Matrix44r; typedef Matrix::Vectori InnerVectorizedTraversal,InnerUnrolling)); VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(), - LinearTraversal,NoUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal, + EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling)); + + VERIFY(test_assign(Matrix1(),Matrix1()+Matrix1(), + (Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal, + CompleteUnrolling)); VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) + : LinearTraversal, CompleteUnrolling)); VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3), InnerVectorizedTraversal,CompleteUnrolling)); - + VERIFY(test_assign(Matrix44r().row(2),Matrix44r().row(1)+Matrix44r().row(1), InnerVectorizedTraversal,CompleteUnrolling)); - + if(PacketSize>1) { typedef Matrix Matrix33c; + typedef Matrix Vector3; VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1), LinearTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector3(),Vector3()+Vector3(), + EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal), CompleteUnrolling)); VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1), - LinearTraversal,CompleteUnrolling)); - - VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()), + EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? SliceVectorizedTraversal : LinearTraversal), + ((!EIGEN_UNALIGNED_VECTORIZE) && HalfPacketSize==1) ? NoUnrolling : CompleteUnrolling)); + + VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()), LinearVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,NoUnrolling)); + HalfPacketSize==1 ? InnerVectorizedTraversal : + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : + LinearTraversal, + NoUnrolling)); - VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(10,4), - DefaultTraversal,CompleteUnrolling)); + VERIFY(test_assign(Matrix11(), Matrix11()+Matrix11(),InnerVectorizedTraversal,CompleteUnrolling)); + + + VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(8,4), + (EIGEN_UNALIGNED_VECTORIZE) ? InnerVectorizedTraversal : DefaultTraversal, CompleteUnrolling|InnerUnrolling)); + + VERIFY(test_assign(Vector1(),Matrix11()*Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix11(),Matrix11().lazyProduct(Matrix11()), + InnerVectorizedTraversal,InnerUnrolling+CompleteUnrolling)); } - + + VERIFY(test_redux(Vector1(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix(), + LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_redux(Matrix3(), LinearVectorizedTraversal,CompleteUnrolling)); @@ -174,18 +224,19 @@ template::Vectori VERIFY(test_redux(Matrix44r().template block<1,2*PacketSize>(2,1), LinearVectorizedTraversal,CompleteUnrolling)); - + VERIFY((test_assign< - Map >, + Map >, Matrix22 >(InnerVectorizedTraversal,CompleteUnrolling))); VERIFY((test_assign< - Map >, - Matrix22 - >(DefaultTraversal,CompleteUnrolling))); + Map, AlignedMax, InnerStride<3*PacketSize> >, + Matrix + >(DefaultTraversal,PacketSize>=8?InnerUnrolling:CompleteUnrolling))); - VERIFY((test_assign(Matrix11(), Matrix11()*Matrix11(), InnerVectorizedTraversal, CompleteUnrolling))); + VERIFY((test_assign(Matrix11(), Matrix()*Matrix(), + InnerVectorizedTraversal, CompleteUnrolling))); #endif VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3), @@ -193,12 +244,138 @@ template::Vectori VERIFY(test_redux(VectorX(10), LinearVectorizedTraversal,NoUnrolling)); + } +}; + +template struct vectorization_logic +{ + static void run() {} +}; + +template::type>::half, + typename internal::packet_traits::type>::value > +struct vectorization_logic_half +{ + typedef internal::packet_traits PacketTraits; + typedef typename internal::unpacket_traits::type>::half PacketType; + enum { + PacketSize = internal::unpacket_traits::size + }; + static void run() + { + + typedef Matrix Vector1; + typedef Matrix Matrix11; + typedef Matrix Matrix57; + typedef Matrix Matrix35; + typedef Matrix Matrix57u; +// typedef Matrix Matrix44; +// typedef Matrix Matrix44u; +// typedef Matrix Matrix44c; +// typedef Matrix Matrix44r; + + typedef Matrix Matrix1; + + typedef Matrix Matrix1u; + // this type is made such that it can only be vectorized when viewed as a linear 1D vector + typedef Matrix Matrix3; + #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT + VERIFY(test_assign(Vector1(),Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1()+Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().template segment(0).derived(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Scalar(2.1)*Vector1()-Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),(Scalar(2.1)*Vector1().template segment(0)-Vector1().template segment(0)).derived(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().template cast(), + InnerVectorizedTraversal,CompleteUnrolling)); + + + VERIFY(test_assign(Vector1(),Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1()+Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()), + InnerVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix57(),Matrix57()+Matrix57(), + InnerVectorizedTraversal,InnerUnrolling)); + + VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal, + EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling)); + + VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), + EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling)); + + if(PacketSize>1) + { + typedef Matrix Matrix33c; + VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1), + LinearTraversal,CompleteUnrolling)); + VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1), + EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()), + PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix(),Matrix()+Matrix(), + EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal, + NoUnrolling)); + + VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(8,4), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling)); + + VERIFY(test_assign(Vector1(),Matrix11()*Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_assign(Matrix11(),Matrix11().lazyProduct(Matrix11()), + InnerVectorizedTraversal,InnerUnrolling+CompleteUnrolling)); + } + + VERIFY(test_redux(Vector1(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix3(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix35(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux(Matrix57().template block(1,0), + DefaultTraversal,CompleteUnrolling)); + + VERIFY((test_assign< + Map, AlignedMax, InnerStride<3*PacketSize> >, + Matrix + >(DefaultTraversal,CompleteUnrolling))); + + VERIFY((test_assign(Matrix57(), Matrix()*Matrix(), + InnerVectorizedTraversal, InnerUnrolling|CompleteUnrolling))); + #endif } }; -template struct vectorization_logic +template struct vectorization_logic_half { static void run() {} }; @@ -208,27 +385,34 @@ void test_vectorization_logic() #ifdef EIGEN_VECTORIZE + CALL_SUBTEST( vectorization_logic::run() ); CALL_SUBTEST( vectorization_logic::run() ); CALL_SUBTEST( vectorization_logic::run() ); CALL_SUBTEST( vectorization_logic >::run() ); CALL_SUBTEST( vectorization_logic >::run() ); + CALL_SUBTEST( vectorization_logic_half::run() ); + CALL_SUBTEST( vectorization_logic_half::run() ); + CALL_SUBTEST( vectorization_logic_half::run() ); + CALL_SUBTEST( vectorization_logic_half >::run() ); + CALL_SUBTEST( vectorization_logic_half >::run() ); + if(internal::packet_traits::Vectorizable) { VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix(), - DefaultTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); } if(internal::packet_traits::Vectorizable) { VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix(), - DefaultTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); } #endif // EIGEN_VECTORIZE diff --git a/external/eigen3/test/vectorwiseop.cpp b/external/eigen3/test/vectorwiseop.cpp index d32fd10..f3ab561 100644 --- a/external/eigen3/test/vectorwiseop.cpp +++ b/external/eigen3/test/vectorwiseop.cpp @@ -2,11 +2,13 @@ // for linear algebra. // // Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2015 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_ENABLE_TEMPORARY_TRACKING #define EIGEN_NO_STATIC_ASSERT #include "main.h" @@ -101,11 +103,11 @@ template void vectorwiseop_array(const ArrayType& m) VERIFY_RAISES_ASSERT(m2.rowwise() /= rowvec.transpose()); VERIFY_RAISES_ASSERT(m1.rowwise() / rowvec.transpose()); - + m2 = m1; // yes, there might be an aliasing issue there but ".rowwise() /=" - // is suppposed to evaluate " m2.colwise().sum()" into to temporary to avoid - // evaluating the reducions multiple times + // is supposed to evaluate " m2.colwise().sum()" into a temporary to avoid + // evaluating the reduction multiple times if(ArrayType::RowsAtCompileTime>2 || ArrayType::RowsAtCompileTime==Dynamic) { m2.rowwise() /= m2.colwise().sum(); @@ -156,16 +158,22 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2, m1.colwise() + colvec); VERIFY_IS_APPROX(m2.col(c), m1.col(c) + colvec); - VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); - VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() += colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() + colvec.transpose()); + } m2 = m1; m2.rowwise() += rowvec; VERIFY_IS_APPROX(m2, m1.rowwise() + rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) + rowvec); - VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); - VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() += rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() + rowvec.transpose()); + } // test substraction @@ -174,29 +182,43 @@ template void vectorwiseop_matrix(const MatrixType& m) VERIFY_IS_APPROX(m2, m1.colwise() - colvec); VERIFY_IS_APPROX(m2.col(c), m1.col(c) - colvec); - VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); - VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + if(rows>1) + { + VERIFY_RAISES_ASSERT(m2.colwise() -= colvec.transpose()); + VERIFY_RAISES_ASSERT(m1.colwise() - colvec.transpose()); + } m2 = m1; m2.rowwise() -= rowvec; VERIFY_IS_APPROX(m2, m1.rowwise() - rowvec); VERIFY_IS_APPROX(m2.row(r), m1.row(r) - rowvec); - VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); - VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); - + if(cols>1) + { + VERIFY_RAISES_ASSERT(m2.rowwise() -= rowvec.transpose()); + VERIFY_RAISES_ASSERT(m1.rowwise() - rowvec.transpose()); + } + // test norm rrres = m1.colwise().norm(); VERIFY_IS_APPROX(rrres(c), m1.col(c).norm()); rcres = m1.rowwise().norm(); VERIFY_IS_APPROX(rcres(r), m1.row(r).norm()); - + + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum(), m1.colwise().template lpNorm<1>()); + VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().sum(), m1.rowwise().template lpNorm<1>()); + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().maxCoeff(), m1.colwise().template lpNorm()); + VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().maxCoeff(), m1.rowwise().template lpNorm()); + + // regression for bug 1158 + VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum().x(), m1.col(0).cwiseAbs().sum()); + // test normalized m2 = m1.colwise().normalized(); VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized()); m2 = m1.rowwise().normalized(); VERIFY_IS_APPROX(m2.row(r), m1.row(r).normalized()); - + // test normalize m2 = m1; m2.colwise().normalize(); @@ -204,14 +226,27 @@ template void vectorwiseop_matrix(const MatrixType& m) m2 = m1; m2.rowwise().normalize(); VERIFY_IS_APPROX(m2.row(r), m1.row(r).normalized()); + + // test with partial reduction of products + Matrix m1m1 = m1 * m1.transpose(); + VERIFY_IS_APPROX( (m1 * m1.transpose()).colwise().sum(), m1m1.colwise().sum()); + Matrix tmp(rows); + VERIFY_EVALUATION_COUNT( tmp = (m1 * m1.transpose()).colwise().sum(), 1); + + m2 = m1.rowwise() - (m1.colwise().sum()/RealScalar(m1.rows())).eval(); + m1 = m1.rowwise() - (m1.colwise().sum()/RealScalar(m1.rows())); + VERIFY_IS_APPROX( m1, m2 ); + VERIFY_EVALUATION_COUNT( m2 = (m1.rowwise() - m1.colwise().sum()/RealScalar(m1.rows())), (MatrixType::RowsAtCompileTime!=1 ? 1 : 0) ); } void test_vectorwiseop() { - CALL_SUBTEST_1(vectorwiseop_array(Array22cd())); - CALL_SUBTEST_2(vectorwiseop_array(Array())); - CALL_SUBTEST_3(vectorwiseop_array(ArrayXXf(3, 4))); - CALL_SUBTEST_4(vectorwiseop_matrix(Matrix4cf())); - CALL_SUBTEST_5(vectorwiseop_matrix(Matrix())); - CALL_SUBTEST_6(vectorwiseop_matrix(MatrixXd(7,2))); + CALL_SUBTEST_1( vectorwiseop_array(Array22cd()) ); + CALL_SUBTEST_2( vectorwiseop_array(Array()) ); + CALL_SUBTEST_3( vectorwiseop_array(ArrayXXf(3, 4)) ); + CALL_SUBTEST_4( vectorwiseop_matrix(Matrix4cf()) ); + CALL_SUBTEST_5( vectorwiseop_matrix(Matrix()) ); + CALL_SUBTEST_6( vectorwiseop_matrix(MatrixXd(internal::random(1,EIGEN_TEST_MAX_SIZE), internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(VectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_7( vectorwiseop_matrix(RowVectorXd(internal::random(1,EIGEN_TEST_MAX_SIZE))) ); } diff --git a/external/eigen3/test/zerosized.cpp b/external/eigen3/test/zerosized.cpp index da7dd04..477ff00 100644 --- a/external/eigen3/test/zerosized.cpp +++ b/external/eigen3/test/zerosized.cpp @@ -25,6 +25,7 @@ template void zeroReduction(const MatrixType& m) { template void zeroSizedMatrix() { MatrixType t1; + typedef typename MatrixType::Scalar Scalar; if (MatrixType::SizeAtCompileTime == Dynamic || MatrixType::SizeAtCompileTime == 0) { @@ -37,7 +38,7 @@ template void zeroSizedMatrix() if (MatrixType::RowsAtCompileTime == Dynamic && MatrixType::ColsAtCompileTime == Dynamic) { - MatrixType t2(0, 0); + MatrixType t2(0, 0), t3(t1); VERIFY(t2.rows() == 0); VERIFY(t2.cols() == 0); @@ -45,6 +46,23 @@ template void zeroSizedMatrix() VERIFY(t1==t2); } } + + if(MatrixType::MaxColsAtCompileTime!=0 && MatrixType::MaxRowsAtCompileTime!=0) + { + Index rows = MatrixType::RowsAtCompileTime==Dynamic ? internal::random(1,10) : Index(MatrixType::RowsAtCompileTime); + Index cols = MatrixType::ColsAtCompileTime==Dynamic ? internal::random(1,10) : Index(MatrixType::ColsAtCompileTime); + MatrixType m(rows,cols); + zeroReduction(m.template block<0,MatrixType::ColsAtCompileTime>(0,0,0,cols)); + zeroReduction(m.template block(0,0,rows,0)); + zeroReduction(m.template block<0,1>(0,0)); + zeroReduction(m.template block<1,0>(0,0)); + Matrix prod = m.template block(0,0,rows,0) * m.template block<0,MatrixType::ColsAtCompileTime>(0,0,0,cols); + VERIFY(prod.rows()==rows && prod.cols()==cols); + VERIFY(prod.isZero()); + prod = m.template block<1,0>(0,0) * m.template block<0,1>(0,0); + VERIFY(prod.size()==1); + VERIFY(prod.isZero()); + } } template void zeroSizedVector() diff --git a/external/eigen3/unsupported/Eigen/AdolcForward b/external/eigen3/unsupported/Eigen/AdolcForward index 2627dec..15f5f07 100644 --- a/external/eigen3/unsupported/Eigen/AdolcForward +++ b/external/eigen3/unsupported/Eigen/AdolcForward @@ -25,7 +25,7 @@ #ifndef NUMBER_DIRECTIONS # define NUMBER_DIRECTIONS 2 #endif -#include +#include // adolc defines some very stupid macros: #if defined(malloc) diff --git a/external/eigen3/unsupported/Eigen/AlignedVector3 b/external/eigen3/unsupported/Eigen/AlignedVector3 index 29d5c90..47a86d4 100644 --- a/external/eigen3/unsupported/Eigen/AlignedVector3 +++ b/external/eigen3/unsupported/Eigen/AlignedVector3 @@ -57,6 +57,11 @@ template class AlignedVector3 inline Index rows() const { return 3; } inline Index cols() const { return 1; } + + Scalar* data() { return m_coeffs.data(); } + const Scalar* data() const { return m_coeffs.data(); } + Index innerStride() const { return 1; } + Index outerStride() const { return 3; } inline const Scalar& coeff(Index row, Index col) const { return m_coeffs.coeff(row, col); } @@ -100,7 +105,7 @@ template class AlignedVector3 }; template - inline explicit AlignedVector3(const MatrixBase& other) + inline AlignedVector3(const MatrixBase& other) { generic_assign_selector::run(*this,other.derived()); } @@ -108,6 +113,12 @@ template class AlignedVector3 inline AlignedVector3& operator=(const AlignedVector3& other) { m_coeffs = other.m_coeffs; return *this; } + template + inline AlignedVector3& operator=(const MatrixBase& other) + { + generic_assign_selector::run(*this,other.derived()); + return *this; + } inline AlignedVector3 operator+(const AlignedVector3& other) const { return AlignedVector3(m_coeffs + other.m_coeffs); } @@ -148,7 +159,7 @@ template class AlignedVector3 m_coeffs /= norm(); } - inline AlignedVector3 normalized() + inline AlignedVector3 normalized() const { return AlignedVector3(m_coeffs / norm()); } @@ -181,8 +192,31 @@ template class AlignedVector3 { return m_coeffs.template head<3>().isApprox(other,eps); } + + CoeffType& coeffs() { return m_coeffs; } + const CoeffType& coeffs() const { return m_coeffs; } }; +namespace internal { + +template +struct eval, Dense> +{ + typedef const AlignedVector3<_Scalar>& type; +}; + +template +struct evaluator > + : evaluator > +{ + typedef AlignedVector3 XprType; + typedef evaluator > Base; + + evaluator(const XprType &m) : Base(m.coeffs()) {} +}; + +} + //@} } diff --git a/external/eigen3/unsupported/Eigen/CMakeLists.txt b/external/eigen3/unsupported/Eigen/CMakeLists.txt index e1fbf97..631a060 100644 --- a/external/eigen3/unsupported/Eigen/CMakeLists.txt +++ b/external/eigen3/unsupported/Eigen/CMakeLists.txt @@ -1,11 +1,32 @@ -set(Eigen_HEADERS AdolcForward AlignedVector3 ArpackSupport AutoDiff BVH FFT IterativeSolvers KroneckerProduct LevenbergMarquardt - MatrixFunctions MoreVectorization MPRealSupport NonLinearOptimization NumericalDiff OpenGLSupport Polynomials - Skyline SparseExtra Splines - ) +set(Eigen_HEADERS + AdolcForward + AlignedVector3 + ArpackSupport + AutoDiff + BVH + EulerAngles + FFT + IterativeSolvers + KroneckerProduct + LevenbergMarquardt + MatrixFunctions + MoreVectorization + MPRealSupport + NonLinearOptimization + NumericalDiff + OpenGLSupport + Polynomials + Skyline + SparseExtra + SpecialFunctions + Splines + ) install(FILES ${Eigen_HEADERS} DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel ) -add_subdirectory(src) +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") + +add_subdirectory(CXX11) diff --git a/external/eigen3/unsupported/Eigen/CXX11/CMakeLists.txt b/external/eigen3/unsupported/Eigen/CXX11/CMakeLists.txt new file mode 100644 index 0000000..385ed24 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/CMakeLists.txt @@ -0,0 +1,8 @@ +set(Eigen_CXX11_HEADERS Tensor TensorSymmetry ThreadPool) + +install(FILES + ${Eigen_CXX11_HEADERS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel + ) + +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/external/eigen3/unsupported/Eigen/CXX11/Tensor b/external/eigen3/unsupported/Eigen/CXX11/Tensor new file mode 100644 index 0000000..7ecb4c7 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/Tensor @@ -0,0 +1,152 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//#ifndef EIGEN_CXX11_TENSOR_MODULE +//#define EIGEN_CXX11_TENSOR_MODULE + +#include "../../../Eigen/Core" + +#ifdef EIGEN_USE_SYCL +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite +#include +#include +#include +#include +#endif + +#include + +#include "../SpecialFunctions" +#include "src/util/CXX11Meta.h" +#include "src/util/MaxSizeVector.h" + +/** \defgroup CXX11_Tensor_Module Tensor Module + * + * This module provides a Tensor class for storing arbitrarily indexed + * objects. + * + * \code + * #include + * \endcode + */ + +#include +#include +#include + +#ifdef _WIN32 +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#else +#include +#endif + +#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 +#include +#endif + +#ifdef _WIN32 +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif + +#ifdef EIGEN_USE_THREADS +#include "ThreadPool" +#endif + +#ifdef EIGEN_USE_GPU +#include +#include +#if __cplusplus >= 201103L +#include +#include +#endif +#endif + +#include "src/Tensor/TensorMacros.h" +#include "src/Tensor/TensorForwardDeclarations.h" +#include "src/Tensor/TensorMeta.h" +#include "src/Tensor/TensorFunctors.h" +#include "src/Tensor/TensorCostModel.h" +#include "src/Tensor/TensorDeviceDefault.h" +#include "src/Tensor/TensorDeviceThreadPool.h" +#include "src/Tensor/TensorDeviceCuda.h" +#include "src/Tensor/TensorDeviceSycl.h" +#include "src/Tensor/TensorIndexList.h" +#include "src/Tensor/TensorDimensionList.h" +#include "src/Tensor/TensorDimensions.h" +#include "src/Tensor/TensorInitializer.h" +#include "src/Tensor/TensorTraits.h" +#include "src/Tensor/TensorRandom.h" +#include "src/Tensor/TensorUInt128.h" +#include "src/Tensor/TensorIntDiv.h" +#include "src/Tensor/TensorGlobalFunctions.h" + +#include "src/Tensor/TensorBase.h" + +#include "src/Tensor/TensorEvaluator.h" +#include "src/Tensor/TensorExpr.h" +#include "src/Tensor/TensorReduction.h" +#include "src/Tensor/TensorReductionCuda.h" +#include "src/Tensor/TensorArgMax.h" +#include "src/Tensor/TensorConcatenation.h" +#include "src/Tensor/TensorContractionMapper.h" +#include "src/Tensor/TensorContractionBlocking.h" +#include "src/Tensor/TensorContraction.h" +#include "src/Tensor/TensorContractionThreadPool.h" +#include "src/Tensor/TensorContractionCuda.h" +#include "src/Tensor/TensorConversion.h" +#include "src/Tensor/TensorConvolution.h" +#include "src/Tensor/TensorFFT.h" +#include "src/Tensor/TensorPatch.h" +#include "src/Tensor/TensorImagePatch.h" +#include "src/Tensor/TensorVolumePatch.h" +#include "src/Tensor/TensorBroadcasting.h" +#include "src/Tensor/TensorChipping.h" +#include "src/Tensor/TensorInflation.h" +#include "src/Tensor/TensorLayoutSwap.h" +#include "src/Tensor/TensorMorphing.h" +#include "src/Tensor/TensorPadding.h" +#include "src/Tensor/TensorReverse.h" +#include "src/Tensor/TensorShuffling.h" +#include "src/Tensor/TensorStriding.h" +#include "src/Tensor/TensorCustomOp.h" +#include "src/Tensor/TensorEvalTo.h" +#include "src/Tensor/TensorForcedEval.h" +#include "src/Tensor/TensorGenerator.h" +#include "src/Tensor/TensorAssign.h" +#include "src/Tensor/TensorScan.h" + +#include "src/Tensor/TensorSycl.h" +#include "src/Tensor/TensorExecutor.h" +#include "src/Tensor/TensorDevice.h" + +#include "src/Tensor/TensorStorage.h" +#include "src/Tensor/Tensor.h" +#include "src/Tensor/TensorFixedSize.h" +#include "src/Tensor/TensorMap.h" +#include "src/Tensor/TensorRef.h" + +#include "src/Tensor/TensorIO.h" + +#include + +//#endif // EIGEN_CXX11_TENSOR_MODULE diff --git a/external/eigen3/unsupported/Eigen/CXX11/TensorSymmetry b/external/eigen3/unsupported/Eigen/CXX11/TensorSymmetry new file mode 100644 index 0000000..fb1b0c0 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/TensorSymmetry @@ -0,0 +1,42 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSORSYMMETRY_MODULE +#define EIGEN_CXX11_TENSORSYMMETRY_MODULE + +#include + +#include + +#include "src/util/CXX11Meta.h" + +/** \defgroup CXX11_TensorSymmetry_Module Tensor Symmetry Module + * + * This module provides a classes that allow for the definition of + * symmetries w.r.t. tensor indices. + * + * Including this module will implicitly include the Tensor module. + * + * \code + * #include + * \endcode + */ + +#include "src/TensorSymmetry/util/TemplateGroupTheory.h" +#include "src/TensorSymmetry/Symmetry.h" +#include "src/TensorSymmetry/StaticSymmetry.h" +#include "src/TensorSymmetry/DynamicSymmetry.h" + +#include + +#endif // EIGEN_CXX11_TENSORSYMMETRY_MODULE + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/external/eigen3/unsupported/Eigen/CXX11/ThreadPool b/external/eigen3/unsupported/Eigen/CXX11/ThreadPool new file mode 100644 index 0000000..09d637e --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/ThreadPool @@ -0,0 +1,65 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_THREADPOOL_MODULE +#define EIGEN_CXX11_THREADPOOL_MODULE + +#include "../../../Eigen/Core" + +#include + +/** \defgroup CXX11_ThreadPool_Module C++11 ThreadPool Module + * + * This module provides 2 threadpool implementations + * - a simple reference implementation + * - a faster non blocking implementation + * + * This module requires C++11. + * + * \code + * #include + * \endcode + */ + + +// The code depends on CXX11, so only include the module if the +// compiler supports it. +#if __cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900 +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "src/util/CXX11Meta.h" +#include "src/util/MaxSizeVector.h" + +#include "src/ThreadPool/ThreadLocal.h" +#include "src/ThreadPool/ThreadYield.h" +#include "src/ThreadPool/EventCount.h" +#include "src/ThreadPool/RunQueue.h" +#include "src/ThreadPool/ThreadPoolInterface.h" +#include "src/ThreadPool/ThreadEnvironment.h" +#include "src/ThreadPool/SimpleThreadPool.h" +#include "src/ThreadPool/NonBlockingThreadPool.h" + +#endif + +#include + +#endif // EIGEN_CXX11_THREADPOOL_MODULE + diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/README.md b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/README.md new file mode 100644 index 0000000..98e8381 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -0,0 +1,1757 @@ +# Eigen Tensors + +Tensors are multidimensional arrays of elements. Elements are typically scalars, +but more complex types such as strings are also supported. + +[TOC] + +## Tensor Classes + +You can manipulate a tensor with one of the following classes. They all are in +the namespace ```::Eigen.``` + + +### Class Tensor + +This is the class to use to create a tensor and allocate memory for it. The +class is templatized with the tensor datatype, such as float or int, and the +tensor rank. The rank is the number of dimensions, for example rank 2 is a +matrix. + +Tensors of this class are resizable. For example, if you assign a tensor of a +different size to a Tensor, that tensor is resized to match its new value. + +#### Constructor Tensor(size0, size1, ...) + +Constructor for a Tensor. The constructor must be passed ```rank``` integers +indicating the sizes of the instance along each of the the ```rank``` +dimensions. + + // Create a tensor of rank 3 of sizes 2, 3, 4. This tensor owns + // memory to hold 24 floating point values (24 = 2 x 3 x 4). + Tensor t_3d(2, 3, 4); + + // Resize t_3d by assigning a tensor of different sizes, but same rank. + t_3d = Tensor(3, 4, 3); + +#### Constructor Tensor(size_array) + +Constructor where the sizes for the constructor are specified as an array of +values instead of an explicitly list of parameters. The array type to use is +```Eigen::array```. The array can be constructed automatically +from an initializer list. + + // Create a tensor of strings of rank 2 with sizes 5, 7. + Tensor t_2d({5, 7}); + + +### Class TensorFixedSize> + +Class to use for tensors of fixed size, where the size is known at compile +time. Fixed sized tensors can provide very fast computations because all their +dimensions are known by the compiler. FixedSize tensors are not resizable. + +If the total number of elements in a fixed size tensor is small enough the +tensor data is held onto the stack and does not cause heap allocation and free. + + // Create a 4 x 3 tensor of floats. + TensorFixedSize> t_4x3; + +### Class TensorMap> + +This is the class to use to create a tensor on top of memory allocated and +owned by another part of your code. It allows to view any piece of allocated +memory as a Tensor. Instances of this class do not own the memory where the +data are stored. + +A TensorMap is not resizable because it does not own the memory where its data +are stored. + +#### Constructor TensorMap>(data, size0, size1, ...) + +Constructor for a Tensor. The constructor must be passed a pointer to the +storage for the data, and "rank" size attributes. The storage has to be +large enough to hold all the data. + + // Map a tensor of ints on top of stack-allocated storage. + int storage[128]; // 2 x 4 x 2 x 8 = 128 + TensorMap> t_4d(storage, 2, 4, 2, 8); + + // The same storage can be viewed as a different tensor. + // You can also pass the sizes as an array. + TensorMap> t_2d(storage, 16, 8); + + // You can also map fixed-size tensors. Here we get a 1d view of + // the 2d fixed-size tensor. + Tensor> t_4x3; + TensorMap> t_12(t_4x3, 12); + + +#### Class TensorRef + +See Assigning to a TensorRef below. + +## Accessing Tensor Elements + +#### tensor(index0, index1...) + +Return the element at position ```(index0, index1...)``` in tensor +```tensor```. You must pass as many parameters as the rank of ```tensor```. +The expression can be used as an l-value to set the value of the element at the +specified position. The value returned is of the datatype of the tensor. + + // Set the value of the element at position (0, 1, 0); + Tensor t_3d(2, 3, 4); + t_3d(0, 1, 0) = 12.0f; + + // Initialize all elements to random values. + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 4; ++k) { + t_3d(i, j, k) = ...some random value...; + } + } + } + + // Print elements of a tensor. + for (int i = 0; i < 2; ++i) { + LOG(INFO) << t_3d(i, 0, 0); + } + + +## TensorLayout + +The tensor library supports 2 layouts: ```ColMajor``` (the default) and +```RowMajor```. Only the default column major layout is currently fully +supported, and it is therefore not recommended to attempt to use the row major +layout at the moment. + +The layout of a tensor is optionally specified as part of its type. If not +specified explicitly column major is assumed. + + Tensor col_major; // equivalent to Tensor + TensorMap > row_major(data, ...); + +All the arguments to an expression must use the same layout. Attempting to mix +different layouts will result in a compilation error. + +It is possible to change the layout of a tensor or an expression using the +```swap_layout()``` method. Note that this will also reverse the order of the +dimensions. + + Tensor col_major(2, 4); + Tensor row_major(2, 4); + + Tensor col_major_result = col_major; // ok, layouts match + Tensor col_major_result = row_major; // will not compile + + // Simple layout swap + col_major_result = row_major.swap_layout(); + eigen_assert(col_major_result.dimension(0) == 4); + eigen_assert(col_major_result.dimension(1) == 2); + + // Swap the layout and preserve the order of the dimensions + array shuffle(1, 0); + col_major_result = row_major.swap_layout().shuffle(shuffle); + eigen_assert(col_major_result.dimension(0) == 2); + eigen_assert(col_major_result.dimension(1) == 4); + + +## Tensor Operations + +The Eigen Tensor library provides a vast library of operations on Tensors: +numerical operations such as addition and multiplication, geometry operations +such as slicing and shuffling, etc. These operations are available as methods +of the Tensor classes, and in some cases as operator overloads. For example +the following code computes the elementwise addition of two tensors: + + Tensor t1(2, 3, 4); + ...set some values in t1... + Tensor t2(2, 3, 4); + ...set some values in t2... + // Set t3 to the element wise sum of t1 and t2 + Tensor t3 = t1 + t2; + +While the code above looks easy enough, it is important to understand that the +expression ```t1 + t2``` is not actually adding the values of the tensors. The +expression instead constructs a "tensor operator" object of the class +TensorCwiseBinaryOp, which has references to the tensors +```t1``` and ```t2```. This is a small C++ object that knows how to add +```t1``` and ```t2```. It is only when the value of the expression is assigned +to the tensor ```t3``` that the addition is actually performed. Technically, +this happens through the overloading of ```operator=()``` in the Tensor class. + +This mechanism for computing tensor expressions allows for lazy evaluation and +optimizations which are what make the tensor library very fast. + +Of course, the tensor operators do nest, and the expression ```t1 + t2 * +0.3f``` is actually represented with the (approximate) tree of operators: + + TensorCwiseBinaryOp(t1, TensorCwiseUnaryOp(t2, 0.3f)) + + +### Tensor Operations and C++ "auto" + +Because Tensor operations create tensor operators, the C++ ```auto``` keyword +does not have its intuitive meaning. Consider these 2 lines of code: + + Tensor t3 = t1 + t2; + auto t4 = t1 + t2; + +In the first line we allocate the tensor ```t3``` and it will contain the +result of the addition of ```t1``` and ```t2```. In the second line, ```t4``` +is actually the tree of tensor operators that will compute the addition of +```t1``` and ```t2```. In fact, ```t4``` is *not* a tensor and you cannot get +the values of its elements: + + Tensor t3 = t1 + t2; + cout << t3(0, 0, 0); // OK prints the value of t1(0, 0, 0) + t2(0, 0, 0) + + auto t4 = t1 + t2; + cout << t4(0, 0, 0); // Compilation error! + +When you use ```auto``` you do not get a Tensor as a result but instead a +non-evaluated expression. So only use ```auto``` to delay evaluation. + +Unfortunately, there is no single underlying concrete type for holding +non-evaluated expressions, hence you have to use auto in the case when you do +want to hold non-evaluated expressions. + +When you need the results of set of tensor computations you have to assign the +result to a Tensor that will be capable of holding onto them. This can be +either a normal Tensor, a fixed size Tensor, or a TensorMap on an existing +piece of memory. All the following will work: + + auto t4 = t1 + t2; + + Tensor result = t4; // Could also be: result(t4); + cout << result(0, 0, 0); + + TensorMap result(, , ...) = t4; + cout << result(0, 0, 0); + + TensorFixedSize> result = t4; + cout << result(0, 0, 0); + +Until you need the results, you can keep the operation around, and even reuse +it for additional operations. As long as you keep the expression as an +operation, no computation is performed. + + // One way to compute exp((t1 + t2) * 0.2f); + auto t3 = t1 + t2; + auto t4 = t3 * 0.2f; + auto t5 = t4.exp(); + Tensor result = t5; + + // Another way, exactly as efficient as the previous one: + Tensor result = ((t1 + t2) * 0.2f).exp(); + +### Controlling When Expression are Evaluated + +There are several ways to control when expressions are evaluated: + +* Assignment to a Tensor, TensorFixedSize, or TensorMap. +* Use of the eval() method. +* Assignment to a TensorRef. + +#### Assigning to a Tensor, TensorFixedSize, or TensorMap. + +The most common way to evaluate an expression is to assign it to a Tensor. In +the example below, the ```auto``` declarations make the intermediate values +"Operations", not Tensors, and do not cause the expressions to be evaluated. +The assignment to the Tensor ```result``` causes the evaluation of all the +operations. + + auto t3 = t1 + t2; // t3 is an Operation. + auto t4 = t3 * 0.2f; // t4 is an Operation. + auto t5 = t4.exp(); // t5 is an Operation. + Tensor result = t5; // The operations are evaluated. + +If you know the ranks and sizes of the Operation value you can assign the +Operation to a TensorFixedSize instead of a Tensor, which is a bit more +efficient. + + // We know that the result is a 4x4x2 tensor! + TensorFixedSize result = t5; + +Simiarly, assigning an expression to a TensorMap causes its evaluation. Like +tensors of type TensorFixedSize, TensorMaps cannot be resized so they have to +have the rank and sizes of the expression that are assigned to them. + +#### Calling eval(). + +When you compute large composite expressions, you sometimes want to tell Eigen +that an intermediate value in the expression tree is worth evaluating ahead of +time. This is done by inserting a call to the ```eval()``` method of the +expression Operation. + + // The previous example could have been written: + Tensor result = ((t1 + t2) * 0.2f).exp(); + + // If you want to compute (t1 + t2) once ahead of time you can write: + Tensor result = ((t1 + t2).eval() * 0.2f).exp(); + +Semantically, calling ```eval()``` is equivalent to materializing the value of +the expression in a temporary Tensor of the right size. The code above in +effect does: + + // .eval() knows the size! + TensorFixedSize tmp = t1 + t2; + Tensor result = (tmp * 0.2f).exp(); + +Note that the return value of ```eval()``` is itself an Operation, so the +following code does not do what you may think: + + // Here t3 is an evaluation Operation. t3 has not been evaluated yet. + auto t3 = (t1 + t2).eval(); + + // You can use t3 in another expression. Still no evaluation. + auto t4 = (t3 * 0.2f).exp(); + + // The value is evaluated when you assign the Operation to a Tensor, using + // an intermediate tensor to represent t3.x + Tensor result = t4; + +While in the examples above calling ```eval()``` does not make a difference in +performance, in other cases it can make a huge difference. In the expression +below the ```broadcast()``` expression causes the ```X.maximum()``` expression +to be evaluated many times: + + Tensor<...> X ...; + Tensor<...> Y = ((X - X.maximum(depth_dim).reshape(dims2d).broadcast(bcast)) + * beta).exp(); + +Inserting a call to ```eval()``` between the ```maximum()``` and +```reshape()``` calls guarantees that maximum() is only computed once and +greatly speeds-up execution: + + Tensor<...> Y = + ((X - X.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) + * beta).exp(); + +In the other example below, the tensor ```Y``` is both used in the expression +and its assignment. This is an aliasing problem and if the evaluation is not +done in the right order Y will be updated incrementally during the evaluation +resulting in bogus results: + + Tensor<...> Y ...; + Y = Y / (Y.sum(depth_dim).reshape(dims2d).broadcast(bcast)); + +Inserting a call to ```eval()``` between the ```sum()``` and ```reshape()``` +expressions ensures that the sum is computed before any updates to ```Y``` are +done. + + Y = Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); + +Note that an eval around the full right hand side expression is not needed +because the generated has to compute the i-th value of the right hand side +before assigning it to the left hand side. + +However, if you were assigning the expression value to a shuffle of ```Y``` +then you would need to force an eval for correctness by adding an ```eval()``` +call for the right hand side: + + Y.shuffle(...) = + (Y / (Y.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast))).eval(); + + +#### Assigning to a TensorRef. + +If you need to access only a few elements from the value of an expression you +can avoid materializing the value in a full tensor by using a TensorRef. + +A TensorRef is a small wrapper class for any Eigen Operation. It provides +overloads for the ```()``` operator that let you access individual values in +the expression. TensorRef is convenient, because the Operation themselves do +not provide a way to access individual elements. + + // Create a TensorRef for the expression. The expression is not + // evaluated yet. + TensorRef > ref = ((t1 + t2) * 0.2f).exp(); + + // Use "ref" to access individual elements. The expression is evaluated + // on the fly. + float at_0 = ref(0, 0, 0); + cout << ref(0, 1, 0); + +Only use TensorRef when you need a subset of the values of the expression. +TensorRef only computes the values you access. However note that if you are +going to access all the values it will be much faster to materialize the +results in a Tensor first. + +In some cases, if the full Tensor result would be very large, you may save +memory by accessing it as a TensorRef. But not always. So don't count on it. + + +### Controlling How Expressions Are Evaluated + +The tensor library provides several implementations of the various operations +such as contractions and convolutions. The implementations are optimized for +different environments: single threaded on CPU, multi threaded on CPU, or on a +GPU using cuda. Additional implementations may be added later. + +You can choose which implementation to use with the ```device()``` call. If +you do not choose an implementation explicitly the default implementation that +uses a single thread on the CPU is used. + +The default implementation has been optimized for recent Intel CPUs, taking +advantage of SSE, AVX, and FMA instructions. Work is ongoing to tune the +library on ARM CPUs. Note that you need to pass compiler-dependent flags +to enable the use of SSE, AVX, and other instructions. + +For example, the following code adds two tensors using the default +single-threaded CPU implementation: + + Tensor a(30, 40); + Tensor b(30, 40); + Tensor c = a + b; + +To choose a different implementation you have to insert a ```device()``` call +before the assignment of the result. For technical C++ reasons this requires +that the Tensor for the result be declared on its own. This means that you +have to know the size of the result. + + Eigen::Tensor c(30, 40); + c.device(...) = a + b; + +The call to ```device()``` must be the last call on the left of the operator=. + +You must pass to the ```device()``` call an Eigen device object. There are +presently three devices you can use: DefaultDevice, ThreadPoolDevice and +GpuDevice. + + +#### Evaluating With the DefaultDevice + +This is exactly the same as not inserting a ```device()``` call. + + DefaultDevice my_device; + c.device(my_device) = a + b; + +#### Evaluating with a Thread Pool + + // Create the Eigen ThreadPoolDevice. + Eigen::ThreadPoolDevice my_device(4 /* number of threads to use */); + + // Now just use the device when evaluating expressions. + Eigen::Tensor c(30, 50); + c.device(my_device) = a.contract(b, dot_product_dims); + + +#### Evaluating On GPU + +This is presently a bit more complicated than just using a thread pool device. +You need to create a GPU device but you also need to explicitly allocate the +memory for tensors with cuda. + + +## API Reference + +### Datatypes + +In the documentation of the tensor methods and Operation we mention datatypes +that are tensor-type specific: + +#### ::Dimensions + +Acts like an array of ints. Has an ```int size``` attribute, and can be +indexed like an array to access individual values. Used to represent the +dimensions of a tensor. See ```dimensions()```. + +#### ::Index + +Acts like an ```int```. Used for indexing tensors along their dimensions. See +```operator()```, ```dimension()```, and ```size()```. + +#### ::Scalar + +Represents the datatype of individual tensor elements. For example, for a +```Tensor```, ```Scalar``` is the type ```float```. See +```setConstant()```. + +#### + +We use this pseudo type to indicate that a tensor Operation is returned by a +method. We indicate in the text the type and dimensions of the tensor that the +Operation returns after evaluation. + +The Operation will have to be evaluated, for example by assigning it to a +tensor, before you can access the values of the resulting tensor. You can also +access the values through a TensorRef. + + +## Built-in Tensor Methods + +These are usual C++ methods that act on tensors immediately. They are not +Operations which provide delayed evaluation of their results. Unless specified +otherwise, all the methods listed below are available on all tensor classes: +Tensor, TensorFixedSize, and TensorMap. + +## Metadata + +### int NumDimensions + +Constant value indicating the number of dimensions of a Tensor. This is also +known as the tensor "rank". + + Eigen::Tensor a(3, 4); + cout << "Dims " << a.NumDimensions; + => Dims 2 + +### Dimensions dimensions() + +Returns an array-like object representing the dimensions of the tensor. +The actual type of the dimensions() result is ::Dimensions. + + Eigen::Tensor a(3, 4); + const Eigen::Tensor::Dimensions& d = a.dimensions(); + cout << "Dim size: " << d.size << ", dim 0: " << d[0] + << ", dim 1: " << d[1]; + => Dim size: 2, dim 0: 3, dim 1: 4 + +If you use a C++11 compiler, you can use ```auto``` to simplify the code: + + const auto& d = a.dimensions(); + cout << "Dim size: " << d.size << ", dim 0: " << d[0] + << ", dim 1: " << d[1]; + => Dim size: 2, dim 0: 3, dim 1: 4 + +### Index dimension(Index n) + +Returns the n-th dimension of the tensor. The actual type of the +```dimension()``` result is ```::Index```, but you can +always use it like an int. + + Eigen::Tensor a(3, 4); + int dim1 = a.dimension(1); + cout << "Dim 1: " << dim1; + => Dim 1: 4 + +### Index size() + +Returns the total number of elements in the tensor. This is the product of all +the tensor dimensions. The actual type of the ```size()``` result is +```::Index```, but you can always use it like an int. + + Eigen::Tensor a(3, 4); + cout << "Size: " << a.size(); + => Size: 12 + + +### Getting Dimensions From An Operation + +A few operations provide ```dimensions()``` directly, +e.g. ```TensorReslicingOp```. Most operations defer calculating dimensions +until the operation is being evaluated. If you need access to the dimensions +of a deferred operation, you can wrap it in a TensorRef (see Assigning to a +TensorRef above), which provides ```dimensions()``` and ```dimension()``` as +above. + +TensorRef can also wrap the plain Tensor types, so this is a useful idiom in +templated contexts where the underlying object could be either a raw Tensor +or some deferred operation (e.g. a slice of a Tensor). In this case, the +template code can wrap the object in a TensorRef and reason about its +dimensionality while remaining agnostic to the underlying type. + + +## Constructors + +### Tensor + +Creates a tensor of the specified size. The number of arguments must be equal +to the rank of the tensor. The content of the tensor is not initialized. + + Eigen::Tensor a(3, 4); + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + +### TensorFixedSize + +Creates a tensor of the specified size. The number of arguments in the Size<> +template parameter determines the rank of the tensor. The content of the tensor +is not initialized. + + Eigen::TensorFixedSize> a; + cout << "Rank: " << a.rank() << endl; + => Rank: 2 + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + +### TensorMap + +Creates a tensor mapping an existing array of data. The data must not be freed +until the TensorMap is discarded, and the size of the data must be large enough +to accomodate of the coefficients of the tensor. + + float data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + Eigen::TensorMap a(data, 3, 4); + cout << "NumRows: " << a.dimension(0) << " NumCols: " << a.dimension(1) << endl; + => NumRows: 3 NumCols: 4 + cout << "a(1, 2): " << a(1, 2) << endl; + => a(1, 2): 9 + + +## Contents Initialization + +When a new Tensor or a new TensorFixedSize are created, memory is allocated to +hold all the tensor elements, but the memory is not initialized. Similarly, +when a new TensorMap is created on top of non-initialized memory the memory its +contents are not initialized. + +You can use one of the methods below to initialize the tensor memory. These +have an immediate effect on the tensor and return the tensor itself as a +result. These are not tensor Operations which delay evaluation. + +### setConstant(const Scalar& val) + +Sets all elements of the tensor to the constant value ```val```. ```Scalar``` +is the type of data stored in the tensor. You can pass any value that is +convertible to that type. + +Returns the tensor itself in case you want to chain another call. + + a.setConstant(12.3f); + cout << "Constant: " << endl << a << endl << endl; + => + Constant: + 12.3 12.3 12.3 12.3 + 12.3 12.3 12.3 12.3 + 12.3 12.3 12.3 12.3 + +Note that ```setConstant()``` can be used on any tensor where the element type +has a copy constructor and an ```operator=()```: + + Eigen::Tensor a(2, 3); + a.setConstant("yolo"); + cout << "String tensor: " << endl << a << endl << endl; + => + String tensor: + yolo yolo yolo + yolo yolo yolo + + +### setZero() + +Fills the tensor with zeros. Equivalent to ```setConstant(Scalar(0))```. +Returns the tensor itself in case you want to chain another call. + + a.setZero(); + cout << "Zeros: " << endl << a << endl << endl; + => + Zeros: + 0 0 0 0 + 0 0 0 0 + 0 0 0 0 + + +### setValues({..initializer_list}) + +Fills the tensor with explicit values specified in a std::initializer_list. +The type of the initializer list depends on the type and rank of the tensor. + +If the tensor has rank N, the initializer list must be nested N times. The +most deeply nested lists must contains P scalars of the Tensor type where P is +the size of the last dimension of the Tensor. + +For example, for a ```TensorFixedSize``` the initializer list must +contains 2 lists of 3 floats each. + +```setValues()``` returns the tensor itself in case you want to chain another +call. + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 1.0f, 2.0f}, {3.0f, 4.0f, 5.0f}}); + cout << "a" << endl << a << endl << endl; + => + a + 0 1 2 + 3 4 5 + +If a list is too short, the corresponding elements of the tensor will not be +changed. This is valid at each level of nesting. For example the following +code only sets the values of the first row of the tensor. + + Eigen::Tensor a(2, 3); + a.setConstant(1000); + a.setValues({{10, 20, 30}}); + cout << "a" << endl << a << endl << endl; + => + a + 10 20 30 + 1000 1000 1000 + +### setRandom() + +Fills the tensor with random values. Returns the tensor itself in case you +want to chain another call. + + a.setRandom(); + cout << "Random: " << endl << a << endl << endl; + => + Random: + 0.680375 0.59688 -0.329554 0.10794 + -0.211234 0.823295 0.536459 -0.0452059 + 0.566198 -0.604897 -0.444451 0.257742 + +You can customize ```setRandom()``` by providing your own random number +generator as a template argument: + + a.setRandom(); + +Here, ```MyRandomGenerator``` must be a struct with the following member +functions, where Scalar and Index are the same as ```::Scalar``` +and ```::Index```. + +See ```struct UniformRandomGenerator``` in TensorFunctors.h for an example. + + // Custom number generator for use with setRandom(). + struct MyRandomGenerator { + // Default and copy constructors. Both are needed + MyRandomGenerator() { } + MyRandomGenerator(const MyRandomGenerator& ) { } + + // Return a random value to be used. "element_location" is the + // location of the entry to set in the tensor, it can typically + // be ignored. + Scalar operator()(Eigen::DenseIndex element_location, + Eigen::DenseIndex /*unused*/ = 0) const { + return ; + } + + // Same as above but generates several numbers at a time. + typename internal::packet_traits::type packetOp( + Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const { + return ; + } + }; + +You can also use one of the 2 random number generators that are part of the +tensor library: +* UniformRandomGenerator +* NormalRandomGenerator + + +## Data Access + +The Tensor, TensorFixedSize, and TensorRef classes provide the following +accessors to access the tensor coefficients: + + const Scalar& operator()(const array& indices) + const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + Scalar& operator()(const array& indices) + Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + +The number of indices must be equal to the rank of the tensor. Moreover, these +accessors are not available on tensor expressions. In order to access the +values of a tensor expression, the expression must either be evaluated or +wrapped in a TensorRef. + + +### Scalar* data() and const Scalar* data() const + +Returns a pointer to the storage for the tensor. The pointer is const if the +tensor was const. This allows direct access to the data. The layout of the +data depends on the tensor layout: RowMajor or ColMajor. + +This access is usually only needed for special cases, for example when mixing +Eigen Tensor code with other libraries. + +Scalar is the type of data stored in the tensor. + + Eigen::Tensor a(3, 4); + float* a_data = a.data(); + a_data[0] = 123.45f; + cout << "a(0, 0): " << a(0, 0); + => a(0, 0): 123.45 + + +## Tensor Operations + +All the methods documented below return non evaluated tensor ```Operations```. +These can be chained: you can apply another Tensor Operation to the value +returned by the method. + +The chain of Operation is evaluated lazily, typically when it is assigned to a +tensor. See "Controlling when Expression are Evaluated" for more details about +their evaluation. + +### constant(const Scalar& val) + +Returns a tensor of the same type and dimensions as the original tensor but +where all elements have the value ```val```. + +This is useful, for example, when you want to add or subtract a constant from a +tensor, or multiply every element of a tensor by a scalar. + + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = a + a.constant(2.0f); + Eigen::Tensor c = b * b.constant(0.2f); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + cout << "c" << endl << c << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + 3 3 3 + 3 3 3 + + c + 0.6 0.6 0.6 + 0.6 0.6 0.6 + +### random() + +Returns a tensor of the same type and dimensions as the current tensor +but where all elements have random values. + +This is for example useful to add random values to an existing tensor. +The generation of random values can be customized in the same manner +as for ```setRandom()```. + + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = a + a.random(); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + 1.68038 1.5662 1.82329 + 0.788766 1.59688 0.395103 + + +## Unary Element Wise Operations + +All these operations take a single input tensor as argument and return a tensor +of the same type and dimensions as the tensor to which they are applied. The +requested operations are applied to each element independently. + +### operator-() + +Returns a tensor of the same type and dimensions as the original tensor +containing the opposite values of the original tensor. + + Eigen::Tensor a(2, 3); + a.setConstant(1.0f); + Eigen::Tensor b = -a; + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 1 1 + 1 1 1 + + b + -1 -1 -1 + -1 -1 -1 + +### sqrt() + +Returns a tensor of the same type and dimensions as the original tensor +containing the square roots of the original tensor. + +### rsqrt() + +Returns a tensor of the same type and dimensions as the original tensor +containing the inverse square roots of the original tensor. + +### square() + +Returns a tensor of the same type and dimensions as the original tensor +containing the squares of the original tensor values. + +### inverse() + +Returns a tensor of the same type and dimensions as the original tensor +containing the inverse of the original tensor values. + +### exp() + +Returns a tensor of the same type and dimensions as the original tensor +containing the exponential of the original tensor. + +### log() + +Returns a tensor of the same type and dimensions as the original tensor +containing the natural logarithms of the original tensor. + +### abs() + +Returns a tensor of the same type and dimensions as the original tensor +containing the absolute values of the original tensor. + +### pow(Scalar exponent) + +Returns a tensor of the same type and dimensions as the original tensor +containing the coefficients of the original tensor to the power of the +exponent. + +The type of the exponent, Scalar, is always the same as the type of the +tensor coefficients. For example, only integer exponents can be used in +conjuntion with tensors of integer values. + +You can use cast() to lift this restriction. For example this computes +cubic roots of an int Tensor: + + Eigen::Tensor a(2, 3); + a.setValues({{0, 1, 8}, {27, 64, 125}}); + Eigen::Tensor b = a.cast().pow(1.0 / 3.0); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 0 1 8 + 27 64 125 + + b + 0 1 2 + 3 4 5 + +### operator * (Scalar scale) + +Multiplies all the coefficients of the input tensor by the provided scale. + +### cwiseMax(Scalar threshold) +TODO + +### cwiseMin(Scalar threshold) +TODO + +### unaryExpr(const CustomUnaryOp& func) +TODO + + +## Binary Element Wise Operations + +These operations take two input tensors as arguments. The 2 input tensors should +be of the same type and dimensions. The result is a tensor of the same +dimensions as the tensors to which they are applied, and unless otherwise +specified it is also of the same type. The requested operations are applied to +each pair of elements independently. + +### operator+(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise sums of the inputs. + +### operator-(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise differences of the inputs. + +### operator*(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise products of the inputs. + +### operator/(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise quotients of the inputs. + +This operator is not supported for integer types. + +### cwiseMax(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise maximums of the inputs. + +### cwiseMin(const OtherDerived& other) + +Returns a tensor of the same type and dimensions as the input tensors +containing the coefficient wise mimimums of the inputs. + +### Logical operators + +The following logical operators are supported as well: + +* operator&&(const OtherDerived& other) +* operator||(const OtherDerived& other) +* operator<(const OtherDerived& other) +* operator<=(const OtherDerived& other) +* operator>(const OtherDerived& other) +* operator>=(const OtherDerived& other) +* operator==(const OtherDerived& other) +* operator!=(const OtherDerived& other) + +They all return a tensor of boolean values. + + +## Selection (select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) + +Selection is a coefficient-wise ternary operator that is the tensor equivalent +to the if-then-else operation. + + Tensor if = ...; + Tensor then = ...; + Tensor else = ...; + Tensor result = if.select(then, else); + +The 3 arguments must be of the same dimensions, which will also be the dimension +of the result. The 'if' tensor must be of type boolean, the 'then' and the +'else' tensor must be of the same type, which will also be the type of the +result. + +Each coefficient in the result is equal to the corresponding coefficient in the +'then' tensor if the corresponding value in the 'if' tensor is true. If not, the +resulting coefficient will come from the 'else' tensor. + + +## Contraction + +Tensor *contractions* are a generalization of the matrix product to the +multidimensional case. + + // Create 2 matrices using tensors of rank 2 + Eigen::Tensor a(2, 3); + a.setValues({{1, 2, 3}, {6, 5, 4}}); + Eigen::Tensor b(3, 2); + a.setValues({{1, 2}, {4, 5}, {5, 6}}); + + // Compute the traditional matrix product + array, 1> product_dims = { IndexPair(1, 0) }; + Eigen::Tensor AB = a.contract(b, product_dims); + + // Compute the product of the transpose of the matrices + array, 1> transpose_product_dims = { IndexPair(0, 1) }; + Eigen::Tensor AtBt = a.contract(b, transposed_product_dims); + + +## Reduction Operations + +A *Reduction* operation returns a tensor with fewer dimensions than the +original tensor. The values in the returned tensor are computed by applying a +*reduction operator* to slices of values from the original tensor. You specify +the dimensions along which the slices are made. + +The Eigen Tensor library provides a set of predefined reduction operators such +as ```maximum()``` and ```sum()``` and lets you define additional operators by +implementing a few methods from a reductor template. + +### Reduction Dimensions + +All reduction operations take a single parameter of type +```::Dimensions``` which can always be specified as an array of +ints. These are called the "reduction dimensions." The values are the indices +of the dimensions of the input tensor over which the reduction is done. The +parameter can have at most as many element as the rank of the input tensor; +each element must be less than the tensor rank, as it indicates one of the +dimensions to reduce. + +Each dimension of the input tensor should occur at most once in the reduction +dimensions as the implementation does not remove duplicates. + +The order of the values in the reduction dimensions does not affect the +results, but the code may execute faster if you list the dimensions in +increasing order. + +Example: Reduction along one dimension. + + // Create a tensor of 2 dimensions + Eigen::Tensor a(2, 3); + a.setValues({{1, 2, 3}, {6, 5, 4}}); + // Reduce it along the second dimension (1)... + Eigen::array dims({1 /* dimension to reduce */}); + // ...using the "maximum" operator. + // The result is a tensor with one dimension. The size of + // that dimension is the same as the first (non-reduced) dimension of a. + Eigen::Tensor b = a.maximum(dims); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 2 3 + 6 5 4 + + b + 3 + 6 + +Example: Reduction along two dimensions. + + Eigen::Tensor a(2, 3, 4); + a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, + {7.0f, 6.0f, 5.0f, 4.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}, + {{12.0f, 13.0f, 14.0f, 15.0f}, + {19.0f, 18.0f, 17.0f, 16.0f}, + {20.0f, 21.0f, 22.0f, 23.0f}}}); + // The tensor a has 3 dimensions. We reduce along the + // first 2, resulting in a tensor with a single dimension + // of size 4 (the last dimension of a.) + // Note that we pass the array of reduction dimensions + // directly to the maximum() call. + Eigen::Tensor b = + a.maximum(Eigen::array({0, 1})); + cout << "b" << endl << b << endl << endl; + => + b + 20 + 21 + 22 + 23 + +#### Reduction along all dimensions + +As a special case, if you pass no parameter to a reduction operation the +original tensor is reduced along *all* its dimensions. The result is a +scalar, represented as a zero-dimension tensor. + + Eigen::Tensor a(2, 3, 4); + a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, + {7.0f, 6.0f, 5.0f, 4.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}, + {{12.0f, 13.0f, 14.0f, 15.0f}, + {19.0f, 18.0f, 17.0f, 16.0f}, + {20.0f, 21.0f, 22.0f, 23.0f}}}); + // Reduce along all dimensions using the sum() operator. + Eigen::Tensor b = a.sum(); + cout << "b" << endl << b << endl << endl; + => + b + 276 + + +### sum(const Dimensions& new_dims) +### sum() + +Reduce a tensor using the sum() operator. The resulting values +are the sum of the reduced values. + +### mean(const Dimensions& new_dims) +### mean() + +Reduce a tensor using the mean() operator. The resulting values +are the mean of the reduced values. + +### maximum(const Dimensions& new_dims) +### maximum() + +Reduce a tensor using the maximum() operator. The resulting values are the +largest of the reduced values. + +### minimum(const Dimensions& new_dims) +### minimum() + +Reduce a tensor using the minimum() operator. The resulting values +are the smallest of the reduced values. + +### prod(const Dimensions& new_dims) +### prod() + +Reduce a tensor using the prod() operator. The resulting values +are the product of the reduced values. + +### all(const Dimensions& new_dims) +### all() +Reduce a tensor using the all() operator. Casts tensor to bool and then checks +whether all elements are true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + +### any(const Dimensions& new_dims) +### any() +Reduce a tensor using the any() operator. Casts tensor to bool and then checks +whether any element is true. Runs through all elements rather than +short-circuiting, so may be significantly inefficient. + + +### reduce(const Dimensions& new_dims, const Reducer& reducer) + +Reduce a tensor using a user-defined reduction operator. See ```SumReducer``` +in TensorFunctors.h for information on how to implement a reduction operator. + + +## Scan Operations + +A *Scan* operation returns a tensor with the same dimensions as the original +tensor. The operation performs an inclusive scan along the specified +axis, which means it computes a running total along the axis for a given +reduction operation. +If the reduction operation corresponds to summation, then this computes the +prefix sum of the tensor along the given axis. + +Example: +dd a comment to this line + + // Create a tensor of 2 dimensions + Eigen::Tensor a(2, 3); + a.setValues({{1, 2, 3}, {4, 5, 6}}); + // Scan it along the second dimension (1) using summation + Eigen::Tensor b = a.cumsum(1); + // The result is a tensor with the same size as the input + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 1 2 3 + 6 5 4 + + b + 1 3 6 + 4 9 15 + +### cumsum(const Index& axis) + +Perform a scan by summing consecutive entries. + +### cumprod(const Index& axis) + +Perform a scan by multiplying consecutive entries. + + +## Convolutions + +### convolve(const Kernel& kernel, const Dimensions& dims) + +Returns a tensor that is the output of the convolution of the input tensor with the kernel, +along the specified dimensions of the input tensor. The dimension size for dimensions of the output tensor +which were part of the convolution will be reduced by the formula: +output_dim_size = input_dim_size - kernel_dim_size + 1 (requires: input_dim_size >= kernel_dim_size). +The dimension sizes for dimensions that were not part of the convolution will remain the same. +Performance of the convolution can depend on the length of the stride(s) of the input tensor dimension(s) along which the +convolution is computed (the first dimension has the shortest stride for ColMajor, whereas RowMajor's shortest stride is +for the last dimension). + + // Compute convolution along the second and third dimension. + Tensor input(3, 3, 7, 11); + Tensor kernel(2, 2); + Tensor output(3, 2, 6, 11); + input.setRandom(); + kernel.setRandom(); + + Eigen::array dims({1, 2}); // Specify second and third dimension for convolution. + output = input.convolve(kernel, dims); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 6; ++k) { + for (int l = 0; l < 11; ++l) { + const float result = output(i,j,k,l); + const float expected = input(i,j+0,k+0,l) * kernel(0,0) + + input(i,j+1,k+0,l) * kernel(1,0) + + input(i,j+0,k+1,l) * kernel(0,1) + + input(i,j+1,k+1,l) * kernel(1,1); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + + +## Geometrical Operations + +These operations return a Tensor with different dimensions than the original +Tensor. They can be used to access slices of tensors, see them with different +dimensions, or pad tensors with additional data. + +### reshape(const Dimensions& new_dims) + +Returns a view of the input tensor that has been reshaped to the specified +new dimensions. The argument new_dims is an array of Index values. The +rank of the resulting tensor is equal to the number of elements in new_dims. + +The product of all the sizes in the new dimension array must be equal to +the number of elements in the input tensor. + + // Increase the rank of the input tensor by introducing a new dimension + // of size 1. + Tensor input(7, 11); + array three_dims{{7, 11, 1}}; + Tensor result = input.reshape(three_dims); + + // Decrease the rank of the input tensor by merging 2 dimensions; + array one_dim{{7 * 11}}; + Tensor result = input.reshape(one_dim); + +This operation does not move any data in the input tensor, so the resulting +contents of a reshaped Tensor depend on the data layout of the original Tensor. + +For example this is what happens when you ```reshape()``` a 2D ColMajor tensor +to one dimension: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array one_dim({3 * 2}); + Eigen::Tensor b = a.reshape(one_dim); + cout << "b" << endl << b << endl; + => + b + 0 + 300 + 100 + 400 + 200 + 500 + +This is what happens when the 2D Tensor is RowMajor: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array one_dim({3 * 2}); + Eigen::Tensor b = a.reshape(one_dim); + cout << "b" << endl << b << endl; + => + b + 0 + 100 + 200 + 300 + 400 + 500 + +The reshape operation is a lvalue. In other words, it can be used on the left +side of the assignment operator. + +The previous example can be rewritten as follow: + + Eigen::Tensor a(2, 3); + a.setValues({{0.0f, 100.0f, 200.0f}, {300.0f, 400.0f, 500.0f}}); + Eigen::array two_dim({2, 3}); + Eigen::Tensor b; + b.reshape(two_dim) = a; + cout << "b" << endl << b << endl; + => + b + 0 + 300 + 100 + 400 + 200 + 500 + +Note that "b" itself was not reshaped but that instead the assignment is done to +the reshape view of b. + + +### shuffle(const Shuffle& shuffle) + +Returns a copy of the input tensor whose dimensions have been +reordered according to the specified permutation. The argument shuffle +is an array of Index values. Its size is the rank of the input +tensor. It must contain a permutation of 0, 1, ..., rank - 1. The i-th +dimension of the output tensor equals to the size of the shuffle[i]-th +dimension of the input tensor. For example: + + // Shuffle all dimensions to the left by 1. + Tensor input(20, 30, 50); + // ... set some values in input. + Tensor output = input.shuffle({1, 2, 0}) + + eigen_assert(output.dimension(0) == 30); + eigen_assert(output.dimension(1) == 50); + eigen_assert(output.dimension(2) == 20); + +Indices into the output tensor are shuffled accordingly to formulate +indices into the input tensor. For example, one can assert in the above +code snippet that: + + eigen_assert(output(3, 7, 11) == input(11, 3, 7)); + +In general, one can assert that + + eigen_assert(output(..., indices[shuffle[i]], ...) == + input(..., indices[i], ...)) + +The shuffle operation results in a lvalue, which means that it can be assigned +to. In other words, it can be used on the left side of the assignment operator. + +Let's rewrite the previous example to take advantage of this feature: + + // Shuffle all dimensions to the left by 1. + Tensor input(20, 30, 50); + // ... set some values in input. + Tensor output(30, 50, 20); + output.shuffle({2, 0, 1}) = input; + + +### stride(const Strides& strides) + +Returns a view of the input tensor that strides (skips stride-1 +elements) along each of the dimensions. The argument strides is an +array of Index values. The dimensions of the resulting tensor are +ceil(input_dimensions[i] / strides[i]). + +For example this is what happens when you ```stride()``` a 2D tensor: + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array strides({3, 2}); + Eigen::Tensor b = a.stride(strides); + cout << "b" << endl << b << endl; + => + b + 0 200 + 900 1100 + +It is possible to assign a tensor to a stride: + Tensor input(20, 30, 50); + // ... set some values in input. + Tensor output(40, 90, 200); + output.stride({2, 3, 4}) = input; + + +### slice(const StartIndices& offsets, const Sizes& extents) + +Returns a sub-tensor of the given tensor. For each dimension i, the slice is +made of the coefficients stored between offset[i] and offset[i] + extents[i] in +the input tensor. + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array offsets = {1, 0}; + Eigen::array extents = {2, 2}; + Eigen::Tensor slice = a.slice(offsets, extents); + cout << "a" << endl << a << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + cout << "slice" << endl << slice << endl; + => + slice + 300 400 + 600 700 + + +### chip(const Index offset, const Index dim) + +A chip is a special kind of slice. It is the subtensor at the given offset in +the dimension dim. The returned tensor has one fewer dimension than the input +tensor: the dimension dim is removed. + +For example, a matrix chip would be either a row or a column of the input +matrix. + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::Tensor row_3 = a.chip(2, 0); + Eigen::Tensor col_2 = a.chip(1, 1); + cout << "a" << endl << a << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + cout << "row_3" << endl << row_3 << endl; + => + row_3 + 600 700 800 + cout << "col_2" << endl << col_2 << endl; + => + col_2 + 100 400 700 1000 + +It is possible to assign values to a tensor chip since the chip operation is a +lvalue. For example: + + Eigen::Tensor a(3); + a.setValues({{100, 200, 300}}); + Eigen::Tensor b(2, 3); + b.setZero(); + b.chip(0, 0) = a; + cout << "a" << endl << a << endl; + => + a + 100 + 200 + 300 + cout << "b" << endl << b << endl; + => + b + 100 200 300 + 0 0 0 + + +### reverse(const ReverseDimensions& reverse) + +Returns a view of the input tensor that reverses the order of the coefficients +along a subset of the dimensions. The argument reverse is an array of boolean +values that indicates whether or not the order of the coefficients should be +reversed along each of the dimensions. This operation preserves the dimensions +of the input tensor. + +For example this is what happens when you ```reverse()``` the first dimension +of a 2D tensor: + + Eigen::Tensor a(4, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}, + {600, 700, 800}, {900, 1000, 1100}}); + Eigen::array reverse({true, false}); + Eigen::Tensor b = a.reverse(reverse); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + 600 700 800 + 900 1000 1100 + b + 900 1000 1100 + 600 700 800 + 300 400 500 + 0 100 200 + + +### broadcast(const Broadcast& broadcast) + +Returns a view of the input tensor in which the input is replicated one to many +times. +The broadcast argument specifies how many copies of the input tensor need to be +made in each of the dimensions. + + Eigen::Tensor a(2, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}}); + Eigen::array bcast({3, 2}); + Eigen::Tensor b = a.broadcast(bcast); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + b + 0 100 200 0 100 200 + 300 400 500 300 400 500 + 0 100 200 0 100 200 + 300 400 500 300 400 500 + 0 100 200 0 100 200 + 300 400 500 300 400 500 + +### concatenate(const OtherDerived& other, Axis axis) + +TODO + +### pad(const PaddingDimensions& padding) + +Returns a view of the input tensor in which the input is padded with zeros. + + Eigen::Tensor a(2, 3); + a.setValues({{0, 100, 200}, {300, 400, 500}}); + Eigen::array, 2> paddings; + paddings[0] = make_pair(0, 1); + paddings[1] = make_pair(2, 3); + Eigen::Tensor b = a.pad(paddings); + cout << "a" << endl << a << endl << "b" << endl << b << endl; + => + a + 0 100 200 + 300 400 500 + b + 0 0 0 0 + 0 0 0 0 + 0 100 200 0 + 300 400 500 0 + 0 0 0 0 + 0 0 0 0 + 0 0 0 0 + + +### extract_patches(const PatchDims& patch_dims) + +Returns a tensor of coefficient patches extracted from the input tensor, where +each patch is of dimension specified by 'patch_dims'. The returned tensor has +one greater dimension than the input tensor, which is used to index each patch. +The patch index in the output tensor depends on the data layout of the input +tensor: the patch index is the last dimension ColMajor layout, and the first +dimension in RowMajor layout. + +For example, given the following input tensor: + + Eigen::Tensor tensor(3,4); + tensor.setValues({{0.0f, 1.0f, 2.0f, 3.0f}, + {4.0f, 5.0f, 6.0f, 7.0f}, + {8.0f, 9.0f, 10.0f, 11.0f}}); + + cout << "tensor: " << endl << tensor << endl; +=> +tensor: + 0 1 2 3 + 4 5 6 7 + 8 9 10 11 + +Six 2x2 patches can be extracted and indexed using the following code: + + Eigen::Tensor patch; + Eigen::array patch_dims; + patch_dims[0] = 2; + patch_dims[1] = 2; + patch = tensor.extract_patches(patch_dims); + for (int k = 0; k < 6; ++k) { + cout << "patch index: " << k << endl; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + if (DataLayout == ColMajor) { + cout << patch(i, j, k) << " "; + } else { + cout << patch(k, i, j) << " "; + } + } + cout << endl; + } + } + +This code results in the following output when the data layout is ColMajor: + +patch index: 0 +0 1 +4 5 +patch index: 1 +4 5 +8 9 +patch index: 2 +1 2 +5 6 +patch index: 3 +5 6 +9 10 +patch index: 4 +2 3 +6 7 +patch index: 5 +6 7 +10 11 + +This code results in the following output when the data layout is RowMajor: +(NOTE: the set of patches is the same as in ColMajor, but are indexed differently). + +patch index: 0 +0 1 +4 5 +patch index: 1 +1 2 +5 6 +patch index: 2 +2 3 +6 7 +patch index: 3 +4 5 +8 9 +patch index: 4 +5 6 +9 10 +patch index: 5 +6 7 +10 11 + +### extract_image_patches(const Index patch_rows, const Index patch_cols, + const Index row_stride, const Index col_stride, + const PaddingType padding_type) + +Returns a tensor of coefficient image patches extracted from the input tensor, +which is expected to have dimensions ordered as follows (depending on the data +layout of the input tensor, and the number of additional dimensions 'N'): + +*) ColMajor +1st dimension: channels (of size d) +2nd dimension: rows (of size r) +3rd dimension: columns (of size c) +4th-Nth dimension: time (for video) or batch (for bulk processing). + +*) RowMajor (reverse order of ColMajor) +1st-Nth dimension: time (for video) or batch (for bulk processing). +N+1'th dimension: columns (of size c) +N+2'th dimension: rows (of size r) +N+3'th dimension: channels (of size d) + +The returned tensor has one greater dimension than the input tensor, which is +used to index each patch. The patch index in the output tensor depends on the +data layout of the input tensor: the patch index is the 4'th dimension in +ColMajor layout, and the 4'th from the last dimension in RowMajor layout. + +For example, given the following input tensor with the following dimension +sizes: + *) depth: 2 + *) rows: 3 + *) columns: 5 + *) batch: 7 + + Tensor tensor(2,3,5,7); + Tensor tensor_row_major = tensor.swap_layout(); + +2x2 image patches can be extracted and indexed using the following code: + +*) 2D patch: ColMajor (patch indexed by second-to-last dimension) + Tensor twod_patch; + twod_patch = tensor.extract_image_patches<2, 2>(); + // twod_patch.dimension(0) == 2 + // twod_patch.dimension(1) == 2 + // twod_patch.dimension(2) == 2 + // twod_patch.dimension(3) == 3*5 + // twod_patch.dimension(4) == 7 + +*) 2D patch: RowMajor (patch indexed by the second dimension) + Tensor twod_patch_row_major; + twod_patch_row_major = tensor_row_major.extract_image_patches<2, 2>(); + // twod_patch_row_major.dimension(0) == 7 + // twod_patch_row_major.dimension(1) == 3*5 + // twod_patch_row_major.dimension(2) == 2 + // twod_patch_row_major.dimension(3) == 2 + // twod_patch_row_major.dimension(4) == 2 + +## Special Operations + +### cast() + +Returns a tensor of type T with the same dimensions as the original tensor. +The returned tensor contains the values of the original tensor converted to +type T. + + Eigen::Tensor a(2, 3); + Eigen::Tensor b = a.cast(); + +This can be useful for example if you need to do element-wise division of +Tensors of integers. This is not currently supported by the Tensor library +but you can easily cast the tensors to floats to do the division: + + Eigen::Tensor a(2, 3); + a.setValues({{0, 1, 2}, {3, 4, 5}}); + Eigen::Tensor b = + (a.cast() / a.constant(2).cast()).cast(); + cout << "a" << endl << a << endl << endl; + cout << "b" << endl << b << endl << endl; + => + a + 0 1 2 + 3 4 5 + + b + 0 0 1 + 1 2 2 + + +### eval() + +TODO + + +## Representation of scalar values + +Scalar values are often represented by tensors of size 1 and rank 1. It would be +more logical and user friendly to use tensors of rank 0 instead. For example +Tensor::maximum() currently returns a Tensor. Similarly, the inner +product of 2 1d tensors (through contractions) returns a 1d tensor. In the +future these operations might be updated to return 0d tensors instead. + +## Limitations + +* The number of tensor dimensions is currently limited to 250 when using a + compiler that supports cxx11. It is limited to only 5 for older compilers. +* The IndexList class requires a cxx11 compliant compiler. You can use an + array of indices instead if you don't have access to a modern compiler. +* On GPUs only floating point values are properly tested and optimized for. +* Complex and integer values are known to be broken on GPUs. If you try to use + them you'll most likely end up triggering a static assertion failure such as + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + + diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h new file mode 100644 index 0000000..1940a96 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/Tensor.h @@ -0,0 +1,527 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2013 Christian Seiler +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_H + +namespace Eigen { + +/** \class Tensor + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor class. + * + * The %Tensor class is the work-horse for all \em dense tensors within Eigen. + * + * The %Tensor class encompasses only dynamic-size objects so far. + * + * The first two template parameters are required: + * \tparam Scalar_ \anchor tensor_tparam_scalar Numeric type, e.g. float, double, int or std::complex. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam NumIndices_ Number of indices (i.e. rank of the tensor) + * + * The remaining template parameters are optional -- in most cases you don't have to worry about them. + * \tparam Options_ \anchor tensor_tparam_options A combination of either \b #RowMajor or \b #ColMajor, and of either + * \b #AutoAlign or \b #DontAlign. + * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter controls alignment, which is required + * for vectorization. It defaults to aligning tensors. Note that tensors currently do not support any operations that profit from vectorization. + * Support for such operations (i.e. adding two tensors etc.) is planned. + * + * You can access elements of tensors using normal subscripting: + * + * \code + * Eigen::Tensor t(10, 10, 10, 10); + * t(0, 1, 2, 3) = 42.0; + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizingEigen by defining the preprocessor symbol \c EIGEN_TENSOR_PLUGIN. + * + * Some notes: + * + *

+ *
Relation to other parts of Eigen:
+ *
The midterm developement goal for this class is to have a similar hierarchy as Eigen uses for matrices, so that + * taking blocks or using tensors in expressions is easily possible, including an interface with the vector/matrix code + * by providing .asMatrix() and .asVector() (or similar) methods for rank 2 and 1 tensors. However, currently, the %Tensor + * class does not provide any of these features and is only available as a stand-alone class that just allows for + * coefficient access. Also, when fixed-size tensors are implemented, the number of template arguments is likely to + * change dramatically.
+ *
+ * + * \ref TopicStorageOrders + */ + +template +class Tensor : public TensorBase > +{ + public: + typedef Tensor Self; + typedef TensorBase > Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef Scalar_ Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + + enum { + IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) & !(Options_&DontAlign), + Layout = Options_ & RowMajor ? RowMajor : ColMajor, + CoordAccess = true, + RawAccess = true + }; + + static const int Options = Options_; + static const int NumIndices = NumIndices_; + typedef DSizes Dimensions; + + protected: + TensorStorage m_storage; + +#ifdef EIGEN_HAS_SFINAE + template + struct isOfNormalIndex{ + static const bool is_array = internal::is_base_of, CustomIndices>::value; + static const bool is_int = NumTraits::IsInteger; + static const bool value = is_array | is_int; + }; +#endif + + public: + // Metadata + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } + + // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + // work, because that uses base().coeffRef() - and we don't yet + // implement a similar class hierarchy + inline Self& base() { return *this; } + inline const Self& base() const { return *this; } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeff(array{{firstIndex, secondIndex, otherIndices...}}); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(const array& indices) const + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(CustomIndices& indices) const + { + return coeff(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline Scalar& coeffRef(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(array{{firstIndex, secondIndex, otherIndices...}}); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array& indices) + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(CustomIndices& indices) + { + return coeffRef(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return this->operator()(array{{firstIndex, secondIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + return coeff(array(i0, i1)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + return coeff(array(i0, i1, i2)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const + { + return coeff(array(i0, i1, i2, i3)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + return coeff(array(i0, i1, i2, i3, i4)); + } +#endif + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(CustomIndices& indices) const + { + return coeff(internal::customIndices2Array(indices)); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const + { + return coeff(indices); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return coeff(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const + { + // The bracket operator is only for vectors, use the parenthesis operator instead. + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(index); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + inline Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return operator()(array{{firstIndex, secondIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + return coeffRef(array(i0, i1)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + return coeffRef(array(i0, i1, i2)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + return coeffRef(array(i0, i1, i2, i3)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + return coeffRef(array(i0, i1, i2, i3, i4)); + } +#endif + + // normal indices + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(const array& indices) + { + return coeffRef(indices); + } + + // custom indices +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(CustomIndices& indices) + { + return coeffRef(internal::customIndices2Array(indices)); + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeffRef(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator[](Index index) + { + // The bracket operator is only for vectors, use the parenthesis operator instead + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor() + : m_storage() + { + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const Self& other) + : m_storage(other.m_storage) + { + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index firstDimension, IndexTypes... otherDimensions) + : m_storage(firstDimension, otherDimensions...) + { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(Index dim1) + : m_storage(dim1, array(dim1)) + { + EIGEN_STATIC_ASSERT(1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2) + : m_storage(dim1*dim2, array(dim1, dim2)) + { + EIGEN_STATIC_ASSERT(2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3) + : m_storage(dim1*dim2*dim3, array(dim1, dim2, dim3)) + { + EIGEN_STATIC_ASSERT(3 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4) + : m_storage(dim1*dim2*dim3*dim4, array(dim1, dim2, dim3, dim4)) + { + EIGEN_STATIC_ASSERT(4 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Tensor(Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) + : m_storage(dim1*dim2*dim3*dim4*dim5, array(dim1, dim2, dim3, dim4, dim5)) + { + EIGEN_STATIC_ASSERT(5 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#endif + + /** Normal Dimension */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Tensor(const array& dimensions) + : m_storage(internal::array_prod(dimensions), dimensions) + { + EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor& operator=(const Tensor& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Tensor& operator=(const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + resize(TensorEvaluator(assign, DefaultDevice()).dimensions()); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + void resize(Index firstDimension, IndexTypes... otherDimensions) + { + // The number of dimensions used to resize a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + resize(array{{firstDimension, otherDimensions...}}); + } +#endif + + /** Normal Dimension */ + EIGEN_DEVICE_FUNC void resize(const array& dimensions) + { + int i; + Index size = Index(1); + for (i = 0; i < NumIndices; i++) { + internal::check_rows_cols_for_overflow::run(size, dimensions[i]); + size *= dimensions[i]; + } + #ifdef EIGEN_INITIALIZE_COEFFS + bool size_changed = size != this->size(); + m_storage.resize(size, dimensions); + if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + #else + m_storage.resize(size, dimensions); + #endif + } + + // Why this overload, DSizes is derived from array ??? // + EIGEN_DEVICE_FUNC void resize(const DSizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = dimensions[i]; + } + resize(dims); + } + + EIGEN_DEVICE_FUNC + void resize() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + // Nothing to do: rank 0 tensors have fixed size + } + + /** Custom Dimension */ +#ifdef EIGEN_HAS_SFINAE + template::value) ) + > + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(CustomDimension& dimensions) + { + resize(internal::customIndices2Array(dimensions)); + } +#endif + +#ifndef EIGEN_EMULATE_CXX11_META_H + template + EIGEN_DEVICE_FUNC + void resize(const Sizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#else + template + EIGEN_DEVICE_FUNC + void resize(const Sizes& dimensions) { + array dims; + for (int i = 0; i < NumIndices; ++i) { + dims[i] = static_cast(dimensions[i]); + } + resize(dims); + } +#endif + + protected: + + bool checkIndexRange(const array& indices) const + { + using internal::array_apply_and_reduce; + using internal::array_zip_and_reduce; + using internal::greater_equal_zero_op; + using internal::logical_and_op; + using internal::lesser_op; + + return + // check whether the indices are all >= 0 + array_apply_and_reduce(indices) && + // check whether the indices fit in the dimensions + array_zip_and_reduce(indices, m_storage.dimensions()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index linearizedIndex(const array& indices) const + { + if (Options&RowMajor) { + return m_storage.dimensions().IndexOfRowMajor(indices); + } else { + return m_storage.dimensions().IndexOfColMajor(indices); + } + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h new file mode 100644 index 0000000..d06f40c --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorArgMax.h @@ -0,0 +1,299 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo +// Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H +#define EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H + +namespace Eigen { +namespace internal { + +/** \class TensorIndexTuple + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor + Index Tuple class. + * + * + */ +template +struct traits > : public traits +{ + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Tuple Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorIndexTupleOp& type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorIndexTupleOp type; +}; + +} // end namespace internal + +template +class TensorIndexTupleOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + typedef Tuple CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIndexTupleOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorIndexTupleOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = /*TensorEvaluator::PacketAccess*/ false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return CoeffReturnType(index, m_impl.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, 1); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + TensorEvaluator m_impl; +}; + +namespace internal { + +/** \class TensorTupleIndex + * \ingroup CXX11_Tensor_Module + * + * \brief Converts to Tensor > and reduces to Tensor. + * + */ +template +struct traits > : public traits +{ + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef Index Scalar; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions - array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorTupleReducerOp& type; +}; + +template +struct nested, 1, + typename eval >::type> +{ + typedef TensorTupleReducerOp type; +}; + +} // end namespace internal + +template +class TensorTupleReducerOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + typedef Index CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorTupleReducerOp(const XprType& expr, + const ReduceOp& reduce_op, + const int return_dim, + const Dims& reduce_dims) + : m_xpr(expr), m_reduce_op(reduce_op), m_return_dim(return_dim), m_reduce_dims(reduce_dims) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + const ReduceOp& reduce_op() const { return m_reduce_op; } + + EIGEN_DEVICE_FUNC + const Dims& reduce_dims() const { return m_reduce_dims; } + + EIGEN_DEVICE_FUNC + int return_dim() const { return m_return_dim; } + + protected: + typename XprType::Nested m_xpr; + const ReduceOp m_reduce_op; + const int m_return_dim; + const Dims m_reduce_dims; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorTupleReducerOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename TensorIndexTupleOp::CoeffReturnType TupleType; + typedef typename TensorEvaluator >, Device>::Dimensions Dimensions; + typedef typename TensorEvaluator , Device>::Dimensions InputDimensions; + static const int NumDims = internal::array_size::value; + typedef array StrideDims; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = /*TensorEvaluator::PacketAccess*/ false, + BlockAccess = false, + Layout = TensorEvaluator >, Device>::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_orig_impl(op.expression(), device), + m_impl(op.expression().index_tuples().reduce(op.reduce_dims(), op.reduce_op()), device), + m_return_dim(op.return_dim()) { + + gen_strides(m_orig_impl.dimensions(), m_strides); + if (Layout == static_cast(ColMajor)) { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim < NumDims - 1) ? m_strides[m_return_dim + 1] : total_size; + } else { + const Index total_size = internal::array_prod(m_orig_impl.dimensions()); + m_stride_mod = (m_return_dim > 0) ? m_strides[m_return_dim - 1] : total_size; + } + m_stride_div = m_strides[m_return_dim]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + const TupleType v = m_impl.coeff(index); + return (m_return_dim < 0) ? v.first : (v.first % m_stride_mod) / m_stride_div; + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double compute_cost = 1.0 + + (m_return_dim < 0 ? 0.0 : (TensorOpCost::ModCost() + TensorOpCost::DivCost())); + return m_orig_impl.costPerCoeff(vectorized) + + m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, compute_cost); + } + + private: + EIGEN_DEVICE_FUNC void gen_strides(const InputDimensions& dims, StrideDims& strides) { + if (m_return_dim < 0) { + return; // Won't be using the strides. + } + eigen_assert(m_return_dim < NumDims && + "Asking to convert index to a dimension outside of the rank"); + + // Calculate m_stride_div and m_stride_mod, which are used to + // calculate the value of an index w.r.t. the m_return_dim. + if (Layout == static_cast(ColMajor)) { + strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + strides[i] = strides[i-1] * dims[i-1]; + } + } else { + strides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + strides[i] = strides[i+1] * dims[i+1]; + } + } + } + + protected: + TensorEvaluator, Device> m_orig_impl; + TensorEvaluator >, Device> m_impl; + const int m_return_dim; + StrideDims m_strides; + Index m_stride_mod; + Index m_stride_div; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_ARG_MAX_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h new file mode 100644 index 0000000..166be20 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -0,0 +1,181 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H +#define EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H + +namespace Eigen { + +/** \class TensorAssign + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor assignment class. + * + * This class is represents the assignment of the values resulting from the evaluation of + * the rhs expression to the memory locations denoted by the lhs expression. + */ +namespace internal { +template +struct traits > +{ + typedef typename LhsXprType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const std::size_t NumDimensions = internal::traits::NumDimensions; + static const int Layout = internal::traits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorAssignOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorAssignOp type; +}; + +} // end namespace internal + + + +template +class TensorAssignOp : public TensorBase > +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename LhsXprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorAssignOp(LhsXprType& lhs, const RhsXprType& rhs) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs) {} + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + typename internal::remove_all::type& + lhsExpression() const { return *((typename internal::remove_all::type*)&m_lhs_xpr); } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename internal::remove_all::type& m_lhs_xpr; + const typename internal::remove_all::type& m_rhs_xpr; +}; + + +template +struct TensorEvaluator, Device> +{ + typedef TensorAssignOp XprType; + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : + m_leftImpl(op.lhsExpression(), device), + m_rightImpl(op.rhsExpression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + } + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // The dimensions of the lhs and the rhs tensors should be equal to prevent + // overflows and ensure the result is fully initialized. + // TODO: use left impl instead if right impl dimensions are known at compile time. + return m_rightImpl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); + m_leftImpl.evalSubExprsIfNeeded(NULL); + // If the lhs provides raw access to its storage area (i.e. if m_leftImpl.data() returns a non + // null value), attempt to evaluate the rhs expression in place. Returns true iff in place + // evaluation isn't supported and the caller still needs to manually assign the values generated + // by the rhs to the lhs. + return m_rightImpl.evalSubExprsIfNeeded(m_leftImpl.data()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { + m_leftImpl.coeffRef(i) = m_rightImpl.coeff(i); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { + const int LhsStoreMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + const int RhsLoadMode = TensorEvaluator::IsAligned ? Aligned : Unaligned; + m_leftImpl.template writePacket(i, m_rightImpl.template packet(i)); + } + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_leftImpl.coeff(index); + } + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const + { + return m_leftImpl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // We assume that evalPacket or evalScalar is called to perform the + // assignment and account for the cost of the write here, but reduce left + // cost by one load because we are using m_leftImpl.coeffRef. + TensorOpCost left = m_leftImpl.costPerCoeff(vectorized); + return m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost( + numext::maxi(0.0, left.bytes_loaded() - sizeof(CoeffReturnType)), + left.bytes_stored(), left.compute_cycles()) + + TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); + } + + /// required by sycl in order to extract the accessor + const TensorEvaluator& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& right_impl() const { return m_rightImpl; } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); } + + private: + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; +}; + +} + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_ASSIGN_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h new file mode 100644 index 0000000..7a45a5c --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -0,0 +1,1010 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_BASE_H +#define EIGEN_CXX11_TENSOR_TENSOR_BASE_H + +// clang-format off + +namespace Eigen { + +/** \class TensorBase + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor base class. + * + * This class is the common parent of the Tensor and TensorMap class, thus + * making it possible to use either class interchangably in expressions. + */ + +template +class TensorBase +{ + public: + typedef internal::traits DerivedTraits; + typedef typename DerivedTraits::Scalar Scalar; + typedef typename DerivedTraits::Index Index; + typedef typename internal::remove_const::type CoeffReturnType; + static const int NumDimensions = DerivedTraits::NumDimensions; + + // Generic nullary operation support. + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp + nullaryExpr(const CustomNullaryOp& func) const { + return TensorCwiseNullaryOp(derived(), func); + } + + // Coefficient-wise nullary operators + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp, const Derived> + constant(const Scalar& value) const { + return nullaryExpr(internal::scalar_constant_op(value)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp, const Derived> + random() const { + return nullaryExpr(internal::UniformRandomGenerator()); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseNullaryOp + random(const RandomGenerator& gen = RandomGenerator()) const { + return nullaryExpr(gen); + } + + // Tensor generation + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorGeneratorOp + generate(const Generator& generator) const { + return TensorGeneratorOp(derived(), generator); + } + + // Generic unary operation support. + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp + unaryExpr(const CustomUnaryOp& func) const { + return TensorCwiseUnaryOp(derived(), func); + } + + // Coefficient-wise unary operators + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + operator-() const { + return unaryExpr(internal::scalar_opposite_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sqrt() const { + return unaryExpr(internal::scalar_sqrt_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sign() const { + return unaryExpr(internal::scalar_sign_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + rsqrt() const { + return unaryExpr(internal::scalar_rsqrt_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + square() const { + return unaryExpr(internal::scalar_square_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + cube() const { + return unaryExpr(internal::scalar_cube_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + inverse() const { + return unaryExpr(internal::scalar_inverse_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + tanh() const { + return unaryExpr(internal::scalar_tanh_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + lgamma() const { + return unaryExpr(internal::scalar_lgamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + digamma() const { + return unaryExpr(internal::scalar_digamma_op()); + } + + // igamma(a = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igamma(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igamma_op()); + } + + // igammac(a = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + igammac(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_igammac_op()); + } + + // zeta(x = this, q = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + zeta(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_zeta_op()); + } + + // polygamma(n = this, x = other) + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + polygamma(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_polygamma_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erf() const { + return unaryExpr(internal::scalar_erf_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + erfc() const { + return unaryExpr(internal::scalar_erfc_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + sigmoid() const { + return unaryExpr(internal::scalar_sigmoid_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + exp() const { + return unaryExpr(internal::scalar_exp_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + log() const { + return unaryExpr(internal::scalar_log_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + log1p() const { + return unaryExpr(internal::scalar_log1p_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + abs() const { + return unaryExpr(internal::scalar_abs_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + conjugate() const { + return unaryExpr(internal::scalar_conjugate_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + pow(Scalar exponent) const { + return unaryExpr(internal::bind2nd_op >(exponent)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + real() const { + return unaryExpr(internal::scalar_real_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + imag() const { + return unaryExpr(internal::scalar_imag_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator+ (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator+ (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator- (Scalar rhs) const { + EIGEN_STATIC_ASSERT((NumTraits::IsSigned || internal::is_same >::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator- (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator* (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator* (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> + operator/ (Scalar rhs) const { + return unaryExpr(internal::bind2nd_op >(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE friend + const TensorCwiseUnaryOp >, const Derived> + operator/ (Scalar lhs, const Derived& rhs) { + return rhs.unaryExpr(internal::bind1st_op >(lhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + operator% (Scalar rhs) const { + EIGEN_STATIC_ASSERT(NumTraits::IsInteger, YOU_MADE_A_PROGRAMMING_MISTAKE_TRY_MOD); + return unaryExpr(internal::scalar_mod_op(rhs)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + cwiseMax(Scalar threshold) const { + return cwiseMax(constant(threshold)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + cwiseMin(Scalar threshold) const { + return cwiseMin(constant(threshold)); + } + + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorConversionOp + cast() const { + return TensorConversionOp(derived()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + round() const { + return unaryExpr(internal::scalar_round_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + ceil() const { + return unaryExpr(internal::scalar_ceil_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + floor() const { + return unaryExpr(internal::scalar_floor_op()); + } + + // Generic binary operation support. + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp + binaryExpr(const OtherDerived& other, const CustomBinaryOp& func) const { + return TensorCwiseBinaryOp(derived(), other, func); + } + + // Coefficient-wise binary operators. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator+(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_sum_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator-(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_difference_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator*(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_product_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator/(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_quotient_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + cwiseMax(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_max_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + cwiseMin(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_min_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator&&(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_boolean_and_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator||(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_boolean_or_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp + operator^(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_boolean_xor_op()); + } + + // Comparisons and tests. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator<(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator<=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator>(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator>=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator==(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCwiseBinaryOp, const Derived, const OtherDerived> + operator!=(const OtherDerived& other) const { + return binaryExpr(other.derived(), internal::scalar_cmp_op()); + } + + // comparisons and tests for Scalars + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<(Scalar threshold) const { + return operator<(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator<=(Scalar threshold) const { + return operator<=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>(Scalar threshold) const { + return operator>(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator>=(Scalar threshold) const { + return operator>=(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator==(Scalar threshold) const { + return operator==(constant(threshold)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseBinaryOp, const Derived, const TensorCwiseNullaryOp, const Derived> > + operator!=(Scalar threshold) const { + return operator!=(constant(threshold)); + } + + // Checks + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isnan)() const { + return unaryExpr(internal::scalar_isnan_op()); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isinf)() const { + return unaryExpr(internal::scalar_isinf_op()); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + (isfinite)() const { + return unaryExpr(internal::scalar_isfinite_op()); + } + + // Coefficient-wise ternary operators. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSelectOp + select(const ThenDerived& thenTensor, const ElseDerived& elseTensor) const { + return TensorSelectOp(derived(), thenTensor.derived(), elseTensor.derived()); + } + + // Contractions. + typedef Eigen::IndexPair DimensionPair; + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorContractionOp + contract(const OtherDerived& other, const Dimensions& dims) const { + return TensorContractionOp(derived(), other.derived(), dims); + } + + // Convolutions. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConvolutionOp + convolve(const KernelDerived& kernel, const Dimensions& dims) const { + return TensorConvolutionOp(derived(), kernel.derived(), dims); + } + + // Fourier transforms + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorFFTOp + fft(const FFT& fft) const { + return TensorFFTOp(derived(), fft); + } + + // Scan. + typedef TensorScanOp, const Derived> TensorScanSumOp; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanSumOp + cumsum(const Index& axis, bool exclusive = false) const { + return TensorScanSumOp(derived(), axis, exclusive); + } + + typedef TensorScanOp, const Derived> TensorScanProdOp; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanProdOp + cumprod(const Index& axis, bool exclusive = false) const { + return TensorScanProdOp(derived(), axis, exclusive); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorScanOp + scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const { + return TensorScanOp(derived(), axis, exclusive, reducer); + } + + // Reductions. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + sum(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::SumReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + sum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::SumReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + mean(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MeanReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + mean() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MeanReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + prod(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::ProdReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + prod() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::ProdReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + maximum(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MaxReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + maximum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MaxReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const Dims, const Derived> + minimum(const Dims& dims) const { + return TensorReductionOp, const Dims, const Derived>(derived(), dims, internal::MinReducer()); + } + + const TensorReductionOp, const DimensionList, const Derived> + minimum() const { + DimensionList in_dims; + return TensorReductionOp, const DimensionList, const Derived>(derived(), in_dims, internal::MinReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp > + all(const Dims& dims) const { + return cast().reduce(dims, internal::AndReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const TensorConversionOp > + all() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::AndReducer()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp > + any(const Dims& dims) const { + return cast().reduce(dims, internal::OrReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp, const TensorConversionOp > + any() const { + DimensionList in_dims; + return cast().reduce(in_dims, internal::OrReducer()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, const Derived> + argmax() const { + array in_dims; + for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMaxTupleReducer >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, const Derived> + argmin() const { + array in_dims; + for (int d = 0; d < NumDimensions; ++d) in_dims[d] = d; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMinTupleReducer >(), -1, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, const Derived> + argmax(const int return_dim) const { + array in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMaxTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMaxTupleReducer >(), return_dim, in_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, const Derived> + argmin(const int return_dim) const { + array in_dims; + in_dims[0] = return_dim; + return TensorTupleReducerOp< + internal::ArgMinTupleReducer >, + const array, + const Derived>(derived(), internal::ArgMinTupleReducer >(), return_dim, in_dims); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReductionOp + reduce(const Dims& dims, const Reducer& reducer) const { + return TensorReductionOp(derived(), dims, reducer); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorBroadcastingOp + broadcast(const Broadcast& broadcast) const { + return TensorBroadcastingOp(derived(), broadcast); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConcatenationOp + concatenate(const OtherDerived& other, Axis axis) const { + return TensorConcatenationOp(derived(), other.derived(), axis); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPatchOp + extract_patches(const PatchDims& patch_dims) const { + return TensorPatchOp(derived(), patch_dims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorImagePatchOp + extract_image_patches(const Index patch_rows = 1, const Index patch_cols = 1, + const Index row_stride = 1, const Index col_stride = 1, + const Index in_row_stride = 1, const Index in_col_stride = 1, + const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const { + return TensorImagePatchOp(derived(), patch_rows, patch_cols, row_stride, col_stride, + in_row_stride, in_col_stride, 1, 1, padding_type, padding_value); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorImagePatchOp + extract_image_patches(const Index patch_rows, const Index patch_cols, + const Index row_stride, const Index col_stride, + const Index in_row_stride, const Index in_col_stride, + const Index row_inflate_stride, const Index col_inflate_stride, + const Index padding_top, const Index padding_bottom, + const Index padding_left,const Index padding_right, + const Scalar padding_value) const { + return TensorImagePatchOp(derived(), patch_rows, patch_cols, row_stride, col_stride, + in_row_stride, in_col_stride, row_inflate_stride, col_inflate_stride, + padding_top, padding_bottom, padding_left, padding_right, padding_value); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorVolumePatchOp + extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, + const Index plane_stride = 1, const Index row_stride = 1, const Index col_stride = 1, + const PaddingType padding_type = PADDING_SAME, const Scalar padding_value = Scalar(0)) const { + return TensorVolumePatchOp(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, 1, 1, 1, padding_type, padding_value); + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorVolumePatchOp + extract_volume_patches(const Index patch_planes, const Index patch_rows, const Index patch_cols, + const Index plane_stride, const Index row_stride, const Index col_stride, + const Index plane_inflate_stride, const Index row_inflate_stride, const Index col_inflate_stride, + const Index padding_top_z, const Index padding_bottom_z, + const Index padding_top, const Index padding_bottom, + const Index padding_left, const Index padding_right, const Scalar padding_value = Scalar(0)) const { + return TensorVolumePatchOp(derived(), patch_planes, patch_rows, patch_cols, plane_stride, row_stride, col_stride, 1, 1, 1, plane_inflate_stride, row_inflate_stride, col_inflate_stride, padding_top_z, padding_bottom_z, padding_top, padding_bottom, padding_left, padding_right, padding_value); + } + + // Morphing operators. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorLayoutSwapOp + swap_layout() const { + return TensorLayoutSwapOp(derived()); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReshapingOp + reshape(const NewDimensions& newDimensions) const { + return TensorReshapingOp(derived(), newDimensions); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) const { + return TensorSlicingOp(derived(), startIndices, sizes); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset) const { + return TensorChippingOp(derived(), offset, DimId); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset, const Index dim) const { + return TensorChippingOp(derived(), offset, dim); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReverseOp + reverse(const ReverseDimensions& rev) const { + return TensorReverseOp(derived(), rev); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPaddingOp + pad(const PaddingDimensions& padding) const { + return TensorPaddingOp(derived(), padding, internal::scalar_cast_op()(0)); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorPaddingOp + pad(const PaddingDimensions& padding, const Scalar padding_value) const { + return TensorPaddingOp(derived(), padding, padding_value); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorShufflingOp + shuffle(const Shuffle& shuffle) const { + return TensorShufflingOp(derived(), shuffle); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingOp + stride(const Strides& strides) const { + return TensorStridingOp(derived(), strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorInflationOp + inflate(const Strides& strides) const { + return TensorInflationOp(derived(), strides); + } + + // Returns a tensor containing index/value tuples + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorIndexTupleOp + index_tuples() const { + return TensorIndexTupleOp(derived()); + } + + // Support for custom unary and binary operations + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCustomUnaryOp customOp(const CustomUnaryFunc& op) const { + return TensorCustomUnaryOp(derived(), op); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorCustomBinaryOp customOp(const OtherDerived& other, const CustomBinaryFunc& op) const { + return TensorCustomBinaryOp(derived(), other, op); + } + + // Force the evaluation of the expression. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorForcedEvalOp eval() const { + return TensorForcedEvalOp(derived()); + } + + protected: + template friend class Tensor; + template friend class TensorFixedSize; + template friend class TensorBase; + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } +}; + +template::value> +class TensorBase : public TensorBase { + public: + typedef internal::traits DerivedTraits; + typedef typename DerivedTraits::Scalar Scalar; + typedef typename DerivedTraits::Index Index; + typedef Scalar CoeffReturnType; + static const int NumDimensions = DerivedTraits::NumDimensions; + + template friend class Tensor; + template friend class TensorFixedSize; + template friend class TensorBase; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setZero() { + return setConstant(Scalar(0)); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setConstant(const Scalar& val) { + return derived() = this->constant(val); + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setRandom() { + return derived() = this->random(); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setRandom() { + return derived() = this->template random(); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& setValues( + const typename internal::Initializer::InitList& vals) { + TensorEvaluator eval(derived(), DefaultDevice()); + internal::initialize_tensor(eval, vals); + return derived(); + } +#endif // EIGEN_HAS_VARIADIC_TEMPLATES + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator+=(const OtherDerived& other) { + return derived() = derived() + other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator-=(const OtherDerived& other) { + return derived() = derived() - other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator*=(const OtherDerived& other) { + return derived() = derived() * other.derived(); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Derived& operator/=(const OtherDerived& other) { + return derived() = derived() / other.derived(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorLayoutSwapOp + swap_layout() const { + return TensorLayoutSwapOp(derived()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorLayoutSwapOp + swap_layout() { + return TensorLayoutSwapOp(derived()); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorConcatenationOp + concatenate(const OtherDerived& other, const Axis& axis) const { + return TensorConcatenationOp(derived(), other, axis); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorConcatenationOp + concatenate(const OtherDerived& other, const Axis& axis) { + return TensorConcatenationOp(derived(), other, axis); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReshapingOp + reshape(const NewDimensions& newDimensions) const { + return TensorReshapingOp(derived(), newDimensions); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReshapingOp + reshape(const NewDimensions& newDimensions) { + return TensorReshapingOp(derived(), newDimensions); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) const { + return TensorSlicingOp(derived(), startIndices, sizes); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorSlicingOp + slice(const StartIndices& startIndices, const Sizes& sizes) { + return TensorSlicingOp(derived(), startIndices, sizes); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) const { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorStridingSlicingOp + stridedSlice(const StartIndices& startIndices, const StopIndices& stopIndices, const Strides& strides) { + return TensorStridingSlicingOp(derived(), startIndices, stopIndices, strides); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset) const { + return TensorChippingOp(derived(), offset, DimId); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset) { + return TensorChippingOp(derived(), offset, DimId); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorChippingOp + chip(const Index offset, const Index dim) const { + return TensorChippingOp(derived(), offset, dim); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorChippingOp + chip(const Index offset, const Index dim) { + return TensorChippingOp(derived(), offset, dim); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorReverseOp + reverse(const ReverseDimensions& rev) const { + return TensorReverseOp(derived(), rev); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReverseOp + reverse(const ReverseDimensions& rev) { + return TensorReverseOp(derived(), rev); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorShufflingOp + shuffle(const Shuffle& shuffle) const { + return TensorShufflingOp(derived(), shuffle); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorShufflingOp + shuffle(const Shuffle& shuffle) { + return TensorShufflingOp(derived(), shuffle); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const TensorStridingOp + stride(const Strides& strides) const { + return TensorStridingOp(derived(), strides); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorStridingOp + stride(const Strides& strides) { + return TensorStridingOp(derived(), strides); + } + + // Select the device on which to evaluate the expression. + template + TensorDevice device(const DeviceType& device) { + return TensorDevice(device, derived()); + } + + protected: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_BASE_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h new file mode 100644 index 0000000..4cfe300 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -0,0 +1,392 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H +#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H + +namespace Eigen { + +/** \class TensorBroadcasting + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor broadcasting class. + * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorBroadcastingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorBroadcastingOp type; +}; + +template +struct is_input_scalar { + static const bool value = false; +}; +template <> +struct is_input_scalar > { + static const bool value = true; +}; +#ifndef EIGEN_EMULATE_CXX11_META_H +template +struct is_input_scalar > { + static const bool value = (Sizes::total_size == 1); +}; +#endif + +} // end namespace internal + + + +template +class TensorBroadcastingOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast) + : m_xpr(expr), m_broadcast(broadcast) {} + + EIGEN_DEVICE_FUNC + const Broadcast& broadcast() const { return m_broadcast; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Broadcast m_broadcast; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorBroadcastingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions InputDimensions; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = true, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_broadcast(op.broadcast()),m_impl(op.expression(), device) + { + // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar + // and store the result in a scalar. Instead one should reshape the scalar into a a N-D + // tensor with N >= 1 of 1 element first and then broadcast. + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + const InputDimensions& input_dims = m_impl.dimensions(); + const Broadcast& broadcast = op.broadcast(); + for (int i = 0; i < NumDims; ++i) { + eigen_assert(input_dims[i] > 0); + m_dimensions[i] = input_dims[i] * broadcast[i]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + } else { + m_inputStrides[NumDims-1] = 1; + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims-2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const + { + if (internal::is_input_scalar::type>::value) { + return m_impl.coeff(0); + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + return coeffColMajor(index); + } else { + return coeffRowMajor(index); + } + } + + // TODO: attempt to speed this up. The integer divisions and modulo are slow + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const + { + Index inputIndex = 0; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index < m_impl.dimensions()[0]); + inputIndex += index; + } else { + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index % m_impl.dimensions()[0] == 0); + } else { + inputIndex += (index % m_impl.dimensions()[0]); + } + } + return m_impl.coeff(inputIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const + { + Index inputIndex = 0; + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index < m_impl.dimensions()[NumDims-1]); + inputIndex += index; + } else { + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); + } else { + inputIndex += (index % m_impl.dimensions()[NumDims-1]); + } + } + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const + { + if (internal::is_input_scalar::type>::value) { + return internal::pset1(m_impl.coeff(0)); + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + return packetColMajor(index); + } else { + return packetRowMajor(index); + } + } + + // Ignore the LoadMode and always use unaligned loads since we can't guarantee + // the alignment at compile time. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index originalIndex = index; + + Index inputIndex = 0; + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + Index innermostLoc; + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index < m_impl.dimensions()[0]); + innermostLoc = index; + } else { + if (internal::index_statically_eq(0, 1)) { + eigen_assert(index % m_impl.dimensions()[0] == 0); + innermostLoc = 0; + } else { + innermostLoc = index % m_impl.dimensions()[0]; + } + } + inputIndex += innermostLoc; + + // Todo: this could be extended to the second dimension if we're not + // broadcasting alongside the first dimension, and so on. + if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) { + return m_impl.template packet(inputIndex); + } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + values[0] = m_impl.coeff(inputIndex); + for (int i = 1; i < PacketSize; ++i) { + values[i] = coeffColMajor(originalIndex+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index originalIndex = index; + + Index inputIndex = 0; + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx < m_impl.dimensions()[i]); + inputIndex += idx * m_inputStrides[i]; + } else { + if (internal::index_statically_eq(i, 1)) { + eigen_assert(idx % m_impl.dimensions()[i] == 0); + } else { + inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i]; + } + } + index -= idx * m_outputStrides[i]; + } + Index innermostLoc; + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index < m_impl.dimensions()[NumDims-1]); + innermostLoc = index; + } else { + if (internal::index_statically_eq(NumDims-1, 1)) { + eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0); + innermostLoc = 0; + } else { + innermostLoc = index % m_impl.dimensions()[NumDims-1]; + } + } + inputIndex += innermostLoc; + + // Todo: this could be extended to the second dimension if we're not + // broadcasting alongside the first dimension, and so on. + if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) { + return m_impl.template packet(inputIndex); + } else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + values[0] = m_impl.coeff(inputIndex); + for (int i = 1; i < PacketSize; ++i) { + values[i] = coeffRowMajor(originalIndex+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + double compute_cost = TensorOpCost::AddCost(); + if (NumDims > 0) { + for (int i = NumDims - 1; i > 0; --i) { + compute_cost += TensorOpCost::DivCost(); + if (internal::index_statically_eq(i, 1)) { + compute_cost += + TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } else { + if (!internal::index_statically_eq(i, 1)) { + compute_cost += TensorOpCost::MulCost() + + TensorOpCost::ModCost() + + TensorOpCost::AddCost(); + } + } + compute_cost += + TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } + } + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + const TensorEvaluator& impl() const { return m_impl; } + + Broadcast functor() const { return m_broadcast; } + + protected: + const Broadcast m_broadcast; + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h new file mode 100644 index 0000000..1ba7ef1 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h @@ -0,0 +1,384 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H +#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H + +namespace Eigen { + +/** \class TensorKChippingReshaping + * \ingroup CXX11_Tensor_Module + * + * \brief A chip is a thin slice, corresponding to a column or a row in a 2-d tensor. + * + * + */ + +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions - 1; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorChippingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorChippingOp type; +}; + +template +struct DimensionId +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) { + eigen_assert(dim == DimId); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { + return DimId; + } +}; +template <> +struct DimensionId +{ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) : actual_dim(dim) { + eigen_assert(dim >= 0); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const { + return actual_dim; + } + private: + const DenseIndex actual_dim; +}; + + +} // end namespace internal + + + +template +class TensorChippingOp : public TensorBase > +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim) + : m_xpr(expr), m_offset(offset), m_dim(dim) { + } + + EIGEN_DEVICE_FUNC + const Index offset() const { return m_offset; } + EIGEN_DEVICE_FUNC + const Index dim() const { return m_dim.actualDim(); } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorChippingOp& operator = (const TensorChippingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorChippingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; + const Index m_offset; + const internal::DimensionId m_dim; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorChippingOp XprType; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims-1; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets. + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device) + { + EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(NumInputDims > m_dim.actualDim()); + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + eigen_assert(op.offset() < input_dims[m_dim.actualDim()]); + + int j = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (i != m_dim.actualDim()) { + m_dimensions[j] = input_dims[i]; + ++j; + } + } + + m_stride = 1; + m_inputStride = 1; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < m_dim.actualDim(); ++i) { + m_stride *= input_dims[i]; + m_inputStride *= input_dims[i]; + } + } else { + for (int i = NumInputDims-1; i > m_dim.actualDim(); --i) { + m_stride *= input_dims[i]; + m_inputStride *= input_dims[i]; + } + } + m_inputStride *= input_dims[m_dim.actualDim()]; + m_inputOffset = m_stride * op.offset(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == 0) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(m_stride == 1); + Index inputIndex = index * m_inputStride + m_inputOffset; + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = m_impl.coeff(inputIndex); + inputIndex += m_inputStride; + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } else if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == NumInputDims - 1) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == 0)) { + // m_stride is aways greater than index, so let's avoid the integer division. + eigen_assert(m_stride > index); + return m_impl.template packet(index + m_inputOffset); + } else { + const Index idx = index / m_stride; + const Index rem = index - idx * m_stride; + if (rem + PacketSize <= m_stride) { + Index inputIndex = idx * m_inputStride + m_inputOffset + rem; + return m_impl.template packet(inputIndex); + } else { + // Cross the stride boundary. Fallback to slow path. + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index); + ++index; + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + double cost = 0; + if ((static_cast(Layout) == static_cast(ColMajor) && + m_dim.actualDim() == 0) || + (static_cast(Layout) == static_cast(RowMajor) && + m_dim.actualDim() == NumInputDims - 1)) { + cost += TensorOpCost::MulCost() + TensorOpCost::AddCost(); + } else if ((static_cast(Layout) == static_cast(ColMajor) && + m_dim.actualDim() == NumInputDims - 1) || + (static_cast(Layout) == static_cast(RowMajor) && + m_dim.actualDim() == 0)) { + cost += TensorOpCost::AddCost(); + } else { + cost += 3 * TensorOpCost::MulCost() + TensorOpCost::DivCost() + + 3 * TensorOpCost::AddCost(); + } + + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { + CoeffReturnType* result = const_cast(m_impl.data()); + if (((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == NumDims) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == 0)) && + result) { + return result + m_inputOffset; + } else { + return NULL; + } + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex; + if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == 0) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == NumInputDims-1)) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(m_stride == 1); + inputIndex = index * m_inputStride + m_inputOffset; + } else if ((static_cast(Layout) == static_cast(ColMajor) && m_dim.actualDim() == NumInputDims-1) || + (static_cast(Layout) == static_cast(RowMajor) && m_dim.actualDim() == 0)) { + // m_stride is aways greater than index, so let's avoid the integer division. + eigen_assert(m_stride > index); + inputIndex = index + m_inputOffset; + } else { + const Index idx = index / m_stride; + inputIndex = idx * m_inputStride + m_inputOffset; + index -= idx * m_stride; + inputIndex += index; + } + return inputIndex; + } + + Dimensions m_dimensions; + Index m_stride; + Index m_inputOffset; + Index m_inputStride; + TensorEvaluator m_impl; + const internal::DimensionId m_dim; + const Device& m_device; +}; + + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorChippingOp XprType; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims-1; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + + if ((static_cast(this->Layout) == static_cast(ColMajor) && this->m_dim.actualDim() == 0) || + (static_cast(this->Layout) == static_cast(RowMajor) && this->m_dim.actualDim() == NumInputDims-1)) { + // m_stride is equal to 1, so let's avoid the integer division. + eigen_assert(this->m_stride == 1); + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + internal::pstore(values, x); + Index inputIndex = index * this->m_inputStride + this->m_inputOffset; + for (int i = 0; i < PacketSize; ++i) { + this->m_impl.coeffRef(inputIndex) = values[i]; + inputIndex += this->m_inputStride; + } + } else if ((static_cast(this->Layout) == static_cast(ColMajor) && this->m_dim.actualDim() == NumInputDims-1) || + (static_cast(this->Layout) == static_cast(RowMajor) && this->m_dim.actualDim() == 0)) { + // m_stride is aways greater than index, so let's avoid the integer division. + eigen_assert(this->m_stride > index); + this->m_impl.template writePacket(index + this->m_inputOffset, x); + } else { + const Index idx = index / this->m_stride; + const Index rem = index - idx * this->m_stride; + if (rem + PacketSize <= this->m_stride) { + const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem; + this->m_impl.template writePacket(inputIndex, x); + } else { + // Cross stride boundary. Fallback to slow path. + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + internal::pstore(values, x); + for (int i = 0; i < PacketSize; ++i) { + this->coeffRef(index) = values[i]; + ++index; + } + } + } + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h new file mode 100644 index 0000000..59bf90d --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h @@ -0,0 +1,361 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H + +namespace Eigen { + +/** \class TensorConcatenationOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor concatenation class. + * + * + */ +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename promote_storage_type::ret Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + enum { Flags = 0 }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConcatenationOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConcatenationOp type; +}; + +} // end namespace internal + + +template +class TensorConcatenationOp : public TensorBase, WriteAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::nested::type Nested; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename NumTraits::Real RealScalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + EIGEN_DEVICE_FUNC const Axis& axis() const { return m_axis; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const TensorConcatenationOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorConcatenationOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const Axis m_axis; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorConcatenationOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + static const int RightNumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis()) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || NumDims == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims == RightNumDims), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + eigen_assert(0 <= m_axis && m_axis < NumDims); + const Dimensions& lhs_dims = m_leftImpl.dimensions(); + const Dimensions& rhs_dims = m_rightImpl.dimensions(); + { + int i = 0; + for (; i < m_axis; ++i) { + eigen_assert(lhs_dims[i] > 0); + eigen_assert(lhs_dims[i] == rhs_dims[i]); + m_dimensions[i] = lhs_dims[i]; + } + eigen_assert(lhs_dims[i] > 0); // Now i == m_axis. + eigen_assert(rhs_dims[i] > 0); + m_dimensions[i] = lhs_dims[i] + rhs_dims[i]; + for (++i; i < NumDims; ++i) { + eigen_assert(lhs_dims[i] > 0); + eigen_assert(lhs_dims[i] == rhs_dims[i]); + m_dimensions[i] = lhs_dims[i]; + } + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_leftStrides[0] = 1; + m_rightStrides[0] = 1; + m_outputStrides[0] = 1; + + for (int j = 1; j < NumDims; ++j) { + m_leftStrides[j] = m_leftStrides[j-1] * lhs_dims[j-1]; + m_rightStrides[j] = m_rightStrides[j-1] * rhs_dims[j-1]; + m_outputStrides[j] = m_outputStrides[j-1] * m_dimensions[j-1]; + } + } else { + m_leftStrides[NumDims - 1] = 1; + m_rightStrides[NumDims - 1] = 1; + m_outputStrides[NumDims - 1] = 1; + + for (int j = NumDims - 2; j >= 0; --j) { + m_leftStrides[j] = m_leftStrides[j+1] * lhs_dims[j+1]; + m_rightStrides[j] = m_rightStrides[j+1] * rhs_dims[j+1]; + m_outputStrides[j] = m_outputStrides[j+1] * m_dimensions[j+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + // TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear? + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) + { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() + { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + // TODO(phli): attempt to speed this up. The integer divisions and modulo are slow. + // See CL/76180724 comments for more ideas. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Collect dimension-wise indices (subs). + array subs; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + subs[i] = index / m_outputStrides[i]; + index -= subs[i] * m_outputStrides[i]; + } + subs[0] = index; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + subs[i] = index / m_outputStrides[i]; + index -= subs[i] * m_outputStrides[i]; + } + subs[NumDims - 1] = index; + } + + const Dimensions& left_dims = m_leftImpl.dimensions(); + if (subs[m_axis] < left_dims[m_axis]) { + Index left_index; + if (static_cast(Layout) == static_cast(ColMajor)) { + left_index = subs[0]; + for (int i = 1; i < NumDims; ++i) { + left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; + } + } else { + left_index = subs[NumDims - 1]; + for (int i = NumDims - 2; i >= 0; --i) { + left_index += (subs[i] % left_dims[i]) * m_leftStrides[i]; + } + } + return m_leftImpl.coeff(left_index); + } else { + subs[m_axis] -= left_dims[m_axis]; + const Dimensions& right_dims = m_rightImpl.dimensions(); + Index right_index; + if (static_cast(Layout) == static_cast(ColMajor)) { + right_index = subs[0]; + for (int i = 1; i < NumDims; ++i) { + right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; + } + } else { + right_index = subs[NumDims - 1]; + for (int i = NumDims - 2; i >= 0; --i) { + right_index += (subs[i] % right_dims[i]) * m_rightStrides[i]; + } + } + return m_rightImpl.coeff(right_index); + } + } + + // TODO(phli): Add a real vectorization. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + packetSize - 1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (2 * TensorOpCost::AddCost() + + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost() + + TensorOpCost::ModCost()); + const double lhs_size = m_leftImpl.dimensions().TotalSize(); + const double rhs_size = m_rightImpl.dimensions().TotalSize(); + return (lhs_size / (lhs_size + rhs_size)) * + m_leftImpl.costPerCoeff(vectorized) + + (rhs_size / (lhs_size + rhs_size)) * + m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_leftStrides; + array m_rightStrides; + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; + const Axis m_axis; +}; + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorConcatenationOp XprType; + typedef typename Base::Dimensions Dimensions; + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(XprType& op, const Device& device) + : Base(op, device) + { + EIGEN_STATIC_ASSERT((static_cast(Layout) == static_cast(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + // Collect dimension-wise indices (subs). + array subs; + for (int i = Base::NumDims - 1; i > 0; --i) { + subs[i] = index / this->m_outputStrides[i]; + index -= subs[i] * this->m_outputStrides[i]; + } + subs[0] = index; + + const Dimensions& left_dims = this->m_leftImpl.dimensions(); + if (subs[this->m_axis] < left_dims[this->m_axis]) { + Index left_index = subs[0]; + for (int i = 1; i < Base::NumDims; ++i) { + left_index += (subs[i] % left_dims[i]) * this->m_leftStrides[i]; + } + return this->m_leftImpl.coeffRef(left_index); + } else { + subs[this->m_axis] -= left_dims[this->m_axis]; + const Dimensions& right_dims = this->m_rightImpl.dimensions(); + Index right_index = subs[0]; + for (int i = 1; i < Base::NumDims; ++i) { + right_index += (subs[i] % right_dims[i]) * this->m_rightStrides[i]; + } + return this->m_rightImpl.coeffRef(right_index); + } + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + packetSize - 1 < this->dimensions().TotalSize()); + + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + internal::pstore(values, x); + for (int i = 0; i < packetSize; ++i) { + coeffRef(index+i) = values[i]; + } + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h new file mode 100644 index 0000000..20b29e5 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -0,0 +1,628 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H + +namespace Eigen { + +/** \class TensorContraction + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor contraction class. + * + * + */ +namespace internal { + +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename gebp_traits::type, + typename remove_const::type>::ResScalar Scalar; + + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + + // From NumDims below. + static const int NumDimensions = traits::NumDimensions + traits::NumDimensions - 2 * array_size::value; + static const int Layout = traits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorContractionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorContractionOp type; +}; + +template +struct traits, Device_> > { + typedef Indices_ Indices; + typedef LeftArgType_ LeftArgType; + typedef RightArgType_ RightArgType; + typedef Device_ Device; + + // From NumDims below. + static const int NumDimensions = traits::NumDimensions + traits::NumDimensions - 2 * array_size::value; +}; + +} // end namespace internal + +template +class TensorContractionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename internal::gebp_traits::ResScalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp( + const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_indices(dims) {} + + EIGEN_DEVICE_FUNC + const Indices& indices() const { return m_indices; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const Indices m_indices; +}; + + +template +struct TensorContractionEvaluatorBase +{ + typedef typename internal::traits::Indices Indices; + typedef typename internal::traits::LeftArgType LeftArgType; + typedef typename internal::traits::RightArgType RightArgType; + typedef typename internal::traits::Device Device; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + IsAligned = true, + PacketAccess = (internal::unpacket_traits::size > 1), + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = true + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + typedef DSizes Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorContractionEvaluatorBase(const XprType& op, const Device& device) + : m_leftImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), + op.lhsExpression(), op.rhsExpression()), device), + m_rightImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), + op.rhsExpression(), op.lhsExpression()), device), + m_device(device), + m_result(NULL) { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == + static_cast(TensorEvaluator::Layout)), + YOU_MADE_A_PROGRAMMING_MISTAKE); + + + DSizes eval_left_dims; + DSizes eval_right_dims; + array, ContractDims> eval_op_indices; + if (static_cast(Layout) == static_cast(ColMajor)) { + // For ColMajor, we keep using the existing dimensions + for (int i = 0; i < LDims; i++) { + eval_left_dims[i] = m_leftImpl.dimensions()[i]; + } + for (int i = 0; i < RDims; i++) { + eval_right_dims[i] = m_rightImpl.dimensions()[i]; + } + // We keep the pairs of contracting indices. + for (int i = 0; i < ContractDims; i++) { + eval_op_indices[i].first = op.indices()[i].first; + eval_op_indices[i].second = op.indices()[i].second; + } + } else { + // For RowMajor, we need to reverse the existing dimensions + for (int i = 0; i < LDims; i++) { + eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1]; + } + for (int i = 0; i < RDims; i++) { + eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1]; + } + // We need to flip all the pairs of contracting indices as well as + // reversing the dimensions. + for (int i = 0; i < ContractDims; i++) { + eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second; + eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first; + } + } + + // Check for duplicate axes and make sure the first index in eval_op_indices + // is increasing. Using O(n^2) sorting is OK since ContractDims is small + for (int i = 0; i < ContractDims; i++) { + for (int j = i + 1; j < ContractDims; j++) { + eigen_assert(eval_op_indices[j].first != eval_op_indices[i].first && + eval_op_indices[j].second != eval_op_indices[i].second && + "contraction axes should be unique"); + if (eval_op_indices[j].first < eval_op_indices[i].first) { + numext::swap(eval_op_indices[j], eval_op_indices[i]); + } + } + } + + array lhs_strides; + lhs_strides[0] = 1; + for (int i = 0; i < LDims-1; ++i) { + lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i]; + } + + array rhs_strides; + rhs_strides[0] = 1; + for (int i = 0; i < RDims-1; ++i) { + rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i]; + } + + if (m_i_strides.size() > 0) m_i_strides[0] = 1; + if (m_j_strides.size() > 0) m_j_strides[0] = 1; + if (m_k_strides.size() > 0) m_k_strides[0] = 1; + + m_i_size = 1; + m_j_size = 1; + m_k_size = 1; + + // To compute the dimension, we simply concatenate the non-contracting + // dimensions of the left and then the right tensor. Additionally, we also + // compute the strides corresponding to the left non-contracting + // dimensions and right non-contracting dimensions. + m_lhs_inner_dim_contiguous = true; + int dim_idx = 0; + unsigned int nocontract_idx = 0; + + for (int i = 0; i < LDims; i++) { + // find if we are contracting on index i of left tensor + bool contracting = false; + for (int j = 0; j < ContractDims; j++) { + if (eval_op_indices[j].first == i) { + contracting = true; + break; + } + } + if (!contracting) { + // add dimension size to output dimensions + m_dimensions[dim_idx] = eval_left_dims[i]; + m_left_nocontract_strides[nocontract_idx] = lhs_strides[i]; + if (dim_idx != i) { + m_lhs_inner_dim_contiguous = false; + } + if (nocontract_idx+1 < internal::array_size::value) { + m_i_strides[nocontract_idx+1] = + m_i_strides[nocontract_idx] * eval_left_dims[i]; + } else { + m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i]; + } + dim_idx++; + nocontract_idx++; + } + } + + nocontract_idx = 0; + for (int i = 0; i < RDims; i++) { + bool contracting = false; + // find if we are contracting on index i of right tensor + for (int j = 0; j < ContractDims; j++) { + if (eval_op_indices[j].second == i) { + contracting = true; + break; + } + } + if (!contracting) { + m_dimensions[dim_idx] = eval_right_dims[i]; + if (nocontract_idx+1 < internal::array_size::value) { + m_j_strides[nocontract_idx+1] = + m_j_strides[nocontract_idx] * eval_right_dims[i]; + } else { + m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i]; + } + m_right_nocontract_strides[nocontract_idx] = rhs_strides[i]; + dim_idx++; + nocontract_idx++; + } + } + + // Now compute the strides corresponding to the contracting dimensions. We + // assumed above that non-contracting axes are represented in the same order + // in the matrix as they are in the tensor. This is not the case for + // contracting axes. As the contracting axes must be of the same size in + // each tensor, we'll only look at the first tensor here. + m_rhs_inner_dim_contiguous = true; + m_rhs_inner_dim_reordered = false; + for (int i = 0; i < ContractDims; i++) { + Index left = eval_op_indices[i].first; + Index right = eval_op_indices[i].second; + + Index size = eval_left_dims[left]; + eigen_assert(size == eval_right_dims[right] && + "Contraction axes must be same size"); + + if (i+1 < static_cast(internal::array_size::value)) { + m_k_strides[i+1] = m_k_strides[i] * size; + } else { + m_k_size = m_k_strides[i] * size; + } + m_left_contracting_strides[i] = lhs_strides[left]; + m_right_contracting_strides[i] = rhs_strides[right]; + + if (i > 0 && right < eval_op_indices[i-1].second) { + m_rhs_inner_dim_reordered = true; + } + if (right != i) { + m_rhs_inner_dim_contiguous = false; + } + } + + // If the layout is RowMajor, we need to reverse the m_dimensions + if (static_cast(Layout) == static_cast(RowMajor)) { + for (int i = 0, j = NumDims - 1; i < j; i++, j--) { + numext::swap(m_dimensions[i], m_dimensions[j]); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + evalTo(m_result); + return true; + } + } + + EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const { + if (this->m_lhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + static_cast(this)->template evalProduct(buffer); + } + else { + static_cast(this)->template evalProduct(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + static_cast(this)->template evalProduct(buffer); + } + else { + static_cast(this)->template evalProduct(buffer); + } + } + } + else { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + static_cast(this)->template evalProduct(buffer); + } + else { + static_cast(this)->template evalProduct(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + static_cast(this)->template evalProduct(buffer); + } + else { + static_cast(this)->template evalProduct(buffer); + } + } + } + } + + template + EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const { + const Index rows = m_i_size; + const Index cols = m_k_size; + + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + const Index lhs_packet_size = internal::unpacket_traits::size; + const Index rhs_packet_size = internal::unpacket_traits::size; + const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned; + const int rhs_alignment = RightEvaluator::IsAligned ? Aligned : Unaligned; + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides, + m_left_contracting_strides, m_k_strides); + RhsMapper rhs(m_rightImpl, m_right_nocontract_strides, m_j_strides, + m_right_contracting_strides, m_k_strides); + + const Scalar alpha(1); + const Index resIncr(1); + + // zero out the result buffer (which must be of size at least rows * sizeof(Scalar) + m_device.memset(buffer, 0, rows * sizeof(Scalar)); + + internal::general_matrix_vector_product::run( + rows, cols, lhs, rhs, + buffer, resIncr, alpha); + } + + template + EIGEN_DEVICE_FUNC void evalGemm(Scalar* buffer) const { + // columns in left side, rows in right side + const Index k = this->m_k_size; + + // rows in left side + const Index m = this->m_i_size; + + // columns in right side + const Index n = this->m_j_size; + + // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) + this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); + + // define mr, nr, and all of my data mapper types + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + typedef typename internal::gebp_traits Traits; + + const Index nr = Traits::nr; + const Index mr = Traits::mr; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + const Index lhs_packet_size = internal::unpacket_traits::size; + const Index rhs_packet_size = internal::unpacket_traits::size; + + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + // Declare GEBP packing and kernel structs + internal::gemm_pack_lhs pack_lhs; + internal::gemm_pack_rhs pack_rhs; + + internal::gebp_kernel gebp; + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + + OutputMapper output(buffer, m); + + // Sizes of the blocks to load in cache. See the Goto paper for details. + internal::TensorContractionBlocking blocking(k, m, n, 1); + const Index kc = blocking.kc(); + const Index mc = numext::mini(m, blocking.mc()); + const Index nc = numext::mini(n, blocking.nc()); + const Index sizeA = mc * kc; + const Index sizeB = kc * nc; + + LhsScalar* blockA = static_cast(this->m_device.allocate(sizeA * sizeof(LhsScalar))); + RhsScalar* blockB = static_cast(this->m_device.allocate(sizeB * sizeof(RhsScalar))); + + for(Index i2=0; i2m_device.deallocate(blockA); + this->m_device.deallocate(blockB); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { return m_result; } + + protected: + // Prevent assignment + TensorContractionEvaluatorBase& operator = (const TensorContractionEvaluatorBase&); + Dimensions m_dimensions; + + contract_t m_k_strides; + contract_t m_left_contracting_strides; + contract_t m_right_contracting_strides; + + bool m_lhs_inner_dim_contiguous; + bool m_rhs_inner_dim_contiguous; + bool m_rhs_inner_dim_reordered; + + left_nocontract_t m_i_strides; + right_nocontract_t m_j_strides; + left_nocontract_t m_left_nocontract_strides; + right_nocontract_t m_right_nocontract_strides; + + Index m_i_size; + Index m_j_size; + Index m_k_size; + + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; + const Device& m_device; + Scalar* m_result; +}; + + +// evaluator for default device +template +struct TensorEvaluator, Device> : + public TensorContractionEvaluatorBase< + TensorEvaluator, Device> > { + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = TensorEvaluator::Layout + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + // Could we use NumDimensions here? + typedef DSizes Dimensions; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) { } + + template + EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const { + if (this->m_j_size == 1) { + this->template evalGemv(buffer); + return; + } + + this->template evalGemm(buffer); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h new file mode 100644 index 0000000..5cf7b4f --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h @@ -0,0 +1,56 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H + + +namespace Eigen { +namespace internal { + +enum { + ShardByRow = 0, + ShardByCol = 1 +}; + + +// Default Blocking Strategy +template +class TensorContractionBlocking { + public: + + typedef typename LhsMapper::Scalar LhsScalar; + typedef typename RhsMapper::Scalar RhsScalar; + + EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : + kc_(k), mc_(m), nc_(n) + { + if (ShardingType == ShardByCol) { + computeProductBlockingSizes(kc_, mc_, nc_, num_threads); + } + else { + computeProductBlockingSizes(kc_, nc_, mc_, num_threads); + } + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } + + private: + Index kc_; + Index mc_; + Index nc_; +}; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_BLOCKING_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h new file mode 100644 index 0000000..d65dbb4 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h @@ -0,0 +1,1391 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014-2015 Benoit Steiner +// Copyright (C) 2015 Navdeep Jaitly +// Copyright (C) 2014 Eric Martin +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + +namespace Eigen { + +template +__device__ EIGEN_STRONG_INLINE void +EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, Scalar* lhs_shmem, Scalar* rhs_shmem, + const Index m_size, const Index n_size, const Index k_size) { + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + // declare and initialize 64 registers for output 8x8 block + + // prefetch registers + Scalar lhs_pf0; + Scalar lhs_pf1; + Scalar lhs_pf2; + Scalar lhs_pf3; + Scalar lhs_pf4; + Scalar lhs_pf5; + Scalar lhs_pf6; + Scalar lhs_pf7; + + Scalar rhs_pf0; + Scalar rhs_pf1; + Scalar rhs_pf2; + Scalar rhs_pf3; + Scalar rhs_pf4; + Scalar rhs_pf5; + Scalar rhs_pf6; + Scalar rhs_pf7; + + // shared memory is formatted + // (contract idx in block, nocontract idx in block, block idx) + // where block idx is column major. This transposition limits the number of + // bank conflicts when reading the LHS. The core idea is that since the contracting + // index is shared by both sides, then the contracting index should be in threadIdx.x. + + // On the LHS, we pad each row inside of each block with an extra element. This makes + // each block 8 rows of 9 elements, which is 72 elements. This gives no bank conflicts + // on writes and very few 2-way conflicts on reads. There is an 8x8 grid of these blocks. + + // On the RHS we just add 8 padding elements to the end of each block. This gives no bank + // conflicts on writes and also none on reads. + + // storage indices + const Index lhs_store_idx_base = threadIdx.y * 72 + threadIdx.x * 9 + threadIdx.z; + const Index rhs_store_idx_base = threadIdx.y * 72 + threadIdx.z * 8 + threadIdx.x; + + const Index lhs_store_idx_0 = lhs_store_idx_base + 576 * 0; + const Index lhs_store_idx_1 = lhs_store_idx_base + 576 * 1; + const Index lhs_store_idx_2 = lhs_store_idx_base + 576 * 2; + const Index lhs_store_idx_3 = lhs_store_idx_base + 576 * 3; + const Index lhs_store_idx_4 = lhs_store_idx_base + 576 * 4; + const Index lhs_store_idx_5 = lhs_store_idx_base + 576 * 5; + const Index lhs_store_idx_6 = lhs_store_idx_base + 576 * 6; + const Index lhs_store_idx_7 = lhs_store_idx_base + 576 * 7; + + const Index rhs_store_idx_0 = rhs_store_idx_base + 576 * 0; + const Index rhs_store_idx_1 = rhs_store_idx_base + 576 * 1; + const Index rhs_store_idx_2 = rhs_store_idx_base + 576 * 2; + const Index rhs_store_idx_3 = rhs_store_idx_base + 576 * 3; + const Index rhs_store_idx_4 = rhs_store_idx_base + 576 * 4; + const Index rhs_store_idx_5 = rhs_store_idx_base + 576 * 5; + const Index rhs_store_idx_6 = rhs_store_idx_base + 576 * 6; + const Index rhs_store_idx_7 = rhs_store_idx_base + 576 * 7; + + // in the loading code, the following variables are important: + // threadIdx.x: the vertical position in an 8x8 block + // threadIdx.y: the vertical index of the 8x8 block in the grid + // threadIdx.z: the horizontal position in an 8x8 block + // k: the horizontal index of the 8x8 block in the grid + // + // The k parameter is implicit (it was the loop counter for a loop that went + // from 0 to <8, but now that loop is unrolled in the below code. + + const Index load_idx_vert = threadIdx.x + 8 * threadIdx.y; + const Index lhs_vert = base_m + load_idx_vert; + +#define prefetchIntoRegisters(base_k) \ + { \ + lhs_pf0 = conv(0); \ + lhs_pf1 = conv(0); \ + lhs_pf2 = conv(0); \ + lhs_pf3 = conv(0); \ + lhs_pf4 = conv(0); \ + lhs_pf5 = conv(0); \ + lhs_pf6 = conv(0); \ + lhs_pf7 = conv(0); \ + \ + rhs_pf0 = conv(0); \ + rhs_pf1 = conv(0); \ + rhs_pf2 = conv(0); \ + rhs_pf3 = conv(0); \ + rhs_pf4 = conv(0); \ + rhs_pf5 = conv(0); \ + rhs_pf6 = conv(0); \ + rhs_pf7 = conv(0); \ + \ + if (!needs_edge_check || lhs_vert < m_size) { \ + const Index lhs_horiz_0 = base_k + threadIdx.z + 0 * 8; \ + const Index lhs_horiz_1 = base_k + threadIdx.z + 1 * 8; \ + const Index lhs_horiz_2 = base_k + threadIdx.z + 2 * 8; \ + const Index lhs_horiz_3 = base_k + threadIdx.z + 3 * 8; \ + const Index lhs_horiz_4 = base_k + threadIdx.z + 4 * 8; \ + const Index lhs_horiz_5 = base_k + threadIdx.z + 5 * 8; \ + const Index lhs_horiz_6 = base_k + threadIdx.z + 6 * 8; \ + const Index lhs_horiz_7 = base_k + threadIdx.z + 7 * 8; \ + \ + if (!needs_edge_check || lhs_horiz_7 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ + lhs_pf7 = lhs(lhs_vert, lhs_horiz_7); \ + } else if (lhs_horiz_6 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + lhs_pf6 = lhs(lhs_vert, lhs_horiz_6); \ + } else if (lhs_horiz_5 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + lhs_pf5 = lhs(lhs_vert, lhs_horiz_5); \ + } else if (lhs_horiz_4 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + lhs_pf4 = lhs(lhs_vert, lhs_horiz_4); \ + } else if (lhs_horiz_3 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + lhs_pf3 = lhs(lhs_vert, lhs_horiz_3); \ + } else if (lhs_horiz_2 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + lhs_pf2 = lhs(lhs_vert, lhs_horiz_2); \ + } else if (lhs_horiz_1 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + lhs_pf1 = lhs(lhs_vert, lhs_horiz_1); \ + } else if (lhs_horiz_0 < k_size) { \ + lhs_pf0 = lhs(lhs_vert, lhs_horiz_0); \ + } \ + } \ + \ + const Index rhs_vert = base_k + load_idx_vert; \ + if (!needs_edge_check || rhs_vert < k_size) { \ + const Index rhs_horiz_0 = base_n + threadIdx.z + 0 * 8; \ + const Index rhs_horiz_1 = base_n + threadIdx.z + 1 * 8; \ + const Index rhs_horiz_2 = base_n + threadIdx.z + 2 * 8; \ + const Index rhs_horiz_3 = base_n + threadIdx.z + 3 * 8; \ + const Index rhs_horiz_4 = base_n + threadIdx.z + 4 * 8; \ + const Index rhs_horiz_5 = base_n + threadIdx.z + 5 * 8; \ + const Index rhs_horiz_6 = base_n + threadIdx.z + 6 * 8; \ + const Index rhs_horiz_7 = base_n + threadIdx.z + 7 * 8; \ + \ + if (rhs_horiz_7 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ + rhs_pf7 = rhs(rhs_vert, rhs_horiz_7); \ + } else if (rhs_horiz_6 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + rhs_pf6 = rhs(rhs_vert, rhs_horiz_6); \ + } else if (rhs_horiz_5 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + rhs_pf5 = rhs(rhs_vert, rhs_horiz_5); \ + } else if (rhs_horiz_4 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + rhs_pf4 = rhs(rhs_vert, rhs_horiz_4); \ + } else if (rhs_horiz_3 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + rhs_pf3 = rhs(rhs_vert, rhs_horiz_3); \ + } else if (rhs_horiz_2 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + rhs_pf2 = rhs(rhs_vert, rhs_horiz_2); \ + } else if (rhs_horiz_1 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + rhs_pf1 = rhs(rhs_vert, rhs_horiz_1); \ + } else if (rhs_horiz_0 < n_size) { \ + rhs_pf0 = rhs(rhs_vert, rhs_horiz_0); \ + } \ + } \ + } \ + +#define writeRegToShmem(_) \ + lhs_shmem[lhs_store_idx_0] = lhs_pf0; \ + rhs_shmem[rhs_store_idx_0] = rhs_pf0; \ + \ + lhs_shmem[lhs_store_idx_1] = lhs_pf1; \ + rhs_shmem[rhs_store_idx_1] = rhs_pf1; \ + \ + lhs_shmem[lhs_store_idx_2] = lhs_pf2; \ + rhs_shmem[rhs_store_idx_2] = rhs_pf2; \ + \ + lhs_shmem[lhs_store_idx_3] = lhs_pf3; \ + rhs_shmem[rhs_store_idx_3] = rhs_pf3; \ + \ + lhs_shmem[lhs_store_idx_4] = lhs_pf4; \ + rhs_shmem[rhs_store_idx_4] = rhs_pf4; \ + \ + lhs_shmem[lhs_store_idx_5] = lhs_pf5; \ + rhs_shmem[rhs_store_idx_5] = rhs_pf5; \ + \ + lhs_shmem[lhs_store_idx_6] = lhs_pf6; \ + rhs_shmem[rhs_store_idx_6] = rhs_pf6; \ + \ + lhs_shmem[lhs_store_idx_7] = lhs_pf7; \ + rhs_shmem[rhs_store_idx_7] = rhs_pf7; \ + + // declare and initialize result array +#define res(i, j) _res_##i##j +#define initResultRow(i) \ + Scalar res(i, 0) = conv(0); \ + Scalar res(i, 1) = conv(0); \ + Scalar res(i, 2) = conv(0); \ + Scalar res(i, 3) = conv(0); \ + Scalar res(i, 4) = conv(0); \ + Scalar res(i, 5) = conv(0); \ + Scalar res(i, 6) = conv(0); \ + Scalar res(i, 7) = conv(0); \ + + internal::scalar_cast_op conv; + initResultRow(0); + initResultRow(1); + initResultRow(2); + initResultRow(3); + initResultRow(4); + initResultRow(5); + initResultRow(6); + initResultRow(7); +#undef initResultRow + + for (Index base_k = 0; base_k < k_size; base_k += 64) { + // wait for previous iteration to finish with shmem. Despite common sense, + // the code is a bit faster with this here then at bottom of loop + __syncthreads(); + + prefetchIntoRegisters(base_k); + writeRegToShmem(); + + #undef prefetchIntoRegisters + #undef writeRegToShmem + + // wait for shared mem packing to be done before starting computation + __syncthreads(); + + // compute 8x8 matrix product by outer product. This involves packing one column + // of LHS and one row of RHS into registers (takes 16 registers). + +#define lcol(i) _lcol##i + Scalar lcol(0); + Scalar lcol(1); + Scalar lcol(2); + Scalar lcol(3); + Scalar lcol(4); + Scalar lcol(5); + Scalar lcol(6); + Scalar lcol(7); + +#define rrow(j) _rrow##j + Scalar rrow(0); + Scalar rrow(1); + Scalar rrow(2); + Scalar rrow(3); + Scalar rrow(4); + Scalar rrow(5); + Scalar rrow(6); + Scalar rrow(7); + + // Now x corresponds to k, y to m, and z to n + const Scalar* lhs_block = &lhs_shmem[threadIdx.x + 9 * threadIdx.y]; + const Scalar* rhs_block = &rhs_shmem[threadIdx.x + 8 * threadIdx.z]; + +#define lhs_element(i, j) lhs_block[72 * ((i) + 8 * (j))] +#define rhs_element(i, j) rhs_block[72 * ((i) + 8 * (j))] + +#define loadData(i, j) \ + lcol(0) = lhs_element(0, j); \ + rrow(0) = rhs_element(i, 0); \ + lcol(1) = lhs_element(1, j); \ + rrow(1) = rhs_element(i, 1); \ + lcol(2) = lhs_element(2, j); \ + rrow(2) = rhs_element(i, 2); \ + lcol(3) = lhs_element(3, j); \ + rrow(3) = rhs_element(i, 3); \ + lcol(4) = lhs_element(4, j); \ + rrow(4) = rhs_element(i, 4); \ + lcol(5) = lhs_element(5, j); \ + rrow(5) = rhs_element(i, 5); \ + lcol(6) = lhs_element(6, j); \ + rrow(6) = rhs_element(i, 6); \ + lcol(7) = lhs_element(7, j); \ + rrow(7) = rhs_element(i, 7); \ + +#define computeCol(j) \ + res(0, j) += lcol(0) * rrow(j); \ + res(1, j) += lcol(1) * rrow(j); \ + res(2, j) += lcol(2) * rrow(j); \ + res(3, j) += lcol(3) * rrow(j); \ + res(4, j) += lcol(4) * rrow(j); \ + res(5, j) += lcol(5) * rrow(j); \ + res(6, j) += lcol(6) * rrow(j); \ + res(7, j) += lcol(7) * rrow(j); \ + +#define computePass(i) \ + loadData(i, i); \ + \ + computeCol(0); \ + computeCol(1); \ + computeCol(2); \ + computeCol(3); \ + computeCol(4); \ + computeCol(5); \ + computeCol(6); \ + computeCol(7); \ + + computePass(0); + computePass(1); + computePass(2); + computePass(3); + computePass(4); + computePass(5); + computePass(6); + computePass(7); + +#undef lcol +#undef rrow +#undef lhs_element +#undef rhs_element +#undef loadData +#undef computeCol +#undef computePass + } // end loop over k + + // we've now iterated over all of the large (ie width 64) k blocks and + // accumulated results in registers. At this point thread (x, y, z) contains + // the sum across all big k blocks of the product of little k block of index (x, y) + // with block of index (y, z). To compute the final output, we need to reduce + // the 8 threads over y by summation. +#define shuffleInc(i, j, mask) res(i, j) += __shfl_xor(res(i, j), mask) + +#define reduceRow(i, mask) \ + shuffleInc(i, 0, mask); \ + shuffleInc(i, 1, mask); \ + shuffleInc(i, 2, mask); \ + shuffleInc(i, 3, mask); \ + shuffleInc(i, 4, mask); \ + shuffleInc(i, 5, mask); \ + shuffleInc(i, 6, mask); \ + shuffleInc(i, 7, mask); \ + +#define reduceMatrix(mask) \ + reduceRow(0, mask); \ + reduceRow(1, mask); \ + reduceRow(2, mask); \ + reduceRow(3, mask); \ + reduceRow(4, mask); \ + reduceRow(5, mask); \ + reduceRow(6, mask); \ + reduceRow(7, mask); \ + + // actually perform the reduction, now each thread of index (_, y, z) + // contains the correct values in its registers that belong in the output + // block + reduceMatrix(1); + reduceMatrix(2); + reduceMatrix(4); + +#undef shuffleInc +#undef reduceRow +#undef reduceMatrix + + // now we need to copy the 64 values into main memory. We can't split work + // among threads because all variables are in registers. There's 2 ways + // to do this: + // (1) have 1 thread do 64 writes from registers into global memory + // (2) have 1 thread do 64 writes into shared memory, and then 8 threads + // each do 8 writes into global memory. We can just overwrite the shared + // memory from the problem we just solved. + // (2) is slightly faster than (1) due to less branching and more ILP + + // TODO: won't yield much gain, but could just use currently unused shared mem + // and then we won't have to sync + // wait for shared mem to be out of use + __syncthreads(); + +#define writeResultShmem(i, j) \ + lhs_shmem[i + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j] = res(i, j); \ + +#define writeRow(i) \ + writeResultShmem(i, 0); \ + writeResultShmem(i, 1); \ + writeResultShmem(i, 2); \ + writeResultShmem(i, 3); \ + writeResultShmem(i, 4); \ + writeResultShmem(i, 5); \ + writeResultShmem(i, 6); \ + writeResultShmem(i, 7); \ + + if (threadIdx.x == 0) { + writeRow(0); + writeRow(1); + writeRow(2); + writeRow(3); + writeRow(4); + writeRow(5); + writeRow(6); + writeRow(7); + } +#undef writeResultShmem +#undef writeRow + + const int max_i_write = numext::mini((int)((m_size - base_m - threadIdx.y + 7) / 8), 8); + const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8); + + if (threadIdx.x < max_i_write) { + if (max_j_write == 8) { + // TODO: can i trade bank conflicts for coalesced writes? + Scalar val0 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 0]; + Scalar val1 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 1]; + Scalar val2 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 2]; + Scalar val3 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 3]; + Scalar val4 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 4]; + Scalar val5 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 5]; + Scalar val6 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 6]; + Scalar val7 = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * 7]; + + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 0) = val0; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 1) = val1; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 2) = val2; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 3) = val3; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 4) = val4; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 5) = val5; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 6) = val6; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * 7) = val7; + } else { +#pragma unroll 7 + for (int j = 0; j < max_j_write; j++) { + Scalar val = lhs_shmem[threadIdx.x + 8 * threadIdx.y + 64 * threadIdx.z + 512 * j]; + output(base_m + threadIdx.y + 8 * threadIdx.x, base_n + threadIdx.z + 8 * j) = val; + } + } + } +#undef res +} + + +template +__global__ void +__launch_bounds__(512) +EigenContractionKernel(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ Scalar lhs_shmem[72 * 64]; + __shared__ Scalar rhs_shmem[72 * 64]; + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + if (base_m + 63 < m_size && base_n + 63 < n_size) { + EigenContractionKernelInternal(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); + } else { + EigenContractionKernelInternal(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size); + } +} + + +template +__device__ EIGEN_STRONG_INLINE void +EigenFloatContractionKernelInternal16x16(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, float2 lhs_shmem2[][16], + float2 rhs_shmem2[][8], const Index m_size, + const Index n_size, const Index k_size, + const Index base_m, const Index base_n) { + typedef float Scalar; + + // prefetch registers + float4 lhs_pf0, rhs_pf0; + + float4 results[4]; + for (int i=0; i < 4; i++) { + results[i].x = results[i].y = results[i].z = results[i].w = 0; + } + + +#define prefetch_lhs(reg, row, col) \ + if (!CHECK_LHS_BOUNDARY) { \ + if (col < k_size) { \ + reg =lhs.loadPacket(row, col); \ + } \ + } else { \ + if (col < k_size) { \ + if (row + 3 < m_size) { \ + reg =lhs.loadPacket(row, col); \ + } else if (row + 2 < m_size) { \ + reg.x =lhs(row + 0, col); \ + reg.y =lhs(row + 1, col); \ + reg.z =lhs(row + 2, col); \ + } else if (row + 1 < m_size) { \ + reg.x =lhs(row + 0, col); \ + reg.y =lhs(row + 1, col); \ + } else if (row < m_size) { \ + reg.x =lhs(row + 0, col); \ + } \ + } \ + } \ + + + Index lhs_vert = base_m+threadIdx.x*4; + + for (Index k = 0; k < k_size; k += 16) { + lhs_pf0 = internal::pset1(0); + rhs_pf0 = internal::pset1(0); + + Index lhs_horiz = threadIdx.y+k; + prefetch_lhs(lhs_pf0, lhs_vert, lhs_horiz) + + Index rhs_vert = k+(threadIdx.x%4)*4; + Index rhs_horiz0 = (threadIdx.x>>2)+threadIdx.y*4+base_n; + + if (!CHECK_RHS_BOUNDARY) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if (rhs_vert + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } else { + if (rhs_horiz0 < n_size) { + if ((rhs_vert + 3) < k_size) { + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + } else if ((rhs_vert + 2) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if ((rhs_vert + 1) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } + } + float x1, x2 ; + // the following can be a bitwise operation..... some day. + if((threadIdx.x%8) < 4) { + x1 = rhs_pf0.y; + x2 = rhs_pf0.w; + } else { + x1 = rhs_pf0.x; + x2 = rhs_pf0.z; + } + x1 = __shfl_xor(x1, 4); + x2 = __shfl_xor(x2, 4); + if((threadIdx.x%8) < 4) { + rhs_pf0.y = x1; + rhs_pf0.w = x2; + } else { + rhs_pf0.x = x1; + rhs_pf0.z = x2; + } + + // We have 64 features. + // Row 0 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 0, 1. + // Row 1 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 2, 3. + // ... + // Row 31 -> times (0, 4, 8, 12, 1, 5, 9, 13) for features 62, 63 + // Row 32 -> times (2, 6, 10, 14, 3, 7, 11, 15) for features 0, 1 + // ... + rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2][threadIdx.x%8] = make_float2(rhs_pf0.x, rhs_pf0.y); + rhs_shmem2[(threadIdx.x>>3)+ threadIdx.y*2+32][threadIdx.x%8] = make_float2(rhs_pf0.z, rhs_pf0.w); + + // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) + // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) + // ... + // Row 15 (time 15) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) + // Row 16 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) + // ... + + lhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(lhs_pf0.x, lhs_pf0.y); + lhs_shmem2[threadIdx.y+16][threadIdx.x] = make_float2(lhs_pf0.z, lhs_pf0.w); + + +#define add_vals(fl1, fl2, fr1, fr2)\ + results[0].x += fl1.x * fr1.x;\ + results[0].y += fl1.y * fr1.x;\ + results[0].z += fl2.x * fr1.x;\ + results[0].w += fl2.y * fr1.x;\ +\ + results[1].x += fl1.x * fr1.y;\ + results[1].y += fl1.y * fr1.y;\ + results[1].z += fl2.x * fr1.y;\ + results[1].w += fl2.y * fr1.y;\ +\ + results[2].x += fl1.x * fr2.x;\ + results[2].y += fl1.y * fr2.x;\ + results[2].z += fl2.x * fr2.x;\ + results[2].w += fl2.y * fr2.x;\ +\ + results[3].x += fl1.x * fr2.y;\ + results[3].y += fl1.y * fr2.y;\ + results[3].z += fl2.x * fr2.y;\ + results[3].w += fl2.y * fr2.y;\ + + __syncthreads(); + + // Do the multiplies. + #pragma unroll + for (int koff = 0; koff < 16; koff ++) { + // 32 x threads. + float2 fl1 = lhs_shmem2[koff][threadIdx.x]; + float2 fl2 = lhs_shmem2[koff + 16][threadIdx.x]; + + int start_feature = threadIdx.y * 4; + float2 fr1 = rhs_shmem2[(start_feature>>1) + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; + float2 fr2 = rhs_shmem2[(start_feature>>1) + 1 + 32*((koff%4)/2)][koff/4 + (koff%2)*4]; + + add_vals(fl1, fl2, fr1, fr2) + } + __syncthreads(); + } + +#undef prefetch_lhs +#undef add_vals + + Index horiz_base = threadIdx.y*4+base_n; + if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (!CHECK_RHS_BOUNDARY) { + // CHECK LHS + if (lhs_vert + 3 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (lhs_vert + 2 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + } + } else if (lhs_vert + 1 < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + } + } else if (lhs_vert < m_size) { + for (int i = 0; i < 4; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + } + } + } else if (!CHECK_LHS_BOUNDARY) { + // CHECK RHS + /* + int ncols_rem = fminf(n_size- horiz_base, 4); + for (int i = 0; i < ncols_rem; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + }*/ + for (int i = 0; i < 4; i++) { + if (horiz_base+i < n_size) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } else { + // CHECK both boundaries. + for (int i = 0; i < 4; i++) { + if (horiz_base+i < n_size) { + if (lhs_vert < m_size) + output(lhs_vert, horiz_base + i) = results[i].x; + if (lhs_vert + 1 < m_size) + output(lhs_vert + 1, horiz_base + i) = results[i].y; + if (lhs_vert + 2 < m_size) + output(lhs_vert + 2, horiz_base + i) = results[i].z; + if (lhs_vert + 3 < m_size) + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } +} + + +template +__device__ EIGEN_STRONG_INLINE void +EigenFloatContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, float2 lhs_shmem2[][32], + float2 rhs_shmem2[][8], const Index m_size, + const Index n_size, const Index k_size, + const Index base_m, const Index base_n) { + typedef float Scalar; + + // prefetch registers + float4 lhs_pf0, lhs_pf1, lhs_pf2, lhs_pf3; + float4 rhs_pf0, rhs_pf1; + + float4 results[8]; + for (int i=0; i < 8; i++) { + results[i].x = results[i].y = results[i].z = results[i].w = 0; + } + + + Index lhs_vert = base_m+threadIdx.x*4+(threadIdx.y%4)*32; + for (Index k = 0; k < k_size; k += 32) { + lhs_pf0 = internal::pset1(0); + lhs_pf1 = internal::pset1(0); + lhs_pf2 = internal::pset1(0); + lhs_pf3 = internal::pset1(0); + + rhs_pf0 = internal::pset1(0); + rhs_pf1 = internal::pset1(0); + + if (!CHECK_LHS_BOUNDARY) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + } + } else { + // just CHECK_LHS_BOUNDARY + if (lhs_vert + 3 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + lhs_pf3 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + lhs_pf2 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + lhs_pf1 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0 =lhs.loadPacket(lhs_vert, (threadIdx.y/4+k)); + } + } else if (lhs_vert + 2 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); + lhs_pf3.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf2.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf1.z =lhs(lhs_vert + 2, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf0.z =lhs(lhs_vert + 2, (threadIdx.y/4+k)); + } + } else if (lhs_vert + 1 < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + lhs_pf3.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf2.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf1.y =lhs(lhs_vert + 1, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf0.y =lhs(lhs_vert + 1, (threadIdx.y/4+k)); + } + } else if (lhs_vert < m_size) { + if ((threadIdx.y/4+k+24) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + lhs_pf3.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+24)); + } else if ((threadIdx.y/4+k+16) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + lhs_pf2.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+16)); + } else if ((threadIdx.y/4+k+8) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + lhs_pf1.x =lhs(lhs_vert + 0, (threadIdx.y/4+k+8)); + } else if ((threadIdx.y/4+k) < k_size) { + lhs_pf0.x =lhs(lhs_vert + 0, (threadIdx.y/4+k)); + } + } + } + __syncthreads(); + Index rhs_vert = k+threadIdx.x*4; + Index rhs_horiz0 = threadIdx.y*2+base_n; + Index rhs_horiz1 = threadIdx.y*2+1+base_n; + if (!CHECK_RHS_BOUNDARY) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); + } else if (rhs_vert + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + } + } else { + if (rhs_horiz1 < n_size) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + rhs_pf1 = rhs.loadPacket(rhs_vert, rhs_horiz1); + } else if (rhs_vert + 2 < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + rhs_pf1.z = rhs(rhs_vert + 2, rhs_horiz1); + } else if (k+threadIdx.x*4 + 1 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + rhs_pf1.y = rhs(rhs_vert + 1, rhs_horiz1); + } else if (k+threadIdx.x*4 < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf1.x = rhs(rhs_vert, rhs_horiz1); + } + } else if (rhs_horiz0 < n_size) { + if ((rhs_vert + 3) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0 = rhs.loadPacket(rhs_vert, rhs_horiz0); + } else if ((rhs_vert + 2) < k_size) { + // just CHECK_RHS_BOUNDARY + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + rhs_pf0.z = rhs(rhs_vert + 2, rhs_horiz0); + } else if ((rhs_vert + 1) < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + rhs_pf0.y = rhs(rhs_vert + 1, rhs_horiz0); + } else if (rhs_vert < k_size) { + rhs_pf0.x = rhs(rhs_vert, rhs_horiz0); + } + } + } + __syncthreads(); + // Loaded. Do computation + // Row 0 -> times (0, 4, 8, .. 28) for features 0, 1. + // Row 1 -> times (0, 4, 8, .. 28) for features 2, 3. + // .. + // Row 31 -> times (0, 4, 8, .. 28) for features 62, 63 + rhs_shmem2[threadIdx.y][threadIdx.x] = make_float2(rhs_pf0.x, rhs_pf1.x); + // Row 32 -> times (1, 5, 9, .. 29) for features 0, 1. + // Row 33 -> times (1, 5, 9, .. 29) for features 2, 3. + // .. + rhs_shmem2[threadIdx.y+32][threadIdx.x] = make_float2(rhs_pf0.y, rhs_pf1.y); + // Row 64 -> times (2, 6, 10, .. 30) for features 0, 1. + // Row 65 -> times (2, 6, 10, .. 30) for features 2, 3. + rhs_shmem2[threadIdx.y+64][threadIdx.x] = make_float2(rhs_pf0.z, rhs_pf1.z); + // Row 96 -> times (3, 7, 11, .. 31) for features 0, 1. + // Row 97 -> times (3, 7, 11, .. 31) for features 2, 3. + rhs_shmem2[threadIdx.y+96][threadIdx.x] = make_float2(rhs_pf0.w, rhs_pf1.w); + + // LHS. + // Row 0 (time 0) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) + // Row 1 (time 1) -> features (0, 1), (4, 5), .. (28, 29), (32, 33), .. (60, 61) .. (124, 125) + // ... + // Row 8 (time 0) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) + // Row 15 (time 7) -> features (2, 3), (6, 7), .. (30, 31), (34, 35), .. (62, 63) .. (126, 127) + + +#define add_vals(a_feat1, a_feat2, f1, f2, f3, f4)\ + results[0].x += a_feat1.x * f1.x;\ + results[1].x += a_feat1.x * f1.y;\ + results[2].x += a_feat1.x * f2.x;\ + results[3].x += a_feat1.x * f2.y;\ + results[4].x += a_feat1.x * f3.x;\ + results[5].x += a_feat1.x * f3.y;\ + results[6].x += a_feat1.x * f4.x;\ + results[7].x += a_feat1.x * f4.y;\ +\ + results[0].y += a_feat1.y * f1.x;\ + results[1].y += a_feat1.y * f1.y;\ + results[2].y += a_feat1.y * f2.x;\ + results[3].y += a_feat1.y * f2.y;\ + results[4].y += a_feat1.y * f3.x;\ + results[5].y += a_feat1.y * f3.y;\ + results[6].y += a_feat1.y * f4.x;\ + results[7].y += a_feat1.y * f4.y;\ +\ + results[0].z += a_feat2.x * f1.x;\ + results[1].z += a_feat2.x * f1.y;\ + results[2].z += a_feat2.x * f2.x;\ + results[3].z += a_feat2.x * f2.y;\ + results[4].z += a_feat2.x * f3.x;\ + results[5].z += a_feat2.x * f3.y;\ + results[6].z += a_feat2.x * f4.x;\ + results[7].z += a_feat2.x * f4.y;\ +\ + results[0].w += a_feat2.y * f1.x;\ + results[1].w += a_feat2.y * f1.y;\ + results[2].w += a_feat2.y * f2.x;\ + results[3].w += a_feat2.y * f2.y;\ + results[4].w += a_feat2.y * f3.x;\ + results[5].w += a_feat2.y * f3.y;\ + results[6].w += a_feat2.y * f4.x;\ + results[7].w += a_feat2.y * f4.y;\ + + lhs_shmem2[threadIdx.y/4][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.x, lhs_pf0.y); + lhs_shmem2[threadIdx.y/4+8][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.x, lhs_pf1.y); + lhs_shmem2[threadIdx.y/4+16][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.x, lhs_pf2.y); + lhs_shmem2[threadIdx.y/4+24][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.x, lhs_pf3.y); + + lhs_shmem2[threadIdx.y/4 + 32][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf0.z, lhs_pf0.w); + lhs_shmem2[threadIdx.y/4 + 40][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf1.z, lhs_pf1.w); + lhs_shmem2[threadIdx.y/4 + 48][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf2.z, lhs_pf2.w); + lhs_shmem2[threadIdx.y/4 + 56][threadIdx.x+(threadIdx.y%4)*8] = make_float2(lhs_pf3.z, lhs_pf3.w); + + __syncthreads(); + + // Do the multiplies. + #pragma unroll + for (int koff = 0; koff < 32; koff ++) { + float2 a3 = lhs_shmem2[koff][threadIdx.x + (threadIdx.y % 4) * 8]; + float2 a4 = lhs_shmem2[koff + 32][threadIdx.x + (threadIdx.y % 4) * 8]; + + // first feature is at (threadIdx.y/4) * 8 last is at start + 8. + int start_feature = (threadIdx.y / 4) * 8; + + float2 br1 = rhs_shmem2[start_feature/2 + (koff % 4) * 32][koff/4]; + float2 br2 = rhs_shmem2[start_feature/2 + 1 + (koff % 4) * 32][koff/4]; + float2 br3 = rhs_shmem2[start_feature/2 + 2 + (koff % 4) * 32][koff/4]; + float2 br4 = rhs_shmem2[start_feature/2 + 3 + (koff % 4) * 32][koff/4]; + + add_vals(a3, a4, br1, br2, br3, br4) + } + __syncthreads(); + } // end loop over k + + + __syncthreads(); + Index horiz_base = (threadIdx.y/4)*8+base_n; + if (!CHECK_LHS_BOUNDARY && !CHECK_RHS_BOUNDARY) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (!CHECK_RHS_BOUNDARY) { + if (lhs_vert + 3 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } else if (lhs_vert + 2 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + } + } else if (lhs_vert + 1 < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + } + } else if (lhs_vert < m_size) { + for (int i = 0; i < 8; i++) { + output(lhs_vert, horiz_base + i) = results[i].x; + } + } + } else if (!CHECK_LHS_BOUNDARY) { + // CHECK BOUNDARY_B + for (int i = 0; i < 8; i++) { + if (horiz_base + i < n_size) { + output(lhs_vert, horiz_base + i) = results[i].x; + output(lhs_vert + 1, horiz_base + i) = results[i].y; + output(lhs_vert + 2, horiz_base + i) = results[i].z; + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } else { + // CHECK both boundaries. + for (int i = 0; i < 8; i++) { + if (horiz_base + i < n_size) { + if (lhs_vert < m_size) + output(lhs_vert, horiz_base + i) = results[i].x; + if (lhs_vert + 1 < m_size) + output(lhs_vert + 1, horiz_base + i) = results[i].y; + if (lhs_vert + 2 < m_size) + output(lhs_vert + 2, horiz_base + i) = results[i].z; + if (lhs_vert + 3 < m_size) + output(lhs_vert + 3, horiz_base + i) = results[i].w; + } + } + } +} + + +template +__global__ void +__launch_bounds__(256) +EigenFloatContractionKernel(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ float2 lhs_shmem[64*32]; + __shared__ float2 rhs_shmem[128*8]; + + typedef float2 LHS_MEM[64][32]; + typedef float2 RHS_MEM[128][8]; + + typedef float2 LHS_MEM16x16[32][16]; + typedef float2 RHS_MEM16x16[64][8]; + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 128 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + bool check_rhs = (base_n + 63) >= n_size; + bool check_lhs128 = (base_m + 127) >= m_size; + + if (!check_rhs) { + if (!check_lhs128) { + // >= 128 rows left + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } + } else { + if (!check_lhs128) { + // >= 128 rows left + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal( + lhs, rhs, output, *((LHS_MEM *) lhs_shmem), *((RHS_MEM *) rhs_shmem), m_size, n_size, k_size, base_m, base_n); + } + } +} + +template +__global__ void +__launch_bounds__(256) +EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs, + const OutputMapper output, + const Index m_size, const Index n_size, const Index k_size) { + __shared__ float2 lhs_shmem[32][16]; + __shared__ float2 rhs_shmem[64][8]; + + const Index m_block_idx = blockIdx.x; + const Index n_block_idx = blockIdx.y; + + const Index base_m = 64 * m_block_idx; + const Index base_n = 64 * n_block_idx; + + if (base_m + 63 < m_size) { + if (base_n + 63 < n_size) { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } + } else { + if (base_n + 63 < n_size) { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } else { + EigenFloatContractionKernelInternal16x16(lhs, rhs, output, lhs_shmem, rhs_shmem, m_size, n_size, k_size, base_m, base_n); + } + } +} + + +template +struct TensorEvaluator, GpuDevice> : + public TensorContractionEvaluatorBase, GpuDevice> > { + + typedef GpuDevice Device; + + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = TensorEvaluator::Layout, + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array left_dim_mapper_t; + typedef array right_dim_mapper_t; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef DSizes Dimensions; + + // typedefs needed in evalTo + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + typedef typename LeftEvaluator::Dimensions LeftDimensions; + typedef typename RightEvaluator::Dimensions RightDimensions; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) {} + + // We need to redefine this method to make nvcc happy + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + this->m_leftImpl.evalSubExprsIfNeeded(NULL); + this->m_rightImpl.evalSubExprsIfNeeded(NULL); + if (data) { + evalTo(data); + return false; + } else { + this->m_result = static_cast(this->m_device.allocate(this->dimensions().TotalSize() * sizeof(Scalar))); + evalTo(this->m_result); + return true; + } + } + + void evalTo(Scalar* buffer) const { + if (this->m_lhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + } + else { + if (this->m_rhs_inner_dim_contiguous) { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + else { + if (this->m_rhs_inner_dim_reordered) { + evalTyped(buffer); + } + else { + evalTyped(buffer); + } + } + } + } + + template struct LaunchKernels { + static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) { + const Index m_blocks = (m + 63) / 64; + const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(8, 8, 8); + LAUNCH_CUDA_KERNEL((EigenContractionKernel), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } + }; + + template struct LaunchKernels { + static void Run(const LhsMapper& lhs, const RhsMapper& rhs, const OutputMapper& output, Index m, Index n, Index k, const GpuDevice& device) { + if (m < 768 || n < 768) { + const Index m_blocks = (m + 63) / 64; + const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(16, 16, 1); + LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel16x16), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } else { + const Index m_blocks = (m + 127) / 128; + const Index n_blocks = (n + 63) / 64; + const dim3 num_blocks(m_blocks, n_blocks, 1); + const dim3 block_size(8, 32, 1); + LAUNCH_CUDA_KERNEL((EigenFloatContractionKernel), num_blocks, block_size, 0, device, lhs, rhs, output, m, n, k); + } + } + }; + + template + void evalTyped(Scalar* buffer) const { + // columns in left side, rows in right side + const Index k = this->m_k_size; + EIGEN_UNUSED_VARIABLE(k) + + // rows in left side + const Index m = this->m_i_size; + + // columns in right side + const Index n = this->m_j_size; + + // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) + this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); + + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + + OutputMapper output(buffer, m); + + setCudaSharedMemConfig(cudaSharedMemBankSizeEightByte); + LaunchKernels::Run(lhs, rhs, output, m, n, k, this->m_device); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_USE_GPU and __CUDACC__ +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_CUDA_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h new file mode 100644 index 0000000..9b2cb3f --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h @@ -0,0 +1,467 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H + +namespace Eigen { + +namespace internal { + +enum { + Rhs = 0, + Lhs = 1 +}; + +/* + * Implementation of the Eigen blas_data_mapper class for tensors. + */ + +template struct CoeffLoader { + enum { + DirectOffsets = false + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_tensor(tensor) { } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index) { + eigen_assert(false && "unsupported"); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return m_tensor.coeff(index); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename Tensor::PacketReturnType packet(typename Tensor::Index index) const + { + return m_tensor.template packet(index); + } + + + private: + const Tensor m_tensor; +}; + +template struct CoeffLoader { + enum { + DirectOffsets = true + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffLoader(const Tensor& tensor) : m_data(tensor.data()) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { + m_data += offset; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE typename Tensor::Scalar coeff(typename Tensor::Index index) const { return loadConstant(m_data+index); } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename Tensor::PacketReturnType packet(typename Tensor::Index index) const + { + return internal::ploadt_ro(m_data + index); + } + private: + typedef typename Tensor::Scalar Scalar; + const Scalar* m_data; +}; + +template +class SimpleTensorContractionMapper { + public: + EIGEN_DEVICE_FUNC + SimpleTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + m_tensor(tensor), + m_nocontract_strides(nocontract_strides), + m_ij_strides(ij_strides), + m_contract_strides(contract_strides), + m_k_strides(k_strides) { } + + enum { + DirectOffsets = CoeffLoader::DirectOffsets + }; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void offsetBuffer(typename Tensor::Index offset) { + m_tensor.offsetBuffer(offset); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE void prefetch(Index /*i*/) { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row) const { + // column major assumption + return operator()(row, 0); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row, Index col) const { + return m_tensor.coeff(computeIndex(row, col)); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index computeIndex(Index row, Index col) const { + const bool left = (side == Lhs); + Index nocontract_val = left ? row : col; + Index linidx = 0; + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx = nocontract_val / m_ij_strides[i]; + linidx += idx * m_nocontract_strides[i]; + nocontract_val -= idx * m_ij_strides[i]; + } + if (array_size::value > array_size::value) { + if (side == Lhs && inner_dim_contiguous) { + eigen_assert(m_nocontract_strides[0] == 1); + linidx += nocontract_val; + } else { + linidx += nocontract_val * m_nocontract_strides[0]; + } + } + + Index contract_val = left ? col : row; + if(array_size::value > 0) { + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx = contract_val / m_k_strides[i]; + linidx += idx * m_contract_strides[i]; + contract_val -= idx * m_k_strides[i]; + } + + if (side == Rhs && inner_dim_contiguous) { + eigen_assert(m_contract_strides[0] == 1); + linidx += contract_val; + } else { + linidx += contract_val * m_contract_strides[0]; + } + } + + return linidx; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE IndexPair computeIndexPair(Index row, Index col, const Index distance) const { + const bool left = (side == Lhs); + Index nocontract_val[2] = {left ? row : col, left ? row + distance : col}; + Index linidx[2] = {0, 0}; + if (array_size::value > array_size::value) { + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx0 = nocontract_val[0] / m_ij_strides[i]; + const Index idx1 = nocontract_val[1] / m_ij_strides[i]; + linidx[0] += idx0 * m_nocontract_strides[i]; + linidx[1] += idx1 * m_nocontract_strides[i]; + nocontract_val[0] -= idx0 * m_ij_strides[i]; + nocontract_val[1] -= idx1 * m_ij_strides[i]; + } + if (side == Lhs && inner_dim_contiguous) { + eigen_assert(m_nocontract_strides[0] == 1); + linidx[0] += nocontract_val[0]; + linidx[1] += nocontract_val[1]; + } else { + linidx[0] += nocontract_val[0] * m_nocontract_strides[0]; + linidx[1] += nocontract_val[1] * m_nocontract_strides[0]; + } + } + + Index contract_val[2] = {left ? col : row, left ? col : row + distance}; + if (array_size::value> 0) { + for (int i = static_cast(array_size::value) - 1; i > 0; i--) { + const Index idx0 = contract_val[0] / m_k_strides[i]; + const Index idx1 = contract_val[1] / m_k_strides[i]; + linidx[0] += idx0 * m_contract_strides[i]; + linidx[1] += idx1 * m_contract_strides[i]; + contract_val[0] -= idx0 * m_k_strides[i]; + contract_val[1] -= idx1 * m_k_strides[i]; + } + + if (side == Rhs && inner_dim_contiguous) { + eigen_assert(m_contract_strides[0] == 1); + linidx[0] += contract_val[0]; + linidx[1] += contract_val[1]; + } else { + linidx[0] += contract_val[0] * m_contract_strides[0]; + linidx[1] += contract_val[1] * m_contract_strides[0]; + } + } + return IndexPair(linidx[0], linidx[1]); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index firstAligned(Index size) const { + // Only claim alignment when we can compute the actual stride (ie when we're + // dealing with the lhs with inner_dim_contiguous. This is because the + // matrix-vector product relies on the stride when dealing with aligned inputs. + return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size; + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const { + return ((side == Lhs) && inner_dim_contiguous && array_size::value > 0) ? m_contract_strides[0] : 1; + } + + protected: + CoeffLoader m_tensor; + const nocontract_t m_nocontract_strides; + const nocontract_t m_ij_strides; + const contract_t m_contract_strides; + const contract_t m_k_strides; +}; + + +template +class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ + public: + typedef SimpleTensorContractionMapper ParentMapper; + + EIGEN_DEVICE_FUNC + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + typedef typename Tensor::PacketReturnType Packet; + typedef typename unpacket_traits::half HalfPacket; + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { + // whole method makes column major assumption + + // don't need to add offsets for now (because operator handles that) + // current code assumes packet size must be a multiple of 2 + EIGEN_STATIC_ASSERT(packet_size % 2 == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + + if (Tensor::PacketAccess && inner_dim_contiguous && !inner_dim_reordered) { + const Index index = this->computeIndex(i, j); + eigen_assert(this->computeIndex(i+packet_size-1, j) == index + packet_size-1); + return this->m_tensor.template packet(index); + } + + const IndexPair indexPair = this->computeIndexPair(i, j, packet_size - 1); + const Index first = indexPair.first; + const Index last = indexPair.second; + + // We can always do optimized packet reads from left hand side right now, because + // the vertical matrix dimension on the left hand side is never contracting. + // On the right hand side we need to check if the contracting dimensions may have + // been shuffled first. + if (Tensor::PacketAccess && + (side == Lhs || internal::array_size::value <= 1 || !inner_dim_reordered) && + (last - first) == (packet_size - 1)) { + + return this->m_tensor.template packet(first); + } + + EIGEN_ALIGN_MAX Scalar data[packet_size]; + + data[0] = this->m_tensor.coeff(first); + for (Index k = 1; k < packet_size - 1; k += 2) { + const IndexPair internal_pair = this->computeIndexPair(i + k, j, 1); + data[k] = this->m_tensor.coeff(internal_pair.first); + data[k + 1] = this->m_tensor.coeff(internal_pair.second); + } + data[packet_size - 1] = this->m_tensor.coeff(last); + + return pload(data); + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { + // whole method makes column major assumption + + // don't need to add offsets for now (because operator handles that) + const Index half_packet_size = unpacket_traits::size; + if (half_packet_size == packet_size) { + return loadPacket(i, j); + } + EIGEN_ALIGN_MAX Scalar data[half_packet_size]; + for (Index k = 0; k < half_packet_size; k++) { + data[k] = operator()(i + k, j); + } + return pload(data); + } +}; + + +template +class BaseTensorContractionMapper : public SimpleTensorContractionMapper +{ + public: + typedef SimpleTensorContractionMapper ParentMapper; + + EIGEN_DEVICE_FUNC + BaseTensorContractionMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) : + ParentMapper(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + typedef typename Tensor::PacketReturnType Packet; + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Packet loadPacket(Index i, Index j) const { + EIGEN_ALIGN_MAX Scalar data[1]; + data[0] = this->m_tensor.coeff(this->computeIndex(i, j)); + return pload(data); + } + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Packet loadHalfPacket(Index i, Index j) const { + return loadPacket(i, j); + } +}; + + +template +class TensorContractionSubMapper { + public: + typedef typename Tensor::PacketReturnType Packet; + typedef typename unpacket_traits::half HalfPacket; + + typedef BaseTensorContractionMapper ParentMapper; + typedef TensorContractionSubMapper Self; + typedef Self LinearMapper; + + enum { + // We can use direct offsets iff the parent mapper supports then and we can compute the strides. + // TODO: we should also enable direct offsets for the Rhs case. + UseDirectOffsets = ParentMapper::DirectOffsets && (side == Lhs) && inner_dim_contiguous && (array_size::value > 0) + }; + + EIGEN_DEVICE_FUNC TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper), m_vert_offset(vert_offset), m_horiz_offset(horiz_offset) { + // Bake the offsets into the buffer used by the base mapper whenever possible. This avoids the need to recompute + // this offset every time we attempt to access a coefficient. + if (UseDirectOffsets) { + Index stride = m_base_mapper.stride(); + m_base_mapper.offsetBuffer(vert_offset + horiz_offset * stride); + } + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + if (UseDirectOffsets) { + return m_base_mapper(i, 0); + } + return m_base_mapper(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { + if (UseDirectOffsets) { + return m_base_mapper(i, j); + } + return m_base_mapper(i + m_vert_offset, j + m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, 0); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, m_horiz_offset); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, j); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, j + m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { + if (UseDirectOffsets) { + return m_base_mapper.template loadHalfPacket(i, 0); + } + return m_base_mapper.template loadHalfPacket(i + m_vert_offset, m_horiz_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, Packet p) const { + if (UseDirectOffsets) { + m_base_mapper.storePacket(i, 0, p); + } + m_base_mapper.storePacket(i + m_vert_offset, m_horiz_offset, p); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + if (UseDirectOffsets) { + return LinearMapper(m_base_mapper, i, j); + } + return LinearMapper(m_base_mapper, i + m_vert_offset, j + m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i) const { + EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MADE_A_PROGRAMMING_MISTAKE); + const int ActualAlignment = (AlignmentType == Aligned) && (Alignment == Aligned) ? Aligned : Unaligned; + if (UseDirectOffsets) { + return m_base_mapper.template loadPacket(i, 0); + } + return m_base_mapper.template loadPacket(i + m_vert_offset, m_horiz_offset); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool aligned(Index) const { + return false; + } + + private: + ParentMapper m_base_mapper; + const Index m_vert_offset; + const Index m_horiz_offset; +}; + + +template +class TensorContractionInputMapper + : public BaseTensorContractionMapper { + + public: + typedef Scalar_ Scalar; + typedef BaseTensorContractionMapper Base; + typedef TensorContractionSubMapper SubMapper; + typedef SubMapper VectorMapper; + + EIGEN_DEVICE_FUNC TensorContractionInputMapper(const Tensor& tensor, + const nocontract_t& nocontract_strides, + const nocontract_t& ij_strides, + const contract_t& contract_strides, + const contract_t& k_strides) + : Base(tensor, nocontract_strides, ij_strides, contract_strides, k_strides) { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + return VectorMapper(*this, i, j); + } +}; + + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_MAPPER_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h new file mode 100644 index 0000000..ee16cde --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h @@ -0,0 +1,1052 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H + +// evaluator for thread pool device +#ifdef EIGEN_USE_THREADS + +namespace Eigen { + +#ifdef EIGEN_USE_SIMPLE_THREAD_POOL +namespace internal { + +template +struct packLhsArg { + LhsScalar* blockA; + const LhsMapper& lhs; + const Index m_start; + const Index k_start; + const Index mc; + const Index kc; +}; + +template +struct packRhsAndKernelArg { + const MaxSizeVector* blockAs; + RhsScalar* blockB; + const RhsMapper& rhs; + OutputMapper& output; + const Index m; + const Index k; + const Index n; + const Index mc; + const Index kc; + const Index nc; + const Index num_threads; + const Index num_blockAs; + const Index max_m; + const Index k_block_idx; + const Index m_block_idx; + const Index n_block_idx; + const Index m_blocks; + const Index n_blocks; + MaxSizeVector* kernel_notifications; + const MaxSizeVector* lhs_notifications; + const bool need_to_pack; +}; + +} // end namespace internal +#endif // EIGEN_USE_SIMPLE_THREAD_POOL + +template +struct TensorEvaluator, ThreadPoolDevice> : + public TensorContractionEvaluatorBase, ThreadPoolDevice> > { + + typedef ThreadPoolDevice Device; + + typedef TensorEvaluator, Device> Self; + typedef TensorContractionEvaluatorBase Base; + + typedef TensorContractionOp XprType; + typedef typename internal::remove_const::type Scalar; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + enum { + Layout = TensorEvaluator::Layout, + }; + + // Most of the code is assuming that both input tensors are ColMajor. If the + // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS: + // If we want to compute A * B = C, where A is LHS and B is RHS, the code + // will pretend B is LHS and A is RHS. + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType; + typedef typename internal::conditional< + static_cast(Layout) == static_cast(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType; + + static const int LDims = + internal::array_size::Dimensions>::value; + static const int RDims = + internal::array_size::Dimensions>::value; + static const int ContractDims = internal::array_size::value; + + typedef array left_dim_mapper_t; + typedef array right_dim_mapper_t; + + typedef array contract_t; + typedef array left_nocontract_t; + typedef array right_nocontract_t; + + static const int NumDims = LDims + RDims - 2 * ContractDims; + + typedef DSizes Dimensions; + + // typedefs needed in evalTo + typedef typename internal::remove_const::type LhsScalar; + typedef typename internal::remove_const::type RhsScalar; + typedef typename internal::gebp_traits Traits; + + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + + TensorEvaluator(const XprType& op, const Device& device) : + Base(op, device) {} + +#ifndef EIGEN_USE_SIMPLE_THREAD_POOL + template + void evalProduct(Scalar* buffer) const { + typedef + typename internal::remove_const::type + LhsScalar; + typedef + typename internal::remove_const::type + RhsScalar; + typedef typename internal::gebp_traits Traits; + typedef TensorEvaluator LeftEvaluator; + typedef TensorEvaluator RightEvaluator; + typedef internal::TensorContractionInputMapper< + LhsScalar, Index, internal::Lhs, LeftEvaluator, left_nocontract_t, + contract_t, internal::packet_traits::size, + lhs_inner_dim_contiguous, false, Unaligned> + LhsMapper; + typedef internal::TensorContractionInputMapper< + RhsScalar, Index, internal::Rhs, RightEvaluator, right_nocontract_t, + contract_t, internal::packet_traits::size, + rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Unaligned> + RhsMapper; + typedef internal::blas_data_mapper OutputMapper; + typedef internal::gemm_pack_lhs + LhsPacker; + typedef internal::gemm_pack_rhs< + RhsScalar, Index, typename RhsMapper::SubMapper, Traits::nr, ColMajor> + RhsPacker; + typedef internal::gebp_kernel + GebpKernel; + + const Index m = this->m_i_size; + const Index n = this->m_j_size; + const Index k = this->m_k_size; + if (m == 0 || n == 0 || k == 0) return; + + // Compute a set of algorithm parameters: + // - kernel block sizes (bm, bn, bk) + // - task grain sizes (number of kernels executed per task: gm, gn) + // - number of threads + // - sharding by row/column + // - parallel packing or first lhs then rhs + // and some derived parameters: + // - number of tasks (nm, nn, nk) + // - number of kernels (nm0, nn0) + // Unfortunately, all these parameters are tightly interdependent. + // So in some cases we first compute approximate values, then compute other + // values based on these approximations and then refine the approximations. + + // There are lots of heuristics here. There is some reasoning behind them, + // but ultimately they are just tuned on contraction benchmarks for + // different input configurations, thread counts and instruction sets. + // So feel free to question any of them. + + // Compute whether we want to shard by row or by column. + // This is a first approximation, it will be refined later. Since we don't + // know number of threads yet we use 2, because what's we are most + // interested in at this point is whether it makes sense to use + // parallelization at all or not. + bool shard_by_col = shardByCol(m, n, 2); + + // First approximation of kernel blocking sizes. + // Again, we don't know number of threads yet, so we use 2. + Index bm, bn, bk; + if (shard_by_col) { + internal::TensorContractionBlocking + blocking(k, m, n, 2); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } else { + internal::TensorContractionBlocking + blocking(k, m, n, 2); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } + + // Compute optimal number of threads. + // Note: we use bk instead of k here because we are interested in amount of + // _parallelizable_ computations, and computations are not parallelizable + // across k dimension. + const TensorOpCost cost = + contractionCost(m, n, bm, bn, bk, shard_by_col, false); + int num_threads = TensorCostModel::numThreads( + static_cast(n) * m, cost, this->m_device.numThreads()); + + // TODO(dvyukov): this is a stop-gap to prevent regressions while the cost + // model is not tuned. Remove this when the cost model is tuned. + if (n == 1) num_threads = 1; + + if (num_threads == 1) { + // The single-threaded algorithm should be faster in this case. + if (n == 1) + this->template evalGemv(buffer); + else + this->template evalGemm(buffer); + return; + } + + // Now that we know number of threads, recalculate sharding and blocking. + shard_by_col = shardByCol(m, n, num_threads); + if (shard_by_col) { + internal::TensorContractionBlocking + blocking(k, m, n, num_threads); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } else { + internal::TensorContractionBlocking + blocking(k, m, n, num_threads); + bm = blocking.mc(); + bn = blocking.nc(); + bk = blocking.kc(); + } + + // Number of kernels for each dimension. + Index nm0 = divup(m, bm); + Index nn0 = divup(n, bn); + Index nk = divup(k, bk); + + // Calculate task grain size (number of kernels executed per task). + // This task size coarsening serves two purposes: + // 1. It reduces per-task overheads including synchronization overheads. + // 2. It allows to use caches better (reuse the same packed rhs in several + // consecutive kernels). + Index gm = 1; + Index gn = 1; + // If we are sharding by column, then we prefer to reduce rows first. + if (shard_by_col) { + gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); + gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); + } else { + gn = coarsenN(m, n, bm, bn, bk, gm, num_threads, shard_by_col); + gm = coarsenM(m, n, bm, bn, bk, gn, num_threads, shard_by_col); + } + // Number of tasks in each dimension. + Index nm = divup(nm0, gm); + Index nn = divup(nn0, gn); + + // Last by not least, decide whether we want to issue both lhs and rhs + // packing in parallel; or issue lhs packing first, and then issue rhs + // packing when lhs packing completes (for !shard_by_col lhs and rhs are + // swapped). Parallel packing allows more parallelism (for both packing and + // kernels), while sequential packing provides better locality (once + // a thread finishes rhs packing it proceed to kernels with that rhs). + // First, we are interested in parallel packing if there are few tasks. + bool parallel_pack = num_threads >= nm * nn; + // Also do parallel packing if all data fits into L2$. + if (m * bk * Index(sizeof(LhsScalar)) + n * bk * Index(sizeof(RhsScalar)) <= + l2CacheSize() * num_threads) + parallel_pack = true; + // But don't do it if we will use each rhs only once. Locality seems to be + // more important in this case. + if ((shard_by_col ? nm : nn) == 1) parallel_pack = false; + + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, + this->m_i_strides, this->m_left_contracting_strides, + this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, + this->m_j_strides, this->m_right_contracting_strides, + this->m_k_strides); + + Context(this->m_device, num_threads, lhs, rhs, buffer, m, n, + k, bm, bn, bk, nm, nn, nk, gm, gn, nm0, nn0, + shard_by_col, parallel_pack) + .run(); + } + + // Context coordinates a single parallel gemm operation. + template + class Context { + public: + Context(const Device& device, int num_threads, LhsMapper& lhs, + RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm, + Index bn, Index bk, Index nm, Index nn, Index nk, Index gm, + Index gn, Index nm0, Index nn0, bool shard_by_col, + bool parallel_pack) + : device_(device), + lhs_(lhs), + rhs_(rhs), + buffer_(buffer), + output_(buffer, tm), + num_threads_(num_threads), + shard_by_col_(shard_by_col), + parallel_pack_(parallel_pack), + m_(tm), + n_(tn), + k_(tk), + bm_(bm), + bn_(bn), + bk_(bk), + nm_(nm), + nn_(nn), + nk_(nk), + gm_(gm), + gn_(gn), + nm0_(nm0), + nn0_(nn0) + { + for (Index x = 0; x < P; x++) { + // Normal number of notifications for k slice switch is + // nm_ + nn_ + nm_ * nn_. However, first P - 1 slices will receive only + // nm_ + nn_ notifications, because they will not receive notifications + // from preceeding kernels. + state_switch_[x] = + x == 0 + ? 1 + : (parallel_pack_ ? nn_ + nm_ : (shard_by_col_ ? nn_ : nm_)) + + (x == P - 1 ? nm_ * nn_ : 0); + state_packing_ready_[x] = + parallel_pack_ ? 0 : (shard_by_col_ ? nm_ : nn_); + state_kernel_[x] = new std::atomic*[nm_]; + for (Index m = 0; m < nm_; m++) { + state_kernel_[x][m] = new std::atomic[nn_]; + // Kernels generally receive 3 notifications (previous kernel + 2 + // packing), but the first slice won't get notifications from previous + // kernels. + for (Index n = 0; n < nn_; n++) + state_kernel_[x][m][n].store( + (x == 0 ? 0 : 1) + (parallel_pack_ ? 2 : 1), + std::memory_order_relaxed); + } + } + + // Allocate memory for packed rhs/lhs matrices. + size_t align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1); + size_t lhs_size = + divup(bm_ * bk_ * sizeof(LhsScalar), align) * align; + size_t rhs_size = + divup(bn_ * bk_ * sizeof(RhsScalar), align) * align; + packed_mem_ = static_cast(internal::aligned_malloc( + (nm0_ * lhs_size + nn0_ * rhs_size) * std::min(nk_, P - 1))); + char* mem = static_cast(packed_mem_); + for (Index x = 0; x < numext::mini(nk_, P - 1); x++) { + packed_lhs_[x].resize(nm0_); + for (Index m = 0; m < nm0_; m++) { + packed_lhs_[x][m] = reinterpret_cast(mem); + mem += lhs_size; + } + packed_rhs_[x].resize(nn0_); + for (Index n = 0; n < nn0_; n++) { + packed_rhs_[x][n] = reinterpret_cast(mem); + mem += rhs_size; + } + } + } + + ~Context() { + for (Index x = 0; x < P; x++) { + for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m]; + delete[] state_kernel_[x]; + } + internal::aligned_free(packed_mem_); + } + + void run() { + // Kick off packing of the first slice. + signal_switch(0, 1); + // Wait for overall completion. + // TODO(dvyukov): this wait can lead to deadlock. + // If nthreads contractions are concurrently submitted from worker + // threads, this wait will block all worker threads and the system will + // deadlock. + done_.Wait(); + } + + private: + Notification done_; + const Device& device_; + LhsMapper& lhs_; + RhsMapper& rhs_; + Scalar* const buffer_; + OutputMapper output_; + const int num_threads_; + const bool shard_by_col_; + const bool parallel_pack_; + // Matrix sizes. + const Index m_; + const Index n_; + const Index k_; + // Block sizes. + const Index bm_; + const Index bn_; + const Index bk_; + // Number of tasks. + const Index nm_; + const Index nn_; + const Index nk_; + // Task grain sizes (number of kernels executed per task). + const Index gm_; + const Index gn_; + // Number of blocks (this is different from ni_/nn_ because of task size + // coarsening). + const Index nm0_; + const Index nn0_; + + // Parallelization strategy. + // + // Blocks related to the same k block can run in parallel because they write + // to different output blocks. So we parallelize within k slices, this + // gives us parallelism level of m x n. Before we can start any kernels + // related to k-th slice, we need to issue m lhs packing tasks and n rhs + // packing tasks. + // + // However, there is a bottleneck when we are finishing kernels for k-th + // slice (at the very end there is only 1 runnable kernel). To mitigate this + // bottleneck we allow kernels from k-th and k+1-th slices to run in + // parallel. Note that (m, n, k) and (m, n, k+1) kernels write to the same + // output block, so they must not run in parallel. + // + // This gives us the following dependency graph. + // On each k slice we have m x n kernel tasks, m lhs paking tasks and n rhs + // packing tasks. + // Kernel (m, n, k) can start when: + // - kernel (m, n, k-1) has finished + // - lhs packing (m, k) has finished + // - rhs packing (n, k) has finished + // Lhs/rhs packing can start when: + // - all k-1 packing has finished (artificially imposed to limit amount of + // parallel packing) + // + // On top of that we limit runnable tasks to two consecutive k slices. + // This is done to limit amount of memory we need for packed lhs/rhs + // (for each k slice we need m*bk + n*bk memory in packed_lhs_/packed_rhs_). + // + // state_switch_ tracks when we are ready to switch to the next k slice. + // state_kernel_[m][n] tracks when we are ready to kick off kernel (m, n). + // These variable are rolling over 3 consecutive k slices: first two we are + // actively executing + one to track completion of kernels in the second + // slice. + static const Index P = 3; + void* packed_mem_; + std::vector packed_lhs_[P - 1]; + std::vector packed_rhs_[P - 1]; + std::atomic** state_kernel_[P]; + // state_switch_ is frequently modified by worker threads, while other + // fields are read-only after constructor. Let's move it to a separate cache + // line to reduce cache-coherency traffic. + char pad_[128]; + std::atomic state_packing_ready_[P]; + std::atomic state_switch_[P]; + + void pack_lhs(Index m, Index k) { + const Index mend = m * gm_ + gm(m); + for (Index m1 = m * gm_; m1 < mend; m1++) + LhsPacker()(packed_lhs_[k % (P - 1)][m1], + lhs_.getSubMapper(m1 * bm_, k * bk_), bk(k), bm(m1)); + + if (!parallel_pack_ && shard_by_col_) { + signal_packing(k); + } else { + signal_switch(k + 1); + for (Index n = nn_ - 1; n >= 0; n--) signal_kernel(m, n, k, n == 0); + } + } + + void pack_rhs(Index n, Index k) { + const Index nend = n * gn_ + gn(n); + for (Index n1 = n * gn_; n1 < nend; n1++) { + if (k == 0) { + // Zero the output memory in parallel. + // On 10000x2x10000 mm zeroing can easily take half of time. + // Zero (bn x m) row. Safe to do here because all kernels that will + // write to this memory depend on completion of this task. + // Note: don't call device_.memset() here. device_.memset() blocks on + // thread pool worker thread, which can lead to underutilization and + // deadlocks. + memset(buffer_ + n1 * bn_ * m_, 0, bn(n1) * m_ * sizeof(Scalar)); + } + RhsPacker()(packed_rhs_[k % (P - 1)][n1], + rhs_.getSubMapper(k * bk_, n1 * bn_), bk(k), bn(n1)); + } + + if (parallel_pack_ || shard_by_col_) { + signal_switch(k + 1); + for (Index m = nm_ - 1; m >= 0; m--) signal_kernel(m, n, k, m == 0); + } else { + signal_packing(k); + } + } + + void kernel(Index m, Index n, Index k) { + // Note: order of iteration matters here. Iteration over m is innermost + // because we want to reuse the same packed rhs in consequetive tasks + // (rhs fits into L2$ while lhs only into L3$). + const Index nend = n * gn_ + gn(n); + const Index mend = m * gm_ + gm(m); + if (shard_by_col_) { + for (Index n1 = n * gn_; n1 < nend; n1++) { + for (Index m1 = m * gm_; m1 < mend; m1++) + GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_), + packed_lhs_[k % (P - 1)][m1], + packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1), + Scalar(1), -1, -1, 0, 0); + } + } else { + for (Index m1 = m * gm_; m1 < mend; m1++) + for (Index n1 = n * gn_; n1 < nend; n1++) { + GebpKernel()(output_.getSubMapper(m1 * bm_, n1 * bn_), + packed_lhs_[k % (P - 1)][m1], + packed_rhs_[k % (P - 1)][n1], bm(m1), bk(k), bn(n1), + Scalar(1), -1, -1, 0, 0); + } + } + signal_kernel(m, n, k + 1, false); + signal_switch(k + 2); + } + + void signal_packing(Index k) { + eigen_assert(!parallel_pack_); + Index s = state_packing_ready_[k % P].fetch_sub(1); + eigen_assert(s > 0); + if (s != 1) return; + state_packing_ready_[k % P] = shard_by_col_ ? nm_ : nn_; + enqueue_packing(k, shard_by_col_); + } + + void signal_kernel(Index m, Index n, Index k, bool sync) { + std::atomic* state = &state_kernel_[k % P][m][n]; + Index s = state->load(); + eigen_assert(s > 0); + if (s != 1 && state->fetch_sub(1) != 1) return; + state->store(parallel_pack_ ? 3 : 2, std::memory_order_relaxed); + if (sync) + kernel(m, n, k); + else + device_.enqueueNoNotification([=]() { kernel(m, n, k); }); + } + + void signal_switch(Index k, Index v = 1) { + Index s = state_switch_[k % P].fetch_sub(v); + eigen_assert(s >= v); + if (s != v) return; + + // Ready to switch to the next k slice. + // Reset counter for the next iteration. + state_switch_[k % P] = + (parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)) + + nm_ * nn_; + if (k < nk_) { + // Issue lhs/rhs packing. Their completion will in turn kick off + // kernels. + if (parallel_pack_) { + enqueue_packing(k, !shard_by_col_); + enqueue_packing(k, shard_by_col_); + } else if (shard_by_col_) { + enqueue_packing(k, false); + } else { + enqueue_packing(k, true); + } + + // Termination handling. + // Because kernel completion signals k + 2 switch, we need to finish nk + // + 2 slices without issuing any tasks on nk + 1 slice. So here we + // pretend that all nk + 1 packing tasks just finish instantly; so that + // nk + 2 switch only waits for completion of nk kernels. + } else if (k == nk_) { + signal_switch(k + 1, + parallel_pack_ ? nm_ + nn_ : (shard_by_col_ ? nn_ : nm_)); + } else { + done_.Notify(); + } + } + + // Enqueue all rhs/lhs packing for k-th slice. + void enqueue_packing(Index k, bool rhs) { + enqueue_packing_helper(0, rhs ? nn_ : nm_, k, rhs); + } + + void enqueue_packing_helper(Index start, Index end, Index k, bool rhs) { + if (end - start == 1) { + if (rhs) + pack_rhs(start, k); + else + pack_lhs(start, k); + } else { + Index mid = (start + end) / 2; + device_.enqueueNoNotification( + [=]() { enqueue_packing_helper(mid, end, k, rhs); }); + device_.enqueueNoNotification( + [=]() { enqueue_packing_helper(start, mid, k, rhs); }); + } + } + + // Block sizes with accounting for potentially incomplete last block. + Index bm(Index m) const { return m + 1 < nm0_ ? bm_ : m_ + bm_ - bm_ * nm0_; } + Index bn(Index n) const { return n + 1 < nn0_ ? bn_ : n_ + bn_ - bn_ * nn0_; } + Index bk(Index k) const { return k + 1 < nk_ ? bk_ : k_ + bk_ - bk_ * nk_; } + // Task grain sizes accounting for potentially incomplete last task. + Index gm(Index m) const { return m + 1 < nm_ ? gm_ : nm0_ + gm_ - gm_ * nm_; } + Index gn(Index n) const { return n + 1 < nn_ ? gn_ : nn0_ + gn_ - gn_ * nn_; } + + Context(const Context&) = delete; + void operator=(const Context&) = delete; + }; + + // Decide whether we want to shard m x n contraction by columns or by rows. + static bool shardByCol(Index m, Index n, Index num_threads) { + // Note: we are comparing both n and m against Traits::nr, it is not + // a mistake. We are trying to figure out how both n and m will fit into + // the main sharding dimension. + + // Sharding by column is the default + // ... unless there is enough data for vectorization over rows + if (m / num_threads >= Traits::nr && + // and not enough data for vectorization over columns + (n / num_threads < Traits::nr || + // ... or barely enough data for vectorization over columns, + // but it is not evenly dividable across threads + (n / num_threads < 4 * Traits::nr && + (n % (num_threads * Traits::nr)) != 0 && + // ... and it is evenly dividable across threads for rows + ((m % (num_threads * Traits::nr)) == 0 || + // .. or it is not evenly dividable for both dimensions but + // there is much more data over rows so that corner effects are + // mitigated. + (m / n >= 6))))) + return false; + // Wait, or if matrices are just substantially prolonged over the other + // dimension. + if (n / num_threads < 16 * Traits::nr && m > n * 32) return false; + return true; + } + + Index coarsenM(Index m, Index n, Index bm, Index bn, Index bk, Index gn, + int num_threads, bool shard_by_col) const { + Index gm = 1; + Index gm1 = 1; + Index nm0 = divup(m, bm); + Index nm1 = nm0; + for (;;) { + // Find the next candidate for m grain size. It needs to result in + // different number of blocks. E.g. if we have 10 kernels, we want to try + // 5 and 10, but not 6, 7, 8 and 9. + while (gm1 <= nm0 && nm1 == divup(nm0, gm1)) gm1++; + if (gm1 > nm0) break; + // Check the candidate. + int res = checkGrain(m, n, bm, bn, bk, gm1, gn, gm, gn, num_threads, + shard_by_col); + if (res < 0) break; + nm1 = divup(nm0, gm1); + if (res == 0) continue; + // Commit new grain size. + gm = gm1; + } + return gm; + } + + Index coarsenN(Index m, Index n, Index bm, Index bn, Index bk, Index gm, + int num_threads, bool shard_by_col) const { + Index gn = 1; + Index gn1 = 1; + Index nn0 = divup(n, bn); + Index nn1 = nn0; + for (;;) { + while (gn1 <= nn0 && nn1 == divup(nn0, gn1)) gn1++; + if (gn1 > nn0) break; + int res = checkGrain(m, n, bm, bn, bk, gm, gn1, gm, gn, num_threads, + shard_by_col); + if (res < 0) break; + nn1 = divup(nn0, gn1); + if (res == 0) continue; + gn = gn1; + } + return gn; + } + + // checkGrain checks whether grain (gm, gn) is suitable and is better than + // (oldgm, oldgn). + int checkGrain(Index m, Index n, Index bm, Index bn, Index bk, Index gm, + Index gn, Index oldgm, Index oldgn, int num_threads, + bool shard_by_col) const { + const TensorOpCost cost = + contractionCost(bm * gm, bn * gn, bm, bn, bk, shard_by_col, true); + double taskSize = TensorCostModel::taskSize( + static_cast(bm) * gm * bn * gn, cost); + // If the task is too small, then we agree on it regardless of anything + // else. Otherwise synchronization overheads will dominate. + if (taskSize < 1) return 1; + // If it is too large, then we reject it and all larger tasks. + if (taskSize > 2) return -1; + // Now we are in presumably good task size range. + // The main deciding factor here is parallelism. Consider that we have 12 + // kernels and 4 threads. Grains of 2, 3 and 4 all yield good task sizes. + // But 2/4 yield 6/3 tasks, which gives us parallelism of 0.75 (at most 3/4 + // of cores will be busy). While grain size 3 gives us 4 tasks, which gives + // us parallelism of 1 (we can load all cores). + Index nm0 = divup(m, bm); + Index nn0 = divup(n, bn); + Index new_tasks = divup(nm0, gm) * divup(nn0, gn); + double new_parallelism = static_cast(new_tasks) / + (divup(new_tasks, num_threads) * num_threads); + Index old_tasks = divup(nm0, oldgm) * divup(nn0, oldgn); + double old_parallelism = static_cast(old_tasks) / + (divup(old_tasks, num_threads) * num_threads); + if (new_parallelism > old_parallelism || new_parallelism == 1) return 1; + return 0; + } + +#else // EIGEN_USE_SIMPLE_THREAD_POOL + + template + void evalProduct(Scalar* buffer) const { + if (this->m_j_size == 1) { + this->template evalGemv(buffer); + return; + } + + evalGemm(buffer); + } + + template + void evalGemm(Scalar* buffer) const { + // columns in left side, rows in right side + const Index k = this->m_k_size; + + // rows in left side + const Index m = this->m_i_size; + + // columns in right side + const Index n = this->m_j_size; + + // zero out the result buffer (which must be of size at least m * n * sizeof(Scalar) + this->m_device.memset(buffer, 0, m * n * sizeof(Scalar)); + + + const int lhs_packet_size = internal::unpacket_traits::size; + const int rhs_packet_size = internal::unpacket_traits::size; + + typedef internal::TensorContractionInputMapper LhsMapper; + + typedef internal::TensorContractionInputMapper RhsMapper; + + typedef internal::blas_data_mapper OutputMapper; + + // TODO: packing could be faster sometimes if we supported row major tensor mappers + typedef internal::gemm_pack_lhs LhsPacker; + typedef internal::gemm_pack_rhs RhsPacker; + + // TODO: replace false, false with conjugate values? + typedef internal::gebp_kernel GebpKernel; + + typedef internal::packLhsArg packLArg; + typedef internal::packRhsAndKernelArg packRKArg; + + // initialize data mappers + LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides, + this->m_left_contracting_strides, this->m_k_strides); + + RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides, + this->m_right_contracting_strides, this->m_k_strides); + + OutputMapper output(buffer, m); + + // compute block sizes (which depend on number of threads) + const Index num_threads = this->m_device.numThreads(); + internal::TensorContractionBlocking blocking(k, m, n, num_threads); + Index mc = blocking.mc(); + Index nc = blocking.nc(); + Index kc = blocking.kc(); + eigen_assert(mc <= m); + eigen_assert(nc <= n); + eigen_assert(kc <= k); + +#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) + const Index k_blocks = CEIL_DIV(k, kc); + const Index n_blocks = CEIL_DIV(n, nc); + const Index m_blocks = CEIL_DIV(m, mc); + const Index sizeA = mc * kc; + const Index sizeB = kc * nc; + + /* cout << "m: " << m << " n: " << n << " k: " << k << endl; + cout << "mc: " << mc << " nc: " << nc << " kc: " << kc << endl; + cout << "m_blocks: " << m_blocks << " n_blocks: " << n_blocks << " k_blocks: " << k_blocks << endl; + cout << "num threads: " << num_threads << endl; + */ + + // note: m_device.allocate should return 16 byte aligned pointers, but if blockA and blockB + // aren't 16 byte aligned segfaults will happen due to SIMD instructions + // note: You can get away with allocating just a single blockA and offsets and meet the + // the alignment requirements with the assumption that + // (Traits::mr * sizeof(ResScalar)) % 16 == 0 + const Index numBlockAs = numext::mini(num_threads, m_blocks); + MaxSizeVector blockAs(num_threads); + for (int i = 0; i < num_threads; i++) { + blockAs.push_back(static_cast(this->m_device.allocate(sizeA * sizeof(LhsScalar)))); + } + + // To circumvent alignment issues, I'm just going to separately allocate the memory for each thread + // TODO: is this too much memory to allocate? This simplifies coding a lot, but is wasteful. + // Other options: (1) reuse memory when a thread finishes. con: tricky + // (2) allocate block B memory in each thread. con: overhead + MaxSizeVector blockBs(n_blocks); + for (int i = 0; i < n_blocks; i++) { + blockBs.push_back(static_cast(this->m_device.allocate(sizeB * sizeof(RhsScalar)))); + } + + // lhs_notifications starts with all null Notifications + MaxSizeVector lhs_notifications(num_threads, nullptr); + + // this should really be numBlockAs * n_blocks; + const Index num_kernel_notifications = num_threads * n_blocks; + MaxSizeVector kernel_notifications(num_kernel_notifications, + nullptr); + + for (Index k_block_idx = 0; k_block_idx < k_blocks; k_block_idx++) { + const Index k_start = k_block_idx * kc; + // make sure we don't overshoot right edge of left matrix + const Index actual_kc = numext::mini(k_start + kc, k) - k_start; + + for (Index m_block_idx = 0; m_block_idx < m_blocks; m_block_idx += numBlockAs) { + const Index num_blocks = numext::mini(m_blocks-m_block_idx, numBlockAs); + + for (Index mt_block_idx = m_block_idx; mt_block_idx < m_block_idx+num_blocks; mt_block_idx++) { + const Index m_start = mt_block_idx * mc; + const Index actual_mc = numext::mini(m_start + mc, m) - m_start; + eigen_assert(actual_mc > 0); + + Index blockAId = (k_block_idx * m_blocks + mt_block_idx) % num_threads; + + for (int i = 0; i < n_blocks; ++i) { + Index notification_id = (blockAId * n_blocks + i); + // Wait for any current kernels using this slot to complete + // before using it. + if (kernel_notifications[notification_id]) { + wait_until_ready(kernel_notifications[notification_id]); + delete kernel_notifications[notification_id]; + } + kernel_notifications[notification_id] = new Notification(); + } + const packLArg arg = { + blockAs[blockAId], // blockA + lhs, // lhs + m_start, // m + k_start, // k + actual_mc, // mc + actual_kc, // kc + }; + + // Delete any existing notification since we may be + // replacing it. The algorithm should ensure that there are + // no existing waiters on this notification. + delete lhs_notifications[blockAId]; + lhs_notifications[blockAId] = + this->m_device.enqueue(&Self::packLhs, arg); + } + + // now start kernels. + const Index m_base_start = m_block_idx * mc; + const bool need_to_pack = m_block_idx == 0; + + for (Index n_block_idx = 0; n_block_idx < n_blocks; n_block_idx++) { + const Index n_start = n_block_idx * nc; + const Index actual_nc = numext::mini(n_start + nc, n) - n_start; + + // first make sure the previous kernels are all done before overwriting rhs. Also wait if + // we're going to start new k. In both cases need_to_pack is true. + if (need_to_pack) { + for (Index i = num_blocks; i < num_threads; ++i) { + Index blockAId = (k_block_idx * m_blocks + i + m_block_idx) % num_threads; + Index future_id = (blockAId * n_blocks + n_block_idx); + wait_until_ready(kernel_notifications[future_id]); + } + } + + packRKArg arg = { + &blockAs, // blockA + blockBs[n_block_idx], // blockB + rhs, // rhs + output, // output + m_base_start, // m + k_start, // k + n_start, // n + mc, // mc + actual_kc, // kc + actual_nc, // nc + num_threads, + numBlockAs, + m, + k_block_idx, + m_block_idx, + n_block_idx, // n_block_idx + m_blocks, // m_blocks + n_blocks, // n_blocks + &kernel_notifications, // kernel notifications + &lhs_notifications, // lhs notifications + need_to_pack, // need_to_pack + }; + + // We asynchronously kick off this function, which ends up + // notifying the appropriate kernel_notifications objects, + // which this thread waits on before exiting. + this->m_device.enqueueNoNotification(&Self::packRhsAndKernel, arg); + } + } + } + + // Make sure all the kernels are done. + for (size_t i = 0; i < kernel_notifications.size(); ++i) { + wait_until_ready(kernel_notifications[i]); + delete kernel_notifications[i]; + } + + // No need to wait for lhs notifications since they should have + // already been waited on. Just clean them up. + for (size_t i = 0; i < lhs_notifications.size(); ++i) { + delete lhs_notifications[i]; + } + + // deallocate all of the memory for both A and B's + for (size_t i = 0; i < blockAs.size(); i++) { + this->m_device.deallocate(blockAs[i]); + } + for (size_t i = 0; i < blockBs.size(); i++) { + this->m_device.deallocate(blockBs[i]); + } + +#undef CEIL_DIV + } + + /* + * Packs a LHS block of size (mt, kc) starting at lhs(m, k). Before packing + * the LHS block, check that all of the kernels that worked on the same + * mt_block_idx in the previous m_block are done. + */ + template + static void packLhs(const packLArg arg) { + // perform actual packing + LhsPacker pack_lhs; + pack_lhs(arg.blockA, arg.lhs.getSubMapper(arg.m_start, arg.k_start), arg.kc, arg.mc); + } + + /* + * Packs a RHS block of size (kc, nc) starting at (k, n) after checking that + * all kernels in the previous block are done. + * Then for each LHS future, we wait on the future and then call GEBP + * on the area packed by the future (which starts at + * blockA + future_idx * mt * kc) on the LHS and with the full packed + * RHS block. + * The output of this GEBP is written to output(m + i * mt, n). + */ + template + static void packRhsAndKernel(packRKArg arg) { + if (arg.need_to_pack) { + RhsPacker pack_rhs; + pack_rhs(arg.blockB, arg.rhs.getSubMapper(arg.k, arg.n), arg.kc, arg.nc); + } + + GebpKernel gebp; + for (Index mt_block_idx = 0; mt_block_idx < arg.num_blockAs; mt_block_idx++) { + const Index m_base_start = arg.m + arg.mc*mt_block_idx; + if (m_base_start < arg.max_m) { + Index blockAId = (arg.k_block_idx * arg.m_blocks + mt_block_idx + arg.m_block_idx) % arg.num_threads; + wait_until_ready((*arg.lhs_notifications)[blockAId]); + const Index actual_mc = numext::mini(m_base_start + arg.mc, arg.max_m) - m_base_start; + gebp(arg.output.getSubMapper(m_base_start, arg.n), + (*arg.blockAs)[blockAId], arg.blockB, + actual_mc, arg.kc, arg.nc, Scalar(1), -1, -1, 0, 0); + + // Notify that the kernel is done. + const Index set_idx = blockAId * arg.n_blocks + arg.n_block_idx; + (*arg.kernel_notifications)[set_idx]->Notify(); + } + } + } +#endif // EIGEN_USE_SIMPLE_THREAD_POOL + + TensorOpCost contractionCost(Index m, Index n, Index bm, Index bn, Index bk, + bool shard_by_col, bool prepacked) const { + const int packed_size = std::min(PacketType::size, + PacketType::size); + const int output_packet_size = internal::unpacket_traits::size; + const double kd = static_cast(bk); + // Peak VFMA bandwidth is 0.5. However if we have not enough data for + // vectorization bandwidth drops. The 4.0 and 2.0 bandwidth is determined + // experimentally. + double computeBandwidth = bk == 1 ? 4.0 : + (shard_by_col ? bn : bm) < Traits::nr || + (shard_by_col ? bm : bn) < Traits::mr ? 2.0 : 0.5; +#ifndef EIGEN_VECTORIZE_FMA + // Bandwidth of all of VFMA/MULPS/ADDPS is 0.5 on latest Intel processors. + // However for MULPS/ADDPS we have dependent sequence of 2 such instructions, + // so overall bandwidth is 1.0. + if (computeBandwidth == 0.5) computeBandwidth = 1.0; +#endif + // Computations. + TensorOpCost cost = TensorOpCost(0, 0, kd * computeBandwidth, true, packed_size); + // Output stores. + cost += TensorOpCost(0, sizeof(CoeffReturnType), 0, true, output_packet_size); + if (prepacked) { + // Packing and kernels are executed in different tasks. When we calculate + // task grain size we look only at kernel cost assuming that kernel + // is more expensive than packing. + return cost; + } + // Lhs/rhs loads + computations. + TensorOpCost lhsCost = this->m_leftImpl.costPerCoeff(true) * (kd / n); + TensorOpCost rhsCost = this->m_rightImpl.costPerCoeff(true) * (kd / m); + // Lhs packing memory cost does not contribute considerably to overall + // execution time because lhs is prefetched early and accessed sequentially. + if (shard_by_col) + lhsCost.dropMemoryCost(); + else + rhsCost.dropMemoryCost(); + return cost + lhsCost + rhsCost; + } +}; + +} // end namespace Eigen + +#endif // EIGEN_USE_THREADS +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_THREAD_POOL_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h new file mode 100644 index 0000000..860a694 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConversion.h @@ -0,0 +1,279 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H + +namespace Eigen { + +/** \class TensorConversionOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor conversion class. This class makes it possible to vectorize + * type casting operations when the number of scalars per packet in the source + * and the destination type differ + */ +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef TargetType Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + enum { Flags = 0 }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConversionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConversionOp type; +}; + +} // end namespace internal + + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + return internal::pcast(m_impl.template packet(index)); + } + + private: + const TensorEvaluator& m_impl; +}; + + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + + SrcPacket src1 = m_impl.template packet(index); + SrcPacket src2 = m_impl.template packet(index + SrcPacketSize); + TgtPacket result = internal::pcast(src1, src2); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + + SrcPacket src1 = m_impl.template packet(index); + SrcPacket src2 = m_impl.template packet(index + SrcPacketSize); + SrcPacket src3 = m_impl.template packet(index + 2 * SrcPacketSize); + SrcPacket src4 = m_impl.template packet(index + 3 * SrcPacketSize); + TgtPacket result = internal::pcast(src1, src2, src3, src4); + return result; + } + + private: + const TensorEvaluator& m_impl; +}; + +template +struct PacketConverter { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketConverter(const TensorEvaluator& impl) + : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const { + const int SrcPacketSize = internal::unpacket_traits::size; + // Only call m_impl.packet() when we have direct access to the underlying data. This + // ensures that we don't compute the subexpression twice. We may however load some + // coefficients twice, but in practice this doesn't negatively impact performance. + if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) { + // Force unaligned memory loads since we can't ensure alignment anymore + return internal::pcast(m_impl.template packet(index)); + } else { + const int TgtPacketSize = internal::unpacket_traits::size; + typedef typename internal::unpacket_traits::type SrcType; + typedef typename internal::unpacket_traits::type TgtType; + internal::scalar_cast_op converter; + EIGEN_ALIGN_MAX typename internal::unpacket_traits::type values[TgtPacketSize]; + for (int i = 0; i < TgtPacketSize; ++i) { + values[i] = converter(m_impl.coeff(index+i)); + } + TgtPacket rslt = internal::pload(values); + return rslt; + } + } + + private: + const TensorEvaluator& m_impl; + const typename TensorEvaluator::Index m_maxIndex; +}; + +template +class TensorConversionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::nested::type Nested; + typedef Scalar CoeffReturnType; + typedef typename NumTraits::Real RealScalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType& xpr) + : m_xpr(xpr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + +template struct ConversionSubExprEval { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar*) { + impl.evalSubExprsIfNeeded(NULL); + return true; + } +}; + +template struct ConversionSubExprEval { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(Eval& impl, Scalar* data) { + return impl.evalSubExprsIfNeeded(data); + } +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorConversionOp XprType; + typedef typename XprType::Index Index; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef TargetType Scalar; + typedef TargetType CoeffReturnType; + typedef typename internal::remove_all::Scalar>::type SrcType; + typedef typename PacketType::type PacketReturnType; + typedef typename PacketType::type PacketSourceType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = true, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) + { + return ConversionSubExprEval::value, TensorEvaluator, Scalar>::run(m_impl, data); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() + { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + internal::scalar_cast_op converter; + return converter(m_impl.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const bool Vectorizable = TensorEvaluator::PacketAccess & + internal::type_casting_traits::VectorizedCast; + return PacketConv::run(m_impl, index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double cast_cost = TensorOpCost::CastCost(); + if (vectorized) { + const double SrcCoeffRatio = + internal::type_casting_traits::SrcCoeffRatio; + const double TgtCoeffRatio = + internal::type_casting_traits::TgtCoeffRatio; + return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) + + TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize)); + } else { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost); + } + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + template + struct PacketConv { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator& impl, Index index) { + internal::scalar_cast_op converter; + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = converter(impl.coeff(index+i)); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + }; + + template + struct PacketConv { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType run(const TensorEvaluator& impl, Index index) { + const int SrcCoeffRatio = internal::type_casting_traits::SrcCoeffRatio; + const int TgtCoeffRatio = internal::type_casting_traits::TgtCoeffRatio; + PacketConverter, PacketSourceType, PacketReturnType, + SrcCoeffRatio, TgtCoeffRatio> converter(impl); + return converter.template packet(index); + } + }; + + TensorEvaluator m_impl; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h new file mode 100644 index 0000000..abdf742 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -0,0 +1,1104 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H + +namespace Eigen { + +/** \class TensorConvolution + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor convolution class. + * + * + */ +namespace internal { + +template +class IndexMapper { + public: + IndexMapper(const InputDims& input_dims, const array& kernel_dims, + const array& indices) { + + array dimensions = input_dims; + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = indices[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + dimensions[index] = result_dim; + } + + array inputStrides; + array outputStrides; + if (static_cast(Layout) == static_cast(ColMajor)) { + inputStrides[0] = 1; + outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + inputStrides[i] = inputStrides[i-1] * input_dims[i-1]; + outputStrides[i] = outputStrides[i-1] * dimensions[i-1]; + } + } else { + inputStrides[NumDims - 1] = 1; + outputStrides[NumDims - 1] = 1; + for (int i = static_cast(NumDims) - 2; i >= 0; --i) { + inputStrides[i] = inputStrides[i + 1] * input_dims[i + 1]; + outputStrides[i] = outputStrides[i + 1] * dimensions[i + 1]; + } + } + + array cudaInputDimensions; + array cudaOutputDimensions; + array tmp = dimensions; + array ordering; + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = i + offset; + ordering[index] = indices[i]; + tmp[indices[i]] = -1; + cudaInputDimensions[index] = input_dims[indices[i]]; + cudaOutputDimensions[index] = dimensions[indices[i]]; + } + + int written = static_cast(Layout) == static_cast(ColMajor) + ? NumKernelDims + : 0; + for (int i = 0; i < NumDims; ++i) { + if (tmp[i] >= 0) { + ordering[written] = i; + cudaInputDimensions[written] = input_dims[i]; + cudaOutputDimensions[written] = dimensions[i]; + ++written; + } + } + + for (int i = 0; i < NumDims; ++i) { + m_inputStrides[i] = inputStrides[ordering[i]]; + m_outputStrides[i] = outputStrides[ordering[i]]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + if (i > NumKernelDims) { + m_cudaInputStrides[i] = + m_cudaInputStrides[i - 1] * cudaInputDimensions[i - 1]; + m_cudaOutputStrides[i] = + m_cudaOutputStrides[i - 1] * cudaOutputDimensions[i - 1]; + } else { + m_cudaInputStrides[i] = 1; + m_cudaOutputStrides[i] = 1; + } + } + } else { + for (int i = NumDims - 1; i >= 0; --i) { + if (i + 1 < offset) { + m_cudaInputStrides[i] = + m_cudaInputStrides[i + 1] * cudaInputDimensions[i + 1]; + m_cudaOutputStrides[i] = + m_cudaOutputStrides[i + 1] * cudaOutputDimensions[i + 1]; + } else { + m_cudaInputStrides[i] = 1; + m_cudaOutputStrides[i] = 1; + } + } + } + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputPlaneToTensorInputOffset(Index p) const { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int d = NumDims - 1; d > NumKernelDims; --d) { + const Index idx = p / m_cudaInputStrides[d]; + inputIndex += idx * m_inputStrides[d]; + p -= idx * m_cudaInputStrides[d]; + } + inputIndex += p * m_inputStrides[NumKernelDims]; + } else { + std::ptrdiff_t limit = 0; + if (NumKernelDims < NumDims) { + limit = NumDims - NumKernelDims - 1; + } + for (int d = 0; d < limit; ++d) { + const Index idx = p / m_cudaInputStrides[d]; + inputIndex += idx * m_inputStrides[d]; + p -= idx * m_cudaInputStrides[d]; + } + inputIndex += p * m_inputStrides[limit]; + } + return inputIndex; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputPlaneToTensorOutputOffset(Index p) const { + Index outputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int d = NumDims - 1; d > NumKernelDims; --d) { + const Index idx = p / m_cudaOutputStrides[d]; + outputIndex += idx * m_outputStrides[d]; + p -= idx * m_cudaOutputStrides[d]; + } + outputIndex += p * m_outputStrides[NumKernelDims]; + } else { + std::ptrdiff_t limit = 0; + if (NumKernelDims < NumDims) { + limit = NumDims - NumKernelDims - 1; + } + for (int d = 0; d < limit; ++d) { + const Index idx = p / m_cudaOutputStrides[d]; + outputIndex += idx * m_outputStrides[d]; + p -= idx * m_cudaOutputStrides[d]; + } + outputIndex += p * m_outputStrides[limit]; + } + return outputIndex; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaInputKernelToTensorInputOffset(Index i, Index j, Index k) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_inputStrides[offset] + j * m_inputStrides[offset + 1] + + k * m_inputStrides[offset + 2]; + } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Index mapCudaOutputKernelToTensorOutputOffset(Index i, Index j, Index k) const { + const size_t offset = static_cast(Layout) == static_cast(ColMajor) + ? 0 + : NumDims - NumKernelDims; + return i * m_outputStrides[offset] + j * m_outputStrides[offset + 1] + + k * m_outputStrides[offset + 2]; + } + + private: + static const int NumDims = internal::array_size::value; + array m_inputStrides; + array m_outputStrides; + array m_cudaInputStrides; + array m_cudaOutputStrides; +}; + + + +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename promote_storage_type::ret Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename InputXprType::Nested LhsNested; + typedef typename KernelXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorConvolutionOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorConvolutionOp type; +}; + +} // end namespace internal + + + +template +class TensorConvolutionOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims) + : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Indices& indices() const { return m_indices; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& + inputExpression() const { return m_input_xpr; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const typename internal::remove_all::type& + kernelExpression() const { return m_kernel_xpr; } + + protected: + typename InputXprType::Nested m_input_xpr; + typename KernelXprType::Nested m_kernel_xpr; + const Indices m_indices; +}; + + +template +struct TensorEvaluator, Device> +{ + typedef TensorConvolutionOp XprType; + + static const int NumDims = internal::array_size::Dimensions>::value; + static const int NumKernelDims = internal::array_size::value; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); + const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStride[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1]; + } + } else { + m_inputStride[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1]; + } + } + + m_dimensions = m_inputImpl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + if (i > 0) { + m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1]; + } else { + m_kernelStride[0] = 1; + } + m_indexStride[i] = m_inputStride[index]; + } + + m_outputStride[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1]; + } + } else { + for (int i = NumKernelDims - 1; i >= 0; --i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + if (i < NumKernelDims - 1) { + m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1]; + } else { + m_kernelStride[NumKernelDims - 1] = 1; + } + m_indexStride[i] = m_inputStride[index]; + } + + m_outputStride[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + m_inputImpl.evalSubExprsIfNeeded(NULL); + preloadKernel(); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_inputImpl.cleanup(); + if (m_local_kernel) { + m_device.deallocate((void*)m_kernel); + m_local_kernel = false; + } + m_kernel = NULL; + } + + void evalTo(typename XprType::Scalar* buffer) { + evalSubExprsIfNeeded(NULL); + for (int i = 0; i < dimensions().TotalSize(); ++i) { + buffer[i] += coeff(i); + } + cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + CoeffReturnType result = CoeffReturnType(0); + convolve(firstInput(index), 0, NumKernelDims-1, result); + return result; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(const Index index) const + { + Index indices[2] = {index, index+PacketSize-1}; + Index startInputs[2] = {0, 0}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_outputStride[i]; + const Index idx1 = indices[1] / m_outputStride[i]; + startInputs[0] += idx0 * m_inputStride[i]; + startInputs[1] += idx1 * m_inputStride[i]; + indices[0] -= idx0 * m_outputStride[i]; + indices[1] -= idx1 * m_outputStride[i]; + } + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / m_outputStride[i]; + const Index idx1 = indices[1] / m_outputStride[i]; + startInputs[0] += idx0 * m_inputStride[i]; + startInputs[1] += idx1 * m_inputStride[i]; + indices[0] -= idx0 * m_outputStride[i]; + indices[1] -= idx1 * m_outputStride[i]; + } + } + startInputs[0] += indices[0]; + startInputs[1] += indices[1]; + + if (startInputs[1]-startInputs[0] == PacketSize-1) { + PacketReturnType result = internal::pset1(0); + convolvePacket(startInputs[0], 0, NumKernelDims-1, result); + return result; + } else { + EIGEN_ALIGN_MAX Scalar data[PacketSize]; + data[0] = Scalar(0); + convolve(startInputs[0], 0, NumKernelDims-1, data[0]); + for (int i = 1; i < PacketSize-1; ++i) { + data[i] = Scalar(0); + convolve(firstInput(index+i), 0, NumKernelDims-1, data[i]); + } + data[PacketSize-1] = Scalar(0); + convolve(startInputs[1], 0, NumKernelDims-1, data[PacketSize-1]); + return internal::pload(data); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. + const double convolve_compute_cost = + TensorOpCost::AddCost() + TensorOpCost::MulCost(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost() + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, + PacketSize)); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { + Index startInput = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStride[i]; + startInput += idx * m_inputStride[i]; + index -= idx * m_outputStride[i]; + } + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStride[i]; + startInput += idx * m_inputStride[i]; + index -= idx * m_outputStride[i]; + } + } + startInput += index; + return startInput; + } + + EIGEN_DEVICE_FUNC void convolve(Index firstIndex, Index firstKernel, int DimIndex, CoeffReturnType& accum) const { + for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { + const Index input = firstIndex + j * m_indexStride[DimIndex]; + const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; + if (DimIndex > 0) { + convolve(input, kernel, DimIndex-1, accum); + } else { + accum += m_inputImpl.coeff(input) * m_kernel[kernel]; + } + } + } + + template + EIGEN_DEVICE_FUNC void convolvePacket(Index firstIndex, Index firstKernel, int DimIndex, Packet& accum) const { + for (int j = 0; j < m_kernelImpl.dimensions()[DimIndex]; ++j) { + const Index input = firstIndex + j * m_indexStride[DimIndex]; + const Index kernel = firstKernel + j * m_kernelStride[DimIndex]; + if (DimIndex > 0) { + convolvePacket(input, kernel, DimIndex-1, accum); + } else { + accum = internal::pmadd(m_inputImpl.template packet(input), internal::pset1(m_kernel[kernel]), accum); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void preloadKernel() { + // Don't make a local copy of the kernel unless we have to (i.e. it's an + // expression that needs to be evaluated) + const Scalar* in_place = m_kernelImpl.data(); + if (in_place) { + m_kernel = in_place; + m_local_kernel = false; + } else { + size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); + Scalar* local = (Scalar*)m_device.allocate(kernel_sz); + typedef TensorEvalToOp EvalTo; + EvalTo evalToTmp(local, m_kernelArg); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); + + m_kernel = local; + m_local_kernel = true; + } + } + + array m_inputStride; + array m_outputStride; + + array m_indexStride; + array m_kernelStride; + TensorEvaluator m_inputImpl; + TensorEvaluator m_kernelImpl; + Dimensions m_dimensions; + + KernelArgType m_kernelArg; + const Scalar* m_kernel; + bool m_local_kernel; + const Device& m_device; +}; + + + + +// Use an optimized implementation of the evaluation code for GPUs whenever possible. +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + +template +struct GetKernelSize { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int /*kernelSize*/) const { + return StaticKernelSize; + } +}; +template <> +struct GetKernelSize { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int operator() (const int kernelSize) const { + return kernelSize; + } +}; + +template +__global__ void EigenConvolutionKernel1D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const int numPlanes, const int numX, + const int maxX, const int kernelSize, float* buffer) { + extern __shared__ float s[]; + + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + GetKernelSize()(kernelSize); + const int num_x_output = last_x - first_x + 1; + + const int first_plane = blockIdx.y * blockDim.y; + const int plane_stride = blockDim.y * gridDim.y; + + for (int p = first_plane + threadIdx.y; p < numPlanes; p += plane_stride) { + // Load inputs to shared memory + const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = threadIdx.y * num_x_input; + #pragma unroll + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x); + s[i + plane_kernel_offset] = eval.coeff(tensor_index); + } + + __syncthreads(); + + // Compute the convolution + const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); + + #pragma unroll + for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { + const int kernel_offset = plane_kernel_offset + i; + float result = 0.0f; + #pragma unroll + for (int k = 0; k < GetKernelSize()(kernelSize); ++k) { + result += s[k + kernel_offset] * kernel[k]; + } + const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x); + buffer[tensor_index] = result; + } + __syncthreads(); + } +}; + +template +__global__ void EigenConvolutionKernel2D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const int numPlanes, const int numX, + const int maxX, const int numY, const int maxY, const int kernelSizeX, + const int kernelSizeY, float* buffer) { + extern __shared__ float s[]; + + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + GetKernelSize()(kernelSizeX); + const int num_x_output = last_x - first_x + 1; + + const int first_y = blockIdx.y * maxY; + const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1; + const int num_y_input = last_y - first_y + GetKernelSize()(kernelSizeY); + const int num_y_output = last_y - first_y + 1; + + const int first_plane = blockIdx.z * blockDim.z; + const int plane_stride = blockDim.z * gridDim.z; + + for (int p = first_plane + threadIdx.z; p < numPlanes; p += plane_stride) { + + const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = threadIdx.z * num_y_input; + + // Load inputs to shared memory + #pragma unroll + for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { + const int input_offset = num_x_input * (j + plane_kernel_offset); + #pragma unroll + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y); + s[i + input_offset] = eval.coeff(tensor_index); + } + } + + __syncthreads(); + + // Convolution + const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); + + #pragma unroll + for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { + #pragma unroll + for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { + float result = 0.0f; + #pragma unroll + for (int l = 0; l < GetKernelSize()(kernelSizeY); ++l) { + const int kernel_offset = kernelSizeX * l; + const int input_offset = i + num_x_input * (j + l + plane_kernel_offset); + #pragma unroll + for (int k = 0; k < GetKernelSize()(kernelSizeX); ++k) { + result += s[k + input_offset] * kernel[k + kernel_offset]; + } + } + const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y); + buffer[tensor_index] = result; + } + } + + __syncthreads(); + } +}; + +template +__global__ void EigenConvolutionKernel3D( + InputEvaluator eval, + const internal::IndexMapper + indexMapper, + const float* __restrict kernel, const size_t numPlanes, const size_t numX, + const size_t maxX, const size_t numY, const size_t maxY, const size_t numZ, + const size_t maxZ, const size_t kernelSizeX, const size_t kernelSizeY, + const size_t kernelSizeZ, float* buffer) { + extern __shared__ float s[]; + + // Load inputs to shared memory + const int first_x = blockIdx.x * maxX; + const int last_x = (first_x + maxX < numX ? first_x + maxX : numX) - 1; + const int num_x_input = last_x - first_x + kernelSizeX; + + const int first_y = blockIdx.y * maxY; + const int last_y = (first_y + maxY < numY ? first_y + maxY : numY) - 1; + const int num_y_input = last_y - first_y + kernelSizeY; + + const int first_z = blockIdx.z * maxZ; + const int last_z = (first_z + maxZ < numZ ? first_z + maxZ : numZ) - 1; + const int num_z_input = last_z - first_z + kernelSizeZ; + + for (int p = 0; p < numPlanes; ++p) { + + const int plane_input_offset = indexMapper.mapCudaInputPlaneToTensorInputOffset(p); + const int plane_kernel_offset = 0; + + for (int k = threadIdx.z; k < num_z_input; k += blockDim.z) { + for (int j = threadIdx.y; j < num_y_input; j += blockDim.y) { + for (int i = threadIdx.x; i < num_x_input; i += blockDim.x) { + const int tensor_index = plane_input_offset + indexMapper.mapCudaInputKernelToTensorInputOffset(i+first_x, j+first_y, k+first_z); + s[i + num_x_input * (j + num_y_input * (k + plane_kernel_offset))] = eval.coeff(tensor_index); + } + } + } + + __syncthreads(); + + // Convolution + const int num_z_output = last_z - first_z + 1; + const int num_y_output = last_y - first_y + 1; + const int num_x_output = last_x - first_x + 1; + const int plane_output_offset = indexMapper.mapCudaOutputPlaneToTensorOutputOffset(p); + + for (int k = threadIdx.z; k < num_z_output; k += blockDim.z) { + for (int j = threadIdx.y; j < num_y_output; j += blockDim.y) { + for (int i = threadIdx.x; i < num_x_output; i += blockDim.x) { + float result = 0.0f; + for (int n = 0; n < kernelSizeZ; ++n) { + for (int m = 0; m < kernelSizeY; ++m) { + for (int l = 0; l < kernelSizeX; ++l) { + result += s[i + l + num_x_input * (j + m + num_y_input * (k + n + plane_kernel_offset))] * kernel[l + kernelSizeX * (m + kernelSizeY * n)]; + } + } + } + const int tensor_index = plane_output_offset + indexMapper.mapCudaOutputKernelToTensorOutputOffset(i+first_x, j+first_y, k+first_z); + buffer[tensor_index] = result; + } + } + } + __syncthreads(); + } +}; + + + +template +struct TensorEvaluator, GpuDevice> +{ + typedef TensorConvolutionOp XprType; + + static const int NumDims = internal::array_size::Dimensions>::value; + static const int NumKernelDims = internal::array_size::value; + typedef typename XprType::Index Index; + typedef DSizes Dimensions; + typedef typename TensorEvaluator::Dimensions KernelDimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device) + : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); + const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); + + m_dimensions = m_inputImpl.dimensions(); + for (int i = 0; i < NumKernelDims; ++i) { + const Index index = op.indices()[i]; + const Index input_dim = input_dims[index]; + const Index kernel_dim = kernel_dims[i]; + const Index result_dim = input_dim - kernel_dim + 1; + m_dimensions[index] = result_dim; + } + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename InputArgType::Scalar Scalar; + static const int PacketSize = internal::unpacket_traits::size; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { + preloadKernel(); + m_inputImpl.evalSubExprsIfNeeded(NULL); + if (data) { + executeEval(data); + return false; + } else { + m_buf = (Scalar*)m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)); + executeEval(m_buf); + return true; + } + } + + EIGEN_STRONG_INLINE void cleanup() { + m_inputImpl.cleanup(); + if (m_buf) { + m_device.deallocate(m_buf); + m_buf = NULL; + } + if (m_local_kernel) { + m_device.deallocate((void*)m_kernel); + m_local_kernel = false; + } + m_kernel = NULL; + } + + EIGEN_STRONG_INLINE void preloadKernel() { + // Don't make a local copy of the kernel unless we have to (i.e. it's an + // expression that needs to be evaluated) + const Scalar* in_place = m_kernelImpl.data(); + if (in_place) { + m_kernel = in_place; + m_local_kernel = false; + } else { + size_t kernel_sz = m_kernelImpl.dimensions().TotalSize() * sizeof(Scalar); + Scalar* local = (Scalar*)m_device.allocate(kernel_sz); + typedef TensorEvalToOp EvalTo; + EvalTo evalToTmp(local, m_kernelArg); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); + + m_kernel = local; + m_local_kernel = true; + } + } + + static unsigned int ceil(unsigned int num, unsigned int denom) { + const unsigned int rounded_toward_zero = num / denom; + if (num > rounded_toward_zero * denom) { + return rounded_toward_zero + 1; + } + return rounded_toward_zero; + } + + void executeEval(Scalar* data) const { + typedef typename TensorEvaluator::Dimensions InputDims; + + const int maxSharedMem = m_device.sharedMemPerBlock(); + const int maxThreadsPerBlock = m_device.maxCudaThreadsPerBlock(); + const int maxBlocksPerProcessor = m_device.maxCudaThreadsPerMultiProcessor() / maxThreadsPerBlock; + const int numMultiProcessors = m_device.getNumCudaMultiProcessors(); + const int warpSize = 32; + + switch (NumKernelDims) { + case 1: { + const int kernel_size = m_kernelImpl.dimensions().TotalSize(); + + const int numX = dimensions()[m_indices[0]]; + const int numP = dimensions().TotalSize() / numX; + int maxX; + dim3 block_size; + + const int single_stride_dim = + static_cast(Layout) == static_cast(ColMajor) + ? 0 + : m_inputImpl.dimensions().rank() - 1; + if (m_indices[0] == single_stride_dim) { + // Maximum the reuse + const int inner_dim = ((maxSharedMem / (sizeof(Scalar)) - kernel_size + 1 + 31) / 32) * 32; + maxX = numext::mini(inner_dim, numX); + const int maxP = numext::mini(maxSharedMem / ((kernel_size - 1 + maxX) * sizeof(Scalar)), numP); + block_size.x = numext::mini(maxThreadsPerBlock, maxX); + block_size.y = numext::mini(maxThreadsPerBlock / block_size.x, maxP); + } + else { + // Read as much as possible alongside the inner most dimension, that is the plane + const int inner_dim = maxSharedMem / ((warpSize + kernel_size) * sizeof(Scalar)); + const int maxP = numext::mini(inner_dim, numP); + maxX = numext::mini(maxSharedMem / (inner_dim * sizeof(Scalar)) - kernel_size + 1, numX); + + block_size.x = numext::mini(warpSize, maxX); + block_size.y = numext::mini(maxThreadsPerBlock/block_size.x, maxP); + } + + const int shared_mem = block_size.y * (maxX + kernel_size - 1) * sizeof(Scalar); + assert(shared_mem <= maxSharedMem); + + const int num_x_blocks = ceil(numX, maxX); + const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); + const int num_y_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks); + + dim3 num_blocks(num_x_blocks, numext::mini(num_y_blocks, ceil(numP, block_size.y))); + + + //cout << "launching 1D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " maxX: " << maxX << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + + const array indices(m_indices[0]); + const array kernel_dims(m_kernelImpl.dimensions()[0]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + switch(kernel_size) { + case 4: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D, Index, InputDims, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 4, data); + break; + } + case 7: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D, Index, InputDims, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, 7, data); + break; + } + default: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel1D, Index, InputDims, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, kernel_size, data); + } + } + break; + } + + case 2: { + const int idxX = + static_cast(Layout) == static_cast(ColMajor) ? 0 : 1; + const int idxY = + static_cast(Layout) == static_cast(ColMajor) ? 1 : 0; + const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; + const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; + + const int numX = dimensions()[m_indices[idxX]]; + const int numY = dimensions()[m_indices[idxY]]; + const int numP = dimensions().TotalSize() / (numX*numY); + + const float scaling_factor = sqrtf(static_cast(maxSharedMem) / (sizeof(Scalar) * kernel_size_y * kernel_size_x)); + + // Snap maxX to warp size + int inner_dim = ((static_cast(scaling_factor * kernel_size_x) - kernel_size_x + 1 + 32) / 32) * 32; + const int maxX = numext::mini(inner_dim, numX); + const int maxY = numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1)) - kernel_size_y + 1, numY); + const int maxP = numext::mini(maxSharedMem / ((kernel_size_x - 1 + maxX) * (kernel_size_y - 1 + maxY) * sizeof(Scalar)), numP); + + dim3 block_size; + block_size.x = numext::mini(1024, maxX); + block_size.y = numext::mini(1024/block_size.x, maxY); + block_size.z = numext::mini(1024/(block_size.x*block_size.y), maxP); + + const int shared_mem = block_size.z * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * sizeof(Scalar); + assert(shared_mem <= maxSharedMem); + + const int num_x_blocks = ceil(numX, maxX); + const int num_y_blocks = ceil(numY, maxY); + const int blocksPerProcessor = numext::mini(maxBlocksPerProcessor, maxSharedMem / shared_mem); + const int num_z_blocks = ceil(numMultiProcessors * blocksPerProcessor, num_x_blocks * num_y_blocks); + + dim3 num_blocks(num_x_blocks, num_y_blocks, numext::mini(num_z_blocks, ceil(numP, block_size.z))); + + + //cout << "launching 2D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " maxX: " << maxX << " maxY: " << maxY << " maxP: " << maxP << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + + const array indices(m_indices[idxX], m_indices[idxY]); + const array kernel_dims(m_kernelImpl.dimensions()[idxX], + m_kernelImpl.dimensions()[idxY]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + switch (kernel_size_x) { + case 4: { + switch (kernel_size_y) { + case 7: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 4, 7>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, 7, data); + break; + } + default: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 4, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 4, kernel_size_y, data); + break; + } + } + break; + } + case 7: { + switch (kernel_size_y) { + case 4: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 7, 4>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, 4, data); + break; + } + default: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, 7, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, 7, kernel_size_y, data); + break; + } + } + break; + } + default: { + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel2D, Index, InputDims, Dynamic, Dynamic>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, kernel_size_x, kernel_size_y, data); + break; + } + } + break; + } + + case 3: { + const int idxX = + static_cast(Layout) == static_cast(ColMajor) ? 0 : 2; + const int idxY = + static_cast(Layout) == static_cast(ColMajor) ? 1 : 1; + const int idxZ = + static_cast(Layout) == static_cast(ColMajor) ? 2 : 0; + + const int kernel_size_x = m_kernelImpl.dimensions()[idxX]; + const int kernel_size_y = m_kernelImpl.dimensions()[idxY]; + const int kernel_size_z = m_kernelImpl.dimensions()[idxZ]; + + const int numX = dimensions()[m_indices[idxX]]; + const int numY = dimensions()[m_indices[idxY]]; + const int numZ = dimensions()[m_indices[idxZ]]; + const int numP = dimensions().TotalSize() / (numX*numY*numZ); + + const int maxX = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * kernel_size_y * kernel_size_z) - kernel_size_x + 1, numX)); + const int maxY = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * kernel_size_z) - kernel_size_y + 1, numY)); + const int maxZ = numext::mini(128, numext::mini(maxSharedMem / (sizeof(Scalar) * (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1)) - kernel_size_z + 1, numZ)); + + dim3 block_size; + block_size.x = numext::mini(32, maxX); + block_size.y = numext::mini(32, maxY); + block_size.z = numext::mini(1024/(block_size.x*block_size.y), maxZ); + dim3 num_blocks(ceil(numX, maxX), ceil(numY, maxY), ceil(numZ, maxZ)); + + const int shared_mem = (maxX + kernel_size_x - 1) * (maxY + kernel_size_y - 1) * (maxZ + kernel_size_z - 1) * sizeof(Scalar); + assert(shared_mem <= maxSharedMem); + + //cout << "launching 3D kernel with block_size.x: " << block_size.x << " block_size.y: " << block_size.y << " block_size.z: " << block_size.z << " num_blocks.x: " << num_blocks.x << " num_blocks.y: " << num_blocks.y << " num_blocks.z: " << num_blocks.z << " shared_mem: " << shared_mem << " in stream " << m_device.stream() << endl; + const array indices(m_indices[idxX], m_indices[idxY], + m_indices[idxZ]); + const array kernel_dims(m_kernelImpl.dimensions()[idxX], + m_kernelImpl.dimensions()[idxY], + m_kernelImpl.dimensions()[idxZ]); + internal::IndexMapper indexMapper( + m_inputImpl.dimensions(), kernel_dims, indices); + + LAUNCH_CUDA_KERNEL((EigenConvolutionKernel3D, Index, InputDims>), num_blocks, block_size, shared_mem, m_device, m_inputImpl, indexMapper, m_kernel, numP, numX, maxX, numY, maxY, numZ, maxZ, kernel_size_x, kernel_size_y, kernel_size_z, data); + break; + } + + default: { + EIGEN_STATIC_ASSERT((NumKernelDims >= 1 && NumKernelDims <= 3), THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE); + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + eigen_assert(m_buf); + eigen_assert(index < m_dimensions.TotalSize()); + return m_buf[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(const Index index) const + { + eigen_assert(m_buf); + eigen_assert(index < m_dimensions.TotalSize()); + return internal::ploadt(m_buf+index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): FIXME: For now, this is just a copy of the CPU cost + // model. + const double kernel_size = m_kernelImpl.dimensions().TotalSize(); + // We ignore the use of fused multiply-add. + const double convolve_compute_cost = + TensorOpCost::AddCost() + TensorOpCost::MulCost(); + const double firstIndex_compute_cost = + NumDims * + (2 * TensorOpCost::AddCost() + 2 * TensorOpCost::MulCost() + + TensorOpCost::DivCost()); + return TensorOpCost(0, 0, firstIndex_compute_cost, vectorized, PacketSize) + + kernel_size * (m_inputImpl.costPerCoeff(vectorized) + + m_kernelImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, convolve_compute_cost, vectorized, + PacketSize)); + } + + private: + // No assignment (copies are needed by the kernels) + TensorEvaluator& operator = (const TensorEvaluator&); + + TensorEvaluator m_inputImpl; + TensorEvaluator m_kernelImpl; + KernelArgType m_kernelArg; + Indices m_indices; + Dimensions m_dimensions; + Scalar* m_buf; + const Scalar* m_kernel; + bool m_local_kernel; + + const GpuDevice& m_device; +}; +#endif + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTION_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h new file mode 100644 index 0000000..83c449c --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h @@ -0,0 +1,212 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Rasmus Munk Larsen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H +#define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H + +namespace Eigen { + +/** \class TensorEvaluator + * \ingroup CXX11_Tensor_Module + * + * \brief A cost model used to limit the number of threads used for evaluating + * tensor expression. + * + */ + +// Class storing the cost of evaluating a tensor expression in terms of the +// estimated number of operand bytes loads, bytes stored, and compute cycles. +class TensorOpCost { + public: + // TODO(rmlarsen): Fix the scalar op costs in Eigen proper. Even a simple + // model based on minimal reciprocal throughput numbers from Intel or + // Agner Fog's tables would be better than what is there now. + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost() { + return internal::functor_traits< + internal::scalar_product_op >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost() { + return internal::functor_traits >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost() { + return internal::functor_traits< + internal::scalar_quotient_op >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost() { + return internal::functor_traits >::Cost; + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost() { + return internal::functor_traits< + internal::scalar_cast_op >::Cost; + } + + EIGEN_DEVICE_FUNC + TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {} + EIGEN_DEVICE_FUNC + TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles) + : bytes_loaded_(bytes_loaded), + bytes_stored_(bytes_stored), + compute_cycles_(compute_cycles) {} + + EIGEN_DEVICE_FUNC + TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, + bool vectorized, double packet_size) + : bytes_loaded_(bytes_loaded), + bytes_stored_(bytes_stored), + compute_cycles_(vectorized ? compute_cycles / packet_size + : compute_cycles) { + eigen_assert(bytes_loaded >= 0 && (numext::isfinite)(bytes_loaded)); + eigen_assert(bytes_stored >= 0 && (numext::isfinite)(bytes_stored)); + eigen_assert(compute_cycles >= 0 && (numext::isfinite)(compute_cycles)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const { + return bytes_loaded_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_stored() const { + return bytes_stored_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double compute_cycles() const { + return compute_cycles_; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double total_cost( + double load_cost, double store_cost, double compute_cost) const { + return load_cost * bytes_loaded_ + store_cost * bytes_stored_ + + compute_cost * compute_cycles_; + } + + // Drop memory access component. Intended for cases when memory accesses are + // sequential or are completely masked by computations. + EIGEN_DEVICE_FUNC void dropMemoryCost() { + bytes_loaded_ = 0; + bytes_stored_ = 0; + } + + // TODO(rmlarsen): Define min in terms of total cost, not elementwise. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); + } + + // TODO(rmlarsen): Define max in terms of total cost, not elementwise. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=( + const TensorOpCost& rhs) { + bytes_loaded_ += rhs.bytes_loaded(); + bytes_stored_ += rhs.bytes_stored(); + compute_cycles_ += rhs.compute_cycles(); + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator*=(double rhs) { + bytes_loaded_ *= rhs; + bytes_stored_ *= rhs; + compute_cycles_ *= rhs; + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator+( + TensorOpCost lhs, const TensorOpCost& rhs) { + lhs += rhs; + return lhs; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( + TensorOpCost lhs, double rhs) { + lhs *= rhs; + return lhs; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*( + double lhs, TensorOpCost rhs) { + rhs *= lhs; + return rhs; + } + + friend std::ostream& operator<<(std::ostream& os, const TensorOpCost& tc) { + return os << "[bytes_loaded = " << tc.bytes_loaded() + << ", bytes_stored = " << tc.bytes_stored() + << ", compute_cycles = " << tc.compute_cycles() << "]"; + } + + private: + double bytes_loaded_; + double bytes_stored_; + double compute_cycles_; +}; + +// TODO(rmlarsen): Implement a policy that chooses an "optimal" number of theads +// in [1:max_threads] instead of just switching multi-threading off for small +// work units. +template +class TensorCostModel { + public: + // Scaling from Eigen compute cost to device cycles. + static const int kDeviceCyclesPerComputeCycle = 1; + + // Costs in device cycles. + static const int kStartupCycles = 100000; + static const int kPerThreadCycles = 100000; + static const int kTaskSize = 40000; + + // Returns the number of threads in [1:max_threads] to use for + // evaluating an expression with the given output size and cost per + // coefficient. + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads( + double output_size, const TensorOpCost& cost_per_coeff, int max_threads) { + double cost = totalCost(output_size, cost_per_coeff); + int threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9; + return numext::mini(max_threads, numext::maxi(1, threads)); + } + + // taskSize assesses parallel task size. + // Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task + // granularity needs to be increased to mitigate parallelization overheads. + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double taskSize( + double output_size, const TensorOpCost& cost_per_coeff) { + return totalCost(output_size, cost_per_coeff) / kTaskSize; + } + + private: + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost( + double output_size, const TensorOpCost& cost_per_coeff) { + // Cost of memory fetches from L2 cache. 64 is typical cache line size. + // 11 is L2 cache latency on Haswell. + // We don't know whether data is in L1, L2 or L3. But we are most interested + // in single-threaded computational time around 100us-10ms (smaller time + // is too small for parallelization, larger time is not intersting + // either because we are probably using all available threads already). + // And for the target time range, L2 seems to be what matters. Data set + // fitting into L1 is too small to take noticeable time. Data set fitting + // only into L3 presumably will take more than 10ms to load and process. + const double kLoadCycles = 1.0 / 64 * 11; + const double kStoreCycles = 1.0 / 64 * 11; + // Scaling from Eigen compute cost to device cycles. + return output_size * + cost_per_coeff.total_cost(kLoadCycles, kStoreCycles, + kDeviceCyclesPerComputeCycle); + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h new file mode 100644 index 0000000..e020d07 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -0,0 +1,313 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H +#define EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H + +namespace Eigen { + +/** \class TensorCustomUnaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor custom class. + * + * + */ +namespace internal { +template +struct traits > +{ + typedef typename XprType::Scalar Scalar; + typedef typename XprType::StorageKind StorageKind; + typedef typename XprType::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCustomUnaryOp& type; +}; + +template +struct nested > +{ + typedef TensorCustomUnaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCustomUnaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomUnaryOp(const XprType& expr, const CustomUnaryFunc& func) + : m_expr(expr), m_func(func) {} + + EIGEN_DEVICE_FUNC + const CustomUnaryFunc& func() const { return m_func; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_expr; } + + protected: + typename XprType::Nested m_expr; + const CustomUnaryFunc m_func; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorCustomUnaryOp ArgType; + typedef typename internal::traits::Index Index; + static const int NumDims = internal::traits::NumDimensions; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = (internal::packet_traits::size > 1), + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const ArgType& op, const Device& device) + : m_op(op), m_device(device), m_result(NULL) + { + m_dimensions = op.func().dimensions(op.expression()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast( + m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + evalTo(m_result); + return true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } + + protected: + EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { + TensorMap > result( + data, m_dimensions); + m_op.func().eval(m_op.expression(), result, m_device); + } + + Dimensions m_dimensions; + const ArgType m_op; + const Device& m_device; + CoeffReturnType* m_result; +}; + + + +/** \class TensorCustomBinaryOp + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor custom class. + * + * + */ +namespace internal { +template +struct traits > +{ + typedef typename internal::promote_storage_type::ret Scalar; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = traits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCustomBinaryOp& type; +}; + +template +struct nested > +{ + typedef TensorCustomBinaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCustomBinaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::traits::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCustomBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const CustomBinaryFunc& func) + + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_func(func) {} + + EIGEN_DEVICE_FUNC + const CustomBinaryFunc& func() const { return m_func; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const CustomBinaryFunc m_func; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorCustomBinaryOp XprType; + typedef typename internal::traits::Index Index; + static const int NumDims = internal::traits::NumDimensions; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = (internal::packet_traits::size > 1), + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_op(op), m_device(device), m_result(NULL) + { + m_dimensions = op.func().dimensions(op.lhsExpression(), op.rhsExpression()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + if (data) { + evalTo(data); + return false; + } else { + m_result = static_cast(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + evalTo(m_result); + return true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_result != NULL) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_result[index]; + } + + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const { + return internal::ploadt(m_result + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // TODO(rmlarsen): Extend CustomOp API to return its cost estimate. + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_result; } + + protected: + EIGEN_DEVICE_FUNC void evalTo(Scalar* data) { + TensorMap > result(data, m_dimensions); + m_op.func().eval(m_op.lhsExpression(), m_op.rhsExpression(), result, m_device); + } + + Dimensions m_dimensions; + const XprType m_op; + const Device& m_device; + CoeffReturnType* m_result; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_CUSTOM_OP_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h new file mode 100644 index 0000000..29e50a3 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDevice.h @@ -0,0 +1,68 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H + +namespace Eigen { + +/** \class TensorDevice + * \ingroup CXX11_Tensor_Module + * + * \brief Pseudo expression providing an operator = that will evaluate its argument + * on the specified computing 'device' (GPU, thread pool, ...) + * + * Example: + * C.device(EIGEN_GPU) = A + B; + * + * Todo: operator *= and /=. + */ + +template class TensorDevice { + public: + TensorDevice(const DeviceType& device, ExpressionType& expression) : m_device(device), m_expression(expression) {} + + template + EIGEN_STRONG_INLINE TensorDevice& operator=(const OtherDerived& other) { + typedef TensorAssignOp Assign; + Assign assign(m_expression, other); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + template + EIGEN_STRONG_INLINE TensorDevice& operator+=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Sum; + Sum sum(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, sum); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + template + EIGEN_STRONG_INLINE TensorDevice& operator-=(const OtherDerived& other) { + typedef typename OtherDerived::Scalar Scalar; + typedef TensorCwiseBinaryOp, const ExpressionType, const OtherDerived> Difference; + Difference difference(m_expression, other); + typedef TensorAssignOp Assign; + Assign assign(m_expression, difference); + internal::TensorExecutor::run(assign, m_device); + return *this; + } + + protected: + const DeviceType& m_device; + ExpressionType& m_expression; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h new file mode 100644 index 0000000..4f5767b --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -0,0 +1,337 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_GPU) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H + +namespace Eigen { + +static const int kCudaScratchSize = 1024; + +// This defines an interface that GPUDevice can take to use +// CUDA streams underneath. +class StreamInterface { + public: + virtual ~StreamInterface() {} + + virtual const cudaStream_t& stream() const = 0; + virtual const cudaDeviceProp& deviceProperties() const = 0; + + // Allocate memory on the actual device where the computation will run + virtual void* allocate(size_t num_bytes) const = 0; + virtual void deallocate(void* buffer) const = 0; + + // Return a scratchpad buffer of size 1k + virtual void* scratchpad() const = 0; + + // Return a semaphore. The semaphore is initially initialized to 0, and + // each kernel using it is responsible for resetting to 0 upon completion + // to maintain the invariant that the semaphore is always equal to 0 upon + // each kernel start. + virtual unsigned int* semaphore() const = 0; +}; + +static cudaDeviceProp* m_deviceProperties; +static bool m_devicePropInitialized = false; + +static void initializeDeviceProp() { + if (!m_devicePropInitialized) { + // Attempts to ensure proper behavior in the case of multiple threads + // calling this function simultaneously. This would be trivial to + // implement if we could use std::mutex, but unfortunately mutex don't + // compile with nvcc, so we resort to atomics and thread fences instead. + // Note that if the caller uses a compiler that doesn't support c++11 we + // can't ensure that the initialization is thread safe. +#if __cplusplus >= 201103L + static std::atomic first(true); + if (first.exchange(false)) { +#else + static bool first = true; + if (first) { + first = false; +#endif + // We're the first thread to reach this point. + int num_devices; + cudaError_t status = cudaGetDeviceCount(&num_devices); + if (status != cudaSuccess) { + std::cerr << "Failed to get the number of CUDA devices: " + << cudaGetErrorString(status) + << std::endl; + assert(status == cudaSuccess); + } + m_deviceProperties = new cudaDeviceProp[num_devices]; + for (int i = 0; i < num_devices; ++i) { + status = cudaGetDeviceProperties(&m_deviceProperties[i], i); + if (status != cudaSuccess) { + std::cerr << "Failed to initialize CUDA device #" + << i + << ": " + << cudaGetErrorString(status) + << std::endl; + assert(status == cudaSuccess); + } + } + +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_release); +#endif + m_devicePropInitialized = true; + } else { + // Wait for the other thread to inititialize the properties. + while (!m_devicePropInitialized) { +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_acquire); +#endif + sleep(1); + } + } + } +} + +static const cudaStream_t default_stream = cudaStreamDefault; + +class CudaStreamDevice : public StreamInterface { + public: + // Use the default stream on the current device + CudaStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) { + cudaGetDevice(&device_); + initializeDeviceProp(); + } + // Use the default stream on the specified device + CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) { + initializeDeviceProp(); + } + // Use the specified stream. Note that it's the + // caller responsibility to ensure that the stream can run on + // the specified device. If no device is specified the code + // assumes that the stream is associated to the current gpu device. + CudaStreamDevice(const cudaStream_t* stream, int device = -1) + : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) { + if (device < 0) { + cudaGetDevice(&device_); + } else { + int num_devices; + cudaError_t err = cudaGetDeviceCount(&num_devices); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + assert(device < num_devices); + device_ = device; + } + initializeDeviceProp(); + } + + virtual ~CudaStreamDevice() { + if (scratch_) { + deallocate(scratch_); + } + } + + const cudaStream_t& stream() const { return *stream_; } + const cudaDeviceProp& deviceProperties() const { + return m_deviceProperties[device_]; + } + virtual void* allocate(size_t num_bytes) const { + cudaError_t err = cudaSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + void* result; + err = cudaMalloc(&result, num_bytes); + assert(err == cudaSuccess); + assert(result != NULL); + return result; + } + virtual void deallocate(void* buffer) const { + cudaError_t err = cudaSetDevice(device_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + assert(buffer != NULL); + err = cudaFree(buffer); + assert(err == cudaSuccess); + } + + virtual void* scratchpad() const { + if (scratch_ == NULL) { + scratch_ = allocate(kCudaScratchSize + sizeof(unsigned int)); + } + return scratch_; + } + + virtual unsigned int* semaphore() const { + if (semaphore_ == NULL) { + char* scratch = static_cast(scratchpad()) + kCudaScratchSize; + semaphore_ = reinterpret_cast(scratch); + cudaError_t err = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + return semaphore_; + } + + private: + const cudaStream_t* stream_; + int device_; + mutable void* scratch_; + mutable unsigned int* semaphore_; +}; + +struct GpuDevice { + // The StreamInterface is not owned: the caller is + // responsible for its initialization and eventual destruction. + explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) { + eigen_assert(stream); + } + explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) { + eigen_assert(stream); + } + // TODO(bsteiner): This is an internal API, we should not expose it. + EIGEN_STRONG_INLINE const cudaStream_t& stream() const { + return stream_->stream(); + } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return stream_->allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + stream_->deallocate(buffer); + } + + EIGEN_STRONG_INLINE void* scratchpad() const { + return stream_->scratchpad(); + } + + EIGEN_STRONG_INLINE unsigned int* semaphore() const { + return stream_->semaphore(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { +#ifndef __CUDA_ARCH__ + cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, + stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); +#else + eigen_assert(false && "The default device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + cudaError_t err = + cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + cudaError_t err = + cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToHost, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { +#ifndef __CUDA_ARCH__ + cudaError_t err = cudaMemsetAsync(buffer, c, n, stream_->stream()); + EIGEN_UNUSED_VARIABLE(err) + assert(err == cudaSuccess); +#else + eigen_assert(false && "The default device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE size_t numThreads() const { + // FIXME + return 32; + } + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + // FIXME + return 48*1024; + } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // We won't try to take advantage of the l2 cache for the time being, and + // there is no l3 cache on cuda devices. + return firstLevelCacheSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void synchronize() const { +#if defined(__CUDACC__) && !defined(__CUDA_ARCH__) + cudaError_t err = cudaStreamSynchronize(stream_->stream()); + if (err != cudaSuccess) { + std::cerr << "Error detected in CUDA stream: " + << cudaGetErrorString(err) + << std::endl; + assert(err == cudaSuccess); + } +#else + assert(false && "The default device should be used instead to generate kernel code"); +#endif + } + + EIGEN_STRONG_INLINE int getNumCudaMultiProcessors() const { + return stream_->deviceProperties().multiProcessorCount; + } + EIGEN_STRONG_INLINE int maxCudaThreadsPerBlock() const { + return stream_->deviceProperties().maxThreadsPerBlock; + } + EIGEN_STRONG_INLINE int maxCudaThreadsPerMultiProcessor() const { + return stream_->deviceProperties().maxThreadsPerMultiProcessor; + } + EIGEN_STRONG_INLINE int sharedMemPerBlock() const { + return stream_->deviceProperties().sharedMemPerBlock; + } + EIGEN_STRONG_INLINE int majorDeviceVersion() const { + return stream_->deviceProperties().major; + } + EIGEN_STRONG_INLINE int minorDeviceVersion() const { + return stream_->deviceProperties().minor; + } + + EIGEN_STRONG_INLINE int maxBlocks() const { + return max_blocks_; + } + + // This function checks if the CUDA runtime recorded an error for the + // underlying stream device. + inline bool ok() const { +#ifdef __CUDACC__ + cudaError_t error = cudaStreamQuery(stream_->stream()); + return (error == cudaSuccess) || (error == cudaErrorNotReady); +#else + return false; +#endif + } + + private: + const StreamInterface* stream_; + int max_blocks_; +}; + +#define LAUNCH_CUDA_KERNEL(kernel, gridsize, blocksize, sharedmem, device, ...) \ + (kernel) <<< (gridsize), (blocksize), (sharedmem), (device).stream() >>> (__VA_ARGS__); \ + assert(cudaGetLastError() == cudaSuccess); + + +// FIXME: Should be device and kernel specific. +#ifdef __CUDACC__ +static EIGEN_DEVICE_FUNC inline void setCudaSharedMemConfig(cudaSharedMemConfig config) { +#ifndef __CUDA_ARCH__ + cudaError_t status = cudaDeviceSetSharedMemConfig(config); + EIGEN_UNUSED_VARIABLE(status) + assert(status == cudaSuccess); +#else + EIGEN_UNUSED_VARIABLE(config) +#endif +} +#endif + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_CUDA_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h new file mode 100644 index 0000000..9d14139 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -0,0 +1,81 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H + + +namespace Eigen { + +// Default device for the machine (typically a single cpu core) +struct DefaultDevice { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t numThreads() const { +#ifndef __CUDA_ARCH__ + // Running on the host CPU + return 1; +#else + // Running on a CUDA device + return 32; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { +#ifndef __CUDA_ARCH__ + // Running on the host CPU + return l1CacheSize(); +#else + // Running on a CUDA device, return the amount of shared memory available. + return 48*1024; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { +#ifndef __CUDA_ARCH__ + // Running single threaded on the host CPU + return l3CacheSize(); +#else + // Running on a CUDA device + return firstLevelCacheSize(); +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { +#ifndef __CUDA_ARCH__ + // Running single threaded on the host CPU + // Should return an enum that encodes the ISA supported by the CPU + return 1; +#else + // Running on a CUDA device + return __CUDA_ARCH__ / 100; +#endif + } +}; + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_DEFAULT_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h new file mode 100644 index 0000000..7c03989 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -0,0 +1,122 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// Copyright (C) 2016 Benoit Steiner + +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H + +namespace Eigen { +struct SyclDevice { + /// class members + /// sycl queue + mutable cl::sycl::queue m_queue; + /// std::map is the container used to make sure that we create only one buffer + /// per pointer. The lifespan of the buffer now depends on the lifespan of SyclDevice. + /// If a non-read-only pointer is needed to be accessed on the host we should manually deallocate it. + mutable std::map> buffer_map; + /// creating device by using selector + template SyclDevice(dev_Selector s) + : +#ifdef EIGEN_EXCEPTIONS + m_queue(cl::sycl::queue(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + })) +#else + m_queue(cl::sycl::queue(s)) +#endif + {} + // destructor + ~SyclDevice() { deallocate_all(); } + + template void deallocate(T *p) const { + auto it = buffer_map.find(p); + if (it != buffer_map.end()) { + buffer_map.erase(it); + internal::aligned_free(p); + } + } + void deallocate_all() const { + std::map>::iterator it=buffer_map.begin(); + while (it!=buffer_map.end()) { + auto p=it->first; + buffer_map.erase(it); + internal::aligned_free(const_cast(p)); + it=buffer_map.begin(); + } + buffer_map.clear(); + } + + /// creation of sycl accessor for a buffer. This function first tries to find + /// the buffer in the buffer_map. If found it gets the accessor from it, if not, + ///the function then adds an entry by creating a sycl buffer for that particular pointer. + template inline cl::sycl::accessor + get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, const T * ptr) const { + return (get_sycl_buffer(num_bytes, ptr)->template get_access(cgh)); + } + + template inline std::pair>::iterator,bool> add_sycl_buffer(const T *ptr, size_t num_bytes) const { + using Type = cl::sycl::buffer; + std::pair>::iterator,bool> ret = buffer_map.insert(std::pair>(ptr, std::shared_ptr(new Type(cl::sycl::range<1>(num_bytes)), + [](void *dataMem) { delete static_cast(dataMem); }))); + (static_cast(buffer_map.at(ptr).get()))->set_final_data(nullptr); + return ret; + } + + template inline cl::sycl::buffer* get_sycl_buffer(size_t num_bytes,const T * ptr) const { + return static_cast*>(add_sycl_buffer(ptr, num_bytes).first->second.get()); + } + + /// allocating memory on the cpu + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t) const { + return internal::aligned_malloc(8); + } + + // some runtime conditions that can be applied here + bool isDeviceSuitable() const { return true; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const { + ::memcpy(dst, src, n); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const { + auto host_acc= (static_cast*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access(); + memcpy(host_acc.get_pointer(), src, n); + } + /// whith the current implementation of sycl, the data is copied twice from device to host. This will be fixed soon. + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost(T *dst, const T *src, size_t n) const { + auto it = buffer_map.find(src); + if (it != buffer_map.end()) { + auto host_acc= (static_cast*>(it->second.get()))-> template get_access(); + memcpy(dst,host_acc.get_pointer(), n); + } else{ + eigen_assert("no device memory found. The memory might be destroyed before creation"); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + return 1; + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h new file mode 100644 index 0000000..069680a --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -0,0 +1,279 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_THREADS) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H + +namespace Eigen { + +// Use the SimpleThreadPool by default. We'll switch to the new non blocking +// thread pool later. +#ifndef EIGEN_USE_SIMPLE_THREAD_POOL +template using ThreadPoolTempl = NonBlockingThreadPoolTempl; +typedef NonBlockingThreadPool ThreadPool; +#else +template using ThreadPoolTempl = SimpleThreadPoolTempl; +typedef SimpleThreadPool ThreadPool; +#endif + + +// Barrier is an object that allows one or more threads to wait until +// Notify has been called a specified number of times. +class Barrier { + public: + Barrier(unsigned int count) : state_(count << 1), notified_(false) { + eigen_assert(((count << 1) >> 1) == count); + } + ~Barrier() { + eigen_assert((state_>>1) == 0); + } + + void Notify() { + unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; + if (v != 1) { + eigen_assert(((v + 2) & ~1) != 0); + return; // either count has not dropped to 0, or waiter is not waiting + } + std::unique_lock l(mu_); + eigen_assert(!notified_); + notified_ = true; + cv_.notify_all(); + } + + void Wait() { + unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); + if ((v >> 1) == 0) return; + std::unique_lock l(mu_); + while (!notified_) { + cv_.wait(l); + } + } + + private: + std::mutex mu_; + std::condition_variable cv_; + std::atomic state_; // low bit is waiter flag + bool notified_; +}; + + +// Notification is an object that allows a user to to wait for another +// thread to signal a notification that an event has occurred. +// +// Multiple threads can wait on the same Notification object, +// but only one caller must call Notify() on the object. +struct Notification : Barrier { + Notification() : Barrier(1) {}; +}; + + +// Runs an arbitrary function and then calls Notify() on the passed in +// Notification. +template struct FunctionWrapperWithNotification +{ + static void run(Notification* n, Function f, Args... args) { + f(args...); + if (n) { + n->Notify(); + } + } +}; + +template struct FunctionWrapperWithBarrier +{ + static void run(Barrier* b, Function f, Args... args) { + f(args...); + if (b) { + b->Notify(); + } + } +}; + +template +static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { + if (n) { + n->Wait(); + } +} + + +// Build a thread pool device on top the an existing pool of threads. +struct ThreadPoolDevice { + // The ownership of the thread pool remains with the caller. + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { } + + EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate(void* buffer) const { + internal::aligned_free(buffer); + } + + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_STRONG_INLINE void memcpyDeviceToHost(void* dst, const void* src, size_t n) const { + memcpy(dst, src, n); + } + + EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const { + ::memset(buffer, c, n); + } + + EIGEN_STRONG_INLINE int numThreads() const { + return num_threads_; + } + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + return l1CacheSize(); + } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // The l3 cache size is shared between all the cores. + return l3CacheSize() / num_threads_; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int majorDeviceVersion() const { + // Should return an enum that encodes the ISA supported by the CPU + return 1; + } + + template + EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { + Notification* n = new Notification(); + pool_->Schedule(std::bind(&FunctionWrapperWithNotification::run, n, f, args...)); + return n; + } + + template + EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b, + Function&& f, + Args&&... args) const { + pool_->Schedule(std::bind( + &FunctionWrapperWithBarrier::run, b, f, args...)); + } + + template + EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { + pool_->Schedule(std::bind(f, args...)); + } + + // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if + // called from one of the threads in pool_. Returns -1 otherwise. + EIGEN_STRONG_INLINE int currentThreadId() const { + return pool_->CurrentThreadId(); + } + + // parallelFor executes f with [0, n) arguments in parallel and waits for + // completion. F accepts a half-open interval [first, last). + // Block size is choosen based on the iteration cost and resulting parallel + // efficiency. If block_align is not nullptr, it is called to round up the + // block size. + void parallelFor(Index n, const TensorOpCost& cost, + std::function block_align, + std::function f) const { + typedef TensorCostModel CostModel; + if (n <= 1 || numThreads() == 1 || + CostModel::numThreads(n, cost, static_cast(numThreads())) == 1) { + f(0, n); + return; + } + + // Calculate block size based on (1) the iteration cost and (2) parallel + // efficiency. We want blocks to be not too small to mitigate + // parallelization overheads; not too large to mitigate tail + // effect and potential load imbalance and we also want number + // of blocks to be evenly dividable across threads. + + double block_size_f = 1.0 / CostModel::taskSize(1, cost); + Index block_size = numext::mini(n, numext::maxi(1, block_size_f)); + const Index max_block_size = + numext::mini(n, numext::maxi(1, 2 * block_size_f)); + if (block_align) { + Index new_block_size = block_align(block_size); + eigen_assert(new_block_size >= block_size); + block_size = numext::mini(n, new_block_size); + } + Index block_count = divup(n, block_size); + // Calculate parallel efficiency as fraction of total CPU time used for + // computations: + double max_efficiency = + static_cast(block_count) / + (divup(block_count, numThreads()) * numThreads()); + // Now try to increase block size up to max_block_size as long as it + // doesn't decrease parallel efficiency. + for (Index prev_block_count = block_count; prev_block_count > 1;) { + // This is the next block size that divides size into a smaller number + // of blocks than the current block_size. + Index coarser_block_size = divup(n, prev_block_count - 1); + if (block_align) { + Index new_block_size = block_align(coarser_block_size); + eigen_assert(new_block_size >= coarser_block_size); + coarser_block_size = numext::mini(n, new_block_size); + } + if (coarser_block_size > max_block_size) { + break; // Reached max block size. Stop. + } + // Recalculate parallel efficiency. + const Index coarser_block_count = divup(n, coarser_block_size); + eigen_assert(coarser_block_count < prev_block_count); + prev_block_count = coarser_block_count; + const double coarser_efficiency = + static_cast(coarser_block_count) / + (divup(coarser_block_count, numThreads()) * numThreads()); + if (coarser_efficiency + 0.01 >= max_efficiency) { + // Taking it. + block_size = coarser_block_size; + block_count = coarser_block_count; + if (max_efficiency < coarser_efficiency) { + max_efficiency = coarser_efficiency; + } + } + } + + // Recursively divide size into halves until we reach block_size. + // Division code rounds mid to block_size, so we are guaranteed to get + // block_count leaves that do actual computations. + Barrier barrier(static_cast(block_count)); + std::function handleRange; + handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) { + if (last - first <= block_size) { + // Single block or less, execute directly. + f(first, last); + barrier.Notify(); + return; + } + // Split into halves and submit to the pool. + Index mid = first + divup((last - first) / 2, block_size) * block_size; + pool_->Schedule([=, &handleRange]() { handleRange(mid, last); }); + pool_->Schedule([=, &handleRange]() { handleRange(first, mid); }); + }; + handleRange(0, n); + barrier.Wait(); + } + + // Convenience wrapper for parallelFor that does not align blocks. + void parallelFor(Index n, const TensorOpCost& cost, + std::function f) const { + parallelFor(n, cost, nullptr, std::move(f)); + } + + private: + ThreadPoolInterface* pool_; + int num_threads_; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_THREAD_POOL_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h new file mode 100644 index 0000000..1a30e45 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensionList.h @@ -0,0 +1,236 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H +#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H + +namespace Eigen { + +/** \internal + * + * \class TensorDimensionList + * \ingroup CXX11_Tensor_Module + * + * \brief Special case of tensor index list used to list all the dimensions of a tensor of rank n. + * + * \sa Tensor + */ + +template struct DimensionList { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + const Index operator[] (const Index i) const { return i; } +}; + +namespace internal { + +template struct array_size > { + static const size_t value = Rank; +}; +template struct array_size > { + static const size_t value = Rank; +}; + +template const Index array_get(DimensionList&) { + return n; +} +template const Index array_get(const DimensionList&) { + return n; +} + + +#if EIGEN_HAS_CONSTEXPR +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { + return true; + } +}; +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { + return true; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; + +template +struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; +template +struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return true; + } +}; + +template +struct index_statically_eq_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i == value; + } +}; +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i == value; + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i != value; + } +}; +template +struct index_statically_ne_impl > { + static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i != value; + } +}; + +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i > value; + } +}; +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i > value; + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i < value; + } +}; +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return i < value; + } +}; + +#else +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + return true; + } +}; +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + return true; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; + +template +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; +template +struct indices_statically_known_to_increase_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return true; + } +}; + +template +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_eq_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex){ + return false; + } +}; +template +struct index_statically_ne_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_gt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; + +template +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +template +struct index_statically_lt_impl > { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex, const DenseIndex) { + return false; + } +}; +#endif + +} // end namespace internal +} // end namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSION_LIST_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h new file mode 100644 index 0000000..b24cdeb --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h @@ -0,0 +1,428 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H + + +namespace Eigen { + +/** \internal + * + * \class TensorDimensions + * \ingroup CXX11_Tensor_Module + * + * \brief Set of classes used to encode and store the dimensions of a Tensor. + * + * The Sizes class encodes as part of the type the number of dimensions and the + * sizes corresponding to each dimension. It uses no storage space since it is + * entirely known at compile time. + * The DSizes class is its dynamic sibling: the number of dimensions is known + * at compile time but the sizes are set during execution. + * + * \sa Tensor + */ + +// Boilerplate code +namespace internal { + +template struct dget { + static const std::size_t value = get::value; +}; + + +template +struct fixed_size_tensor_index_linearization_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(array const& indices, + const Dimensions& dimensions) + { + return array_get(indices) + + dget::value * + fixed_size_tensor_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct fixed_size_tensor_index_linearization_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(array const&, const Dimensions&) + { + return 0; + } +}; + +template +struct fixed_size_tensor_index_extraction_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(const Index index, + const Dimensions& dimensions) + { + const Index mult = (index == n-1) ? 1 : 0; + return array_get(dimensions) * mult + + fixed_size_tensor_index_extraction_helper::run(index, dimensions); + } +}; + +template +struct fixed_size_tensor_index_extraction_helper +{ + template EIGEN_DEVICE_FUNC + static inline Index run(const Index, + const Dimensions&) + { + return 0; + } + }; + +} // end namespace internal + + +// Fixed size +#ifndef EIGEN_EMULATE_CXX11_META_H +template +struct Sizes : internal::numeric_list { + typedef internal::numeric_list Base; + static const std::ptrdiff_t total_size = internal::arg_prod(Indices...); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t rank() const { + return Base::count; + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t TotalSize() { + return internal::arg_prod(Indices...); + } + + EIGEN_DEVICE_FUNC Sizes() { } + template + explicit EIGEN_DEVICE_FUNC Sizes(const array& /*indices*/) { + // todo: add assertion + } +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC Sizes(DenseIndex...) { } + explicit EIGEN_DEVICE_FUNC Sizes(std::initializer_list /*l*/) { + // todo: add assertion + } +#endif + + template Sizes& operator = (const T& /*other*/) { + // add assertion failure if the size of other is different + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t operator[] (const std::size_t index) const { + return internal::fixed_size_tensor_index_extraction_helper::run(index, *this); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t IndexOfColMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t IndexOfRowMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *static_cast(this)); + } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_prod(const Sizes&) { + return Sizes::total_size; +} +} + +#else + +template +struct non_zero_size { + typedef internal::type2val type; +}; +template <> +struct non_zero_size<0> { + typedef internal::null_type type; +}; + +template struct Sizes { + typedef typename internal::make_type_list::type, typename non_zero_size::type, typename non_zero_size::type, typename non_zero_size::type, typename non_zero_size::type >::type Base; + static const size_t count = Base::count; + static const std::size_t total_size = internal::arg_prod::value; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { + return count; + } + + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t TotalSize() { + return internal::arg_prod::value; + } + + Sizes() { } + template + explicit Sizes(const array& /*indices*/) { + // todo: add assertion + } + template Sizes& operator = (const T& /*other*/) { + // add assertion failure if the size of other is different + return *this; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template Sizes(DenseIndex... /*indices*/) { } + explicit Sizes(std::initializer_list) { + // todo: add assertion + } +#else + EIGEN_DEVICE_FUNC explicit Sizes(const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { + } + EIGEN_DEVICE_FUNC Sizes(const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex, const DenseIndex) { + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex operator[] (const int index) const { + switch (index) { + case 0: + return internal::get<0, Base>::value; + case 1: + return internal::get<1, Base>::value; + case 2: + return internal::get<2, Base>::value; + case 3: + return internal::get<3, Base>::value; + case 4: + return internal::get<4, Base>::value; + default: + eigen_assert(false && "index overflow"); + return static_cast(-1); + } + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t IndexOfColMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + size_t IndexOfRowMajor(const array& indices) const { + return internal::fixed_size_tensor_index_linearization_helper::run(indices, *reinterpret_cast(this)); + } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_prod(const Sizes&) { + return Sizes::total_size; +} +} + +#endif + +// Boilerplate +namespace internal { +template +struct tensor_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, array const& dimensions) + { + return array_get(indices) + + array_get(dimensions) * + tensor_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct tensor_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, array const&) + { + return array_get(indices); + } +}; +} // end namespace internal + + + +// Dynamic size +template +struct DSizes : array { + typedef array Base; + static const int count = NumDims; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE size_t rank() const { + return NumDims; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const { + return (NumDims == 0) ? 1 : internal::array_prod(*static_cast(this)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DSizes() { + for (int i = 0 ; i < NumDims; ++i) { + (*this)[i] = 0; + } + } + EIGEN_DEVICE_FUNC explicit DSizes(const array& a) : Base(a) { } + + EIGEN_DEVICE_FUNC explicit DSizes(const DenseIndex i0) { + eigen_assert(NumDims == 1); + (*this)[0] = i0; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE explicit DSizes(DenseIndex firstDimension, DenseIndex secondDimension, IndexTypes... otherDimensions) : Base({{firstDimension, secondDimension, otherDimensions...}}) { + EIGEN_STATIC_ASSERT(sizeof...(otherDimensions) + 2 == NumDims, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1) { + eigen_assert(NumDims == 2); + (*this)[0] = i0; + (*this)[1] = i1; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2) { + eigen_assert(NumDims == 3); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3) { + eigen_assert(NumDims == 4); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + (*this)[3] = i3; + } + EIGEN_DEVICE_FUNC DSizes(const DenseIndex i0, const DenseIndex i1, const DenseIndex i2, const DenseIndex i3, const DenseIndex i4) { + eigen_assert(NumDims == 5); + (*this)[0] = i0; + (*this)[1] = i1; + (*this)[2] = i2; + (*this)[3] = i3; + (*this)[4] = i4; + } +#endif + + EIGEN_DEVICE_FUNC DSizes& operator = (const array& other) { + *static_cast(this) = other; + return *this; + } + + // A constexpr would be so much better here + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfColMajor(const array& indices) const { + return internal::tensor_index_linearization_helper::run(indices, *static_cast(this)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex IndexOfRowMajor(const array& indices) const { + return internal::tensor_index_linearization_helper::run(indices, *static_cast(this)); + } +}; + + + + +// Boilerplate +namespace internal { +template +struct tensor_vsize_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, std::vector const& dimensions) + { + return array_get(indices) + + array_get(dimensions) * + tensor_vsize_index_linearization_helper::run(indices, dimensions); + } +}; + +template +struct tensor_vsize_index_linearization_helper +{ + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Index run(array const& indices, std::vector const&) + { + return array_get(indices); + } +}; +} // end namespace internal + + +namespace internal { + +template struct array_size > { + static const size_t value = NumDims; +}; +template struct array_size > { + static const size_t value = NumDims; +}; +#ifndef EIGEN_EMULATE_CXX11_META_H +template struct array_size > { +static const std::ptrdiff_t value = Sizes::count; +}; +template struct array_size > { +static const std::ptrdiff_t value = Sizes::count; +}; +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes&) { + return get >::value; +} +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::ptrdiff_t array_get(const Sizes<>&) { + eigen_assert(false && "should never be called"); + return -1; +} +#else +template struct array_size > { + static const size_t value = Sizes::count; +}; +template struct array_size > { + static const size_t value = Sizes::count; +}; +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::size_t array_get(const Sizes&) { + return get::Base>::value; +} + +#endif + + +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) { + return false; + } +}; +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC inline bool run(Dims1& dims1, Dims2& dims2) { + return (array_get(dims1) == array_get(dims2)) & + sizes_match_below_dim::run(dims1, dims2); + } +}; +template +struct sizes_match_below_dim { + static EIGEN_DEVICE_FUNC inline bool run(Dims1&, Dims2&) { + return true; + } +}; + +} // end namespace internal + + +template +EIGEN_DEVICE_FUNC bool dimensions_match(Dims1& dims1, Dims2& dims2) { + return internal::sizes_match_below_dim::value, internal::array_size::value>::run(dims1, dims2); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DIMENSIONS_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h new file mode 100644 index 0000000..0698713 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -0,0 +1,181 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H +#define EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H + +namespace Eigen { + +/** \class TensorForcedEval + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. + * + * + */ +namespace internal { +template class MakePointer_> +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; + template + struct MakePointer { + // Intermediate typedef to workaround MSVC issue. + typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + }; +}; + +template class MakePointer_> +struct eval, Eigen::Dense> +{ + typedef const TensorEvalToOp& type; +}; + +template class MakePointer_> +struct nested, 1, typename eval >::type> +{ + typedef TensorEvalToOp type; +}; + +} // end namespace internal + + + + +template class MakePointer_> +class TensorEvalToOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename MakePointer_::Type PointerType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) + : m_xpr(expr), m_buffer(buffer) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; } + + protected: + typename XprType::Nested m_xpr; + PointerType m_buffer; +}; + + + +template class MakePointer_> +struct TensorEvaluator, Device> +{ + typedef TensorEvalToOp XprType; + typedef typename ArgType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename XprType::Index Index; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = true + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_device(device), + m_buffer(op.buffer()), m_op(op), m_expression(op.expression()) + { } + + // Used for accessor extraction in SYCL Managed TensorMap: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const { + return m_op; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { + } + + typedef typename internal::traits >::template MakePointer::Type DevicePointer; + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { + EIGEN_UNUSED_VARIABLE(scalar); + eigen_assert(scalar == NULL); + return m_impl.evalSubExprsIfNeeded(m_buffer); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalScalar(Index i) { + m_buffer[i] = m_impl.coeff(i); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalPacket(Index i) { + internal::pstoret(m_buffer + i, m_impl.template packet::IsAligned ? Aligned : Unaligned>(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_buffer[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return internal::ploadt(m_buffer + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + // We assume that evalPacket or evalScalar is called to perform the + // assignment and account for the cost of the write here. + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; } + ArgType expression() const { return m_expression; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_impl; } + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} + + private: + TensorEvaluator m_impl; + const Device& m_device; + DevicePointer m_buffer; + const XprType& m_op; + const ArgType m_expression; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EVAL_TO_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h new file mode 100644 index 0000000..834ce07 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -0,0 +1,633 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H + +namespace Eigen { + +/** \class TensorEvaluator + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor evaluator classes. + * + * These classes are responsible for the evaluation of the tensor expression. + * + * TODO: add support for more types of expressions, in particular expressions + * leading to lvalues (slicing, reshaping, etc...) + */ + +// Generic evaluator +template +struct TensorEvaluator +{ + typedef typename Derived::Index Index; + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename Derived::Dimensions Dimensions; + + // NumDimensions is -1 for variable dim tensors + static const int NumCoords = internal::traits::NumDimensions > 0 ? + internal::traits::NumDimensions : 0; + + enum { + IsAligned = Derived::IsAligned, + PacketAccess = (internal::unpacket_traits::size > 1), + Layout = Derived::Layout, + CoordAccess = NumCoords > 0, + RawAccess = true + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) + : m_data(const_cast::template MakePointer::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) + { } + + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { + if (dest) { + m_device.memcpy((void*)dest, m_data, sizeof(Scalar) * m_dims.TotalSize()); + return false; + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + eigen_assert(m_data); + return m_data[index]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + eigen_assert(m_data); + return m_data[index]; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketReturnType packet(Index index) const + { + return internal::ploadt(m_data + index); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + return internal::pstoret(m_data + index, x); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { + eigen_assert(m_data); + if (static_cast(Layout) == static_cast(ColMajor)) { + return m_data[m_dims.IndexOfColMajor(coords)]; + } else { + return m_data[m_dims.IndexOfRowMajor(coords)]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(const array& coords) { + eigen_assert(m_data); + if (static_cast(Layout) == static_cast(ColMajor)) { + return m_data[m_dims.IndexOfColMajor(coords)]; + } else { + return m_data[m_dims.IndexOfRowMajor(coords)]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + internal::unpacket_traits::size); + } + + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } + + /// required by sycl in order to construct sycl buffer from raw pointer + const Device& device() const{return m_device;} + + protected: + typename internal::traits::template MakePointer::Type m_data; + Dimensions m_dims; + const Device& m_device; + const Derived& m_impl; +}; + +namespace { +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T loadConstant(const T* address) { + return *address; +} +// Use the texture cache on CUDA devices whenever possible +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float loadConstant(const float* address) { + return __ldg(address); +} +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double loadConstant(const double* address) { + return __ldg(address); +} +template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +Eigen::half loadConstant(const Eigen::half* address) { + return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); +} +#endif +} + + +// Default evaluator for rvalues +template +struct TensorEvaluator +{ + typedef typename Derived::Index Index; + typedef typename Derived::Scalar Scalar; + typedef typename Derived::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef typename Derived::Dimensions Dimensions; + + // NumDimensions is -1 for variable dim tensors + static const int NumCoords = internal::traits::NumDimensions > 0 ? + internal::traits::NumDimensions : 0; + + enum { + IsAligned = Derived::IsAligned, + PacketAccess = (internal::unpacket_traits::size > 1), + Layout = Derived::Layout, + CoordAccess = NumCoords > 0, + RawAccess = true + }; + + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) + : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + if (!NumTraits::type>::RequireInitialization && data) { + m_device.memcpy((void*)data, m_data, m_dims.TotalSize() * sizeof(Scalar)); + return false; + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + eigen_assert(m_data); + return loadConstant(m_data+index); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + PacketReturnType packet(Index index) const + { + return internal::ploadt_ro(m_data + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { + eigen_assert(m_data); + const Index index = (static_cast(Layout) == static_cast(ColMajor)) ? m_dims.IndexOfColMajor(coords) + : m_dims.IndexOfRowMajor(coords); + return loadConstant(m_data+index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + internal::unpacket_traits::size); + } + + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } + + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} + + protected: + typename internal::traits::template MakePointer::Type m_data; + Dimensions m_dims; + const Device& m_device; + const Derived& m_impl; +}; + + + + +// -------------------- CwiseNullaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseNullaryOp XprType; + + enum { + IsAligned = true, + PacketAccess = internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC + TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { return true; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_wrapper(m_functor, index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_wrapper.template packetOp(m_functor, index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, + internal::unpacket_traits::size); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_argImpl; } + /// required by sycl in order to extract the accessor + NullaryOp functor() const { return m_functor; } + + + private: + const NullaryOp m_functor; + TensorEvaluator m_argImpl; + const internal::nullary_wrapper m_wrapper; +}; + + + +// -------------------- CwiseUnaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseUnaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression(), device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_argImpl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + m_argImpl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_argImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_argImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator & impl() const { return m_argImpl; } + /// added for sycl in order to construct the buffer from sycl device + UnaryOp functor() const { return m_functor; } + + + private: + const UnaryOp m_functor; + TensorEvaluator m_argImpl; +}; + + +// -------------------- CwiseBinaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseBinaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_leftImpl(op.lhsExpression(), device), + m_rightImpl(op.rhsExpression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(dimensions_match(m_leftImpl.dimensions(), m_rightImpl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use right impl instead if right impl dimensions are known at compile time. + return m_leftImpl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_leftImpl.evalSubExprsIfNeeded(NULL); + m_rightImpl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_leftImpl.cleanup(); + m_rightImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_leftImpl.coeff(index), m_rightImpl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_leftImpl.template packet(index), m_rightImpl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_leftImpl.costPerCoeff(vectorized) + + m_rightImpl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& right_impl() const { return m_rightImpl; } + /// required by sycl in order to extract the accessor + BinaryOp functor() const { return m_functor; } + + private: + const BinaryOp m_functor; + TensorEvaluator m_leftImpl; + TensorEvaluator m_rightImpl; +}; + +// -------------------- CwiseTernaryOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorCwiseTernaryOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::functor_traits::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_functor(op.functor()), + m_arg1Impl(op.arg1Expression(), device), + m_arg2Impl(op.arg2Expression(), device), + m_arg3Impl(op.arg3Expression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout) || internal::traits::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::Index, + typename internal::traits::Index>::value), + STORAGE_INDEX_MUST_MATCH) + + eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use arg2 or arg3 dimensions if they are known at compile time. + return m_arg1Impl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_arg1Impl.evalSubExprsIfNeeded(NULL); + m_arg2Impl.evalSubExprsIfNeeded(NULL); + m_arg3Impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_arg1Impl.cleanup(); + m_arg2Impl.cleanup(); + m_arg3Impl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_functor.packetOp(m_arg1Impl.template packet(index), + m_arg2Impl.template packet(index), + m_arg3Impl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + const double functor_cost = internal::functor_traits::Cost; + return m_arg1Impl.costPerCoeff(vectorized) + + m_arg2Impl.costPerCoeff(vectorized) + + m_arg3Impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, functor_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator & arg1Impl() const { return m_arg1Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& arg2Impl() const { return m_arg2Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& arg3Impl() const { return m_arg3Impl; } + + private: + const TernaryOp m_functor; + TensorEvaluator m_arg1Impl; + TensorEvaluator m_arg2Impl; + TensorEvaluator m_arg3Impl; +}; + + +// -------------------- SelectOp -------------------- + +template +struct TensorEvaluator, Device> +{ + typedef TensorSelectOp XprType; + typedef typename XprType::Scalar Scalar; + + enum { + IsAligned = TensorEvaluator::IsAligned & TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess & TensorEvaluator::PacketAccess & + internal::packet_traits::HasBlend, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + : m_condImpl(op.ifExpression(), device), + m_thenImpl(op.thenExpression(), device), + m_elseImpl(op.elseExpression(), device) + { + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(dimensions_match(m_condImpl.dimensions(), m_thenImpl.dimensions())); + eigen_assert(dimensions_match(m_thenImpl.dimensions(), m_elseImpl.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename internal::traits::Scalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + typedef typename TensorEvaluator::Dimensions Dimensions; + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const + { + // TODO: use then or else impl instead if they happen to be known at compile time. + return m_condImpl.dimensions(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_condImpl.evalSubExprsIfNeeded(NULL); + m_thenImpl.evalSubExprsIfNeeded(NULL); + m_elseImpl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_condImpl.cleanup(); + m_thenImpl.cleanup(); + m_elseImpl.cleanup(); + } + + EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const + { + return m_condImpl.coeff(index) ? m_thenImpl.coeff(index) : m_elseImpl.coeff(index); + } + template + EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const + { + internal::Selector select; + for (Index i = 0; i < PacketSize; ++i) { + select.select[i] = m_condImpl.coeff(index+i); + } + return internal::pblend(select, + m_thenImpl.template packet(index), + m_elseImpl.template packet(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return m_condImpl.costPerCoeff(vectorized) + + m_thenImpl.costPerCoeff(vectorized) + .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator & cond_impl() const { return m_condImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& then_impl() const { return m_thenImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& else_impl() const { return m_elseImpl; } + + private: + TensorEvaluator m_condImpl; + TensorEvaluator m_thenImpl; + TensorEvaluator m_elseImpl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EVALUATOR_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h new file mode 100644 index 0000000..f01d77c --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -0,0 +1,288 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H + +namespace Eigen { + +/** \class TensorExecutor + * \ingroup CXX11_Tensor_Module + * + * \brief The tensor executor class. + * + * This class is responsible for launch the evaluation of the expression on + * the specified computing device. + */ +namespace internal { + +// Default strategy: the expression is evaluated with a single cpu thread. +template +class TensorExecutor +{ + public: + typedef typename Expression::Index Index; + EIGEN_DEVICE_FUNC + static inline void run(const Expression& expr, const Device& device = Device()) + { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) + { + const Index size = array_prod(evaluator.dimensions()); + for (Index i = 0; i < size; ++i) { + evaluator.evalScalar(i); + } + } + evaluator.cleanup(); + } +}; + + +template +class TensorExecutor +{ + public: + typedef typename Expression::Index Index; + EIGEN_DEVICE_FUNC + static inline void run(const Expression& expr, const DefaultDevice& device = DefaultDevice()) + { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) + { + const Index size = array_prod(evaluator.dimensions()); + const int PacketSize = unpacket_traits::PacketReturnType>::size; + // Give the compiler a strong hint to unroll the loop. But don't insist + // on unrolling, because if the function is expensive the compiler should not + // unroll the loop at the expense of inlining. + const Index UnrolledSize = (size / (4 * PacketSize)) * 4 * PacketSize; + for (Index i = 0; i < UnrolledSize; i += 4*PacketSize) { + for (Index j = 0; j < 4; j++) { + evaluator.evalPacket(i + j * PacketSize); + } + } + const Index VectorizedSize = (size / PacketSize) * PacketSize; + for (Index i = UnrolledSize; i < VectorizedSize; i += PacketSize) { + evaluator.evalPacket(i); + } + for (Index i = VectorizedSize; i < size; ++i) { + evaluator.evalScalar(i); + } + } + evaluator.cleanup(); + } +}; + + + +// Multicore strategy: the index space is partitioned and each partition is executed on a single core +#ifdef EIGEN_USE_THREADS +template +struct EvalRange { + static void run(Evaluator* evaluator_in, const Index first, const Index last) { + Evaluator evaluator = *evaluator_in; + eigen_assert(last >= first); + for (Index i = first; i < last; ++i) { + evaluator.evalScalar(i); + } + } + + static Index alignBlockSize(Index size) { + return size; + } +}; + +template +struct EvalRange { + static const int PacketSize = unpacket_traits::size; + + static void run(Evaluator* evaluator_in, const Index first, const Index last) { + Evaluator evaluator = *evaluator_in; + eigen_assert(last >= first); + Index i = first; + if (last - first >= PacketSize) { + eigen_assert(first % PacketSize == 0); + Index last_chunk_offset = last - 4 * PacketSize; + // Give the compiler a strong hint to unroll the loop. But don't insist + // on unrolling, because if the function is expensive the compiler should not + // unroll the loop at the expense of inlining. + for (; i <= last_chunk_offset; i += 4*PacketSize) { + for (Index j = 0; j < 4; j++) { + evaluator.evalPacket(i + j * PacketSize); + } + } + last_chunk_offset = last - PacketSize; + for (; i <= last_chunk_offset; i += PacketSize) { + evaluator.evalPacket(i); + } + } + for (; i < last; ++i) { + evaluator.evalScalar(i); + } + } + + static Index alignBlockSize(Index size) { + // Align block size to packet size and account for unrolling in run above. + if (size >= 16 * PacketSize) { + return (size + 4 * PacketSize - 1) & ~(4 * PacketSize - 1); + } + // Aligning to 4 * PacketSize would increase block size by more than 25%. + return (size + PacketSize - 1) & ~(PacketSize - 1); + } +}; + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + static inline void run(const Expression& expr, const ThreadPoolDevice& device) + { + typedef TensorEvaluator Evaluator; + Evaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) + { + const Index size = array_prod(evaluator.dimensions()); +#if !defined(EIGEN_USE_SIMPLE_THREAD_POOL) + device.parallelFor(size, evaluator.costPerCoeff(Vectorizable), + EvalRange::alignBlockSize, + [&evaluator](Index first, Index last) { + EvalRange::run(&evaluator, first, last); + }); +#else + size_t num_threads = device.numThreads(); + if (num_threads > 1) { + num_threads = TensorCostModel::numThreads( + size, evaluator.costPerCoeff(Vectorizable), num_threads); + } + if (num_threads == 1) { + EvalRange::run(&evaluator, 0, size); + } else { + const Index PacketSize = Vectorizable ? unpacket_traits::size : 1; + Index blocksz = std::ceil(static_cast(size)/num_threads) + PacketSize - 1; + const Index blocksize = numext::maxi(PacketSize, (blocksz - (blocksz % PacketSize))); + const Index numblocks = size / blocksize; + + Barrier barrier(numblocks); + for (int i = 0; i < numblocks; ++i) { + device.enqueue_with_barrier( + &barrier, &EvalRange::run, + &evaluator, i * blocksize, (i + 1) * blocksize); + } + if (numblocks * blocksize < size) { + EvalRange::run( + &evaluator, numblocks * blocksize, size); + } + barrier.Wait(); + } +#endif // defined(!EIGEN_USE_SIMPLE_THREAD_POOL) + } + evaluator.cleanup(); + } +}; +#endif // EIGEN_USE_THREADS + + +// GPU: the evaluation of the expression is offloaded to a GPU. +#if defined(EIGEN_USE_GPU) + +template +class TensorExecutor { + public: + typedef typename Expression::Index Index; + static void run(const Expression& expr, const GpuDevice& device); +}; + + +#if defined(__CUDACC__) +template +struct EigenMetaKernelEval { + static __device__ EIGEN_ALWAYS_INLINE + void run(Evaluator& eval, Index first, Index last, Index step_size) { + for (Index i = first; i < last; i += step_size) { + eval.evalScalar(i); + } + } +}; + +template +struct EigenMetaKernelEval { + static __device__ EIGEN_ALWAYS_INLINE + void run(Evaluator& eval, Index first, Index last, Index step_size) { + const Index PacketSize = unpacket_traits::size; + const Index vectorized_size = (last / PacketSize) * PacketSize; + const Index vectorized_step_size = step_size * PacketSize; + + // Use the vector path + for (Index i = first * PacketSize; i < vectorized_size; + i += vectorized_step_size) { + eval.evalPacket(i); + } + for (Index i = vectorized_size + first; i < last; i += step_size) { + eval.evalScalar(i); + } + } +}; + +template +__global__ void +__launch_bounds__(1024) +EigenMetaKernel(Evaluator eval, Index size) { + + const Index first_index = blockIdx.x * blockDim.x + threadIdx.x; + const Index step_size = blockDim.x * gridDim.x; + + const bool vectorizable = Evaluator::PacketAccess & Evaluator::IsAligned; + EigenMetaKernelEval::run(eval, first_index, size, step_size); +} + +/*static*/ +template +inline void TensorExecutor::run( + const Expression& expr, const GpuDevice& device) { + TensorEvaluator evaluator(expr, device); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { + const int block_size = device.maxCudaThreadsPerBlock(); + const int max_blocks = device.getNumCudaMultiProcessors() * + device.maxCudaThreadsPerMultiProcessor() / block_size; + const Index size = array_prod(evaluator.dimensions()); + // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0. + const int num_blocks = numext::maxi(numext::mini(max_blocks, divup(size, block_size)), 1); + + LAUNCH_CUDA_KERNEL( + (EigenMetaKernel, Index>), + num_blocks, block_size, 0, device, evaluator, size); + } + evaluator.cleanup(); +} + +#endif // __CUDACC__ +#endif // EIGEN_USE_GPU + +// SYCL Executor policy +#ifdef EIGEN_USE_SYCL + +template +class TensorExecutor { +public: + static inline void run(const Expression &expr, const SyclDevice &device) { + // call TensorSYCL module + TensorSycl::run(expr, device); + } +}; + +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EXECUTOR_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h new file mode 100644 index 0000000..85dfc7a --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h @@ -0,0 +1,371 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_EXPR_H +#define EIGEN_CXX11_TENSOR_TENSOR_EXPR_H + +namespace Eigen { + +/** \class TensorExpr + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor expression classes. + * + * The TensorCwiseNullaryOp class applies a nullary operators to an expression. + * This is typically used to generate constants. + * + * The TensorCwiseUnaryOp class represents an expression where a unary operator + * (e.g. cwiseSqrt) is applied to an expression. + * + * The TensorCwiseBinaryOp class represents an expression where a binary + * operator (e.g. addition) is applied to a lhs and a rhs expression. + * + */ +namespace internal { +template +struct traits > + : traits +{ + typedef traits XprTraits; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +} // end namespace internal + + + +template +class TensorCwiseNullaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef TensorCwiseNullaryOp Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseNullaryOp(const XprType& xpr, const NullaryOp& func = NullaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + const NullaryOp& functor() const { return m_functor; } + + protected: + typename XprType::Nested m_xpr; + const NullaryOp m_functor; +}; + + + +namespace internal { +template +struct traits > + : traits +{ + // TODO(phli): Add InputScalar, InputPacket. Check references to + // current Scalar/Packet to see if the intent is Input or Output. + typedef typename result_of::type Scalar; + typedef traits XprTraits; + typedef typename XprType::Nested XprTypeNested; + typedef typename remove_reference::type _XprTypeNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseUnaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseUnaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseUnaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + // TODO(phli): Add InputScalar, InputPacket. Check references to + // current Scalar/Packet to see if the intent is Input or Output. + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const UnaryOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + nestedExpression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const UnaryOp m_functor; +}; + + +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs + // are different. + // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to + // current Scalar/Packet to see if the intent is Inputs or Output. + typedef typename result_of< + BinaryOp(typename LhsXprType::Scalar, + typename RhsXprType::Scalar)>::type Scalar; + typedef traits XprTraits; + typedef typename promote_storage_type< + typename traits::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type< + typename traits::Index, + typename traits::Index>::type Index; + typedef typename LhsXprType::Nested LhsNested; + typedef typename RhsXprType::Nested RhsNested; + typedef typename remove_reference::type _LhsNested; + typedef typename remove_reference::type _RhsNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseBinaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseBinaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseBinaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + // TODO(phli): Add Lhs/RhsScalar, Lhs/RhsPacket. Check references to + // current Scalar/Packet to see if the intent is Inputs or Output. + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseBinaryOp(const LhsXprType& lhs, const RhsXprType& rhs, const BinaryOp& func = BinaryOp()) + : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const BinaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + lhsExpression() const { return m_lhs_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + rhsExpression() const { return m_rhs_xpr; } + + protected: + typename LhsXprType::Nested m_lhs_xpr; + typename RhsXprType::Nested m_rhs_xpr; + const BinaryOp m_functor; +}; + + +namespace internal { +template +struct traits > +{ + // Type promotion to handle the case where the types of the args are different. + typedef typename result_of< + TernaryOp(typename Arg1XprType::Scalar, + typename Arg2XprType::Scalar, + typename Arg3XprType::Scalar)>::type Scalar; + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename Arg1XprType::Nested Arg1Nested; + typedef typename Arg2XprType::Nested Arg2Nested; + typedef typename Arg3XprType::Nested Arg3Nested; + typedef typename remove_reference::type _Arg1Nested; + typedef typename remove_reference::type _Arg2Nested; + typedef typename remove_reference::type _Arg3Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorCwiseTernaryOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorCwiseTernaryOp type; +}; + +} // end namespace internal + + + +template +class TensorCwiseTernaryOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef Scalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp()) + : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {} + + EIGEN_DEVICE_FUNC + const TernaryOp& functor() const { return m_functor; } + + /** \returns the nested expressions */ + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg1Expression() const { return m_arg1_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg2Expression() const { return m_arg2_xpr; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + arg3Expression() const { return m_arg3_xpr; } + + protected: + typename Arg1XprType::Nested m_arg1_xpr; + typename Arg2XprType::Nested m_arg2_xpr; + typename Arg3XprType::Nested m_arg3_xpr; + const TernaryOp m_functor; +}; + + +namespace internal { +template +struct traits > + : traits +{ + typedef typename traits::Scalar Scalar; + typedef traits XprTraits; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + typedef typename IfXprType::Nested IfNested; + typedef typename ThenXprType::Nested ThenNested; + typedef typename ElseXprType::Nested ElseNested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorSelectOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorSelectOp type; +}; + +} // end namespace internal + + +template +class TensorSelectOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::promote_storage_type::ret CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC + TensorSelectOp(const IfXprType& a_condition, + const ThenXprType& a_then, + const ElseXprType& a_else) + : m_condition(a_condition), m_then(a_then), m_else(a_else) + { } + + EIGEN_DEVICE_FUNC + const IfXprType& ifExpression() const { return m_condition; } + + EIGEN_DEVICE_FUNC + const ThenXprType& thenExpression() const { return m_then; } + + EIGEN_DEVICE_FUNC + const ElseXprType& elseExpression() const { return m_else; } + + protected: + typename IfXprType::Nested m_condition; + typename ThenXprType::Nested m_then; + typename ElseXprType::Nested m_else; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_EXPR_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h new file mode 100644 index 0000000..08eb559 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -0,0 +1,651 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Jianwei Cui +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H +#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H + +// This code requires the ability to initialize arrays of constant +// values directly inside a class. +#if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900 + +namespace Eigen { + +/** \class TensorFFT + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor FFT class. + * + * TODO: + * Vectorize the Cooley Tukey and the Bluestein algorithm + * Add support for multithreaded evaluation + * Improve the performance on GPU + */ + +template struct MakeComplex { + template + EIGEN_DEVICE_FUNC + T operator() (const T& val) const { return val; } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const T& val) const { return std::complex(val, 0); } +}; + +template <> struct MakeComplex { + template + EIGEN_DEVICE_FUNC + std::complex operator() (const std::complex& val) const { return val; } +}; + +template struct PartOf { + template T operator() (const T& val) const { return val; } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.real(); } +}; + +template <> struct PartOf { + template T operator() (const std::complex& val) const { return val.imag(); } +}; + +namespace internal { +template +struct traits > : public traits { + typedef traits XprTraits; + typedef typename NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename XprTraits::Scalar InputScalar; + typedef typename conditional::type OutputScalar; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> { + typedef const TensorFFTOp& type; +}; + +template +struct nested, 1, typename eval >::type> { + typedef TensorFFTOp type; +}; + +} // end namespace internal + +template +class TensorFFTOp : public TensorBase, ReadOnlyAccessors> { + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename internal::conditional::type OutputScalar; + typedef OutputScalar CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft) + : m_xpr(expr), m_fft(fft) {} + + EIGEN_DEVICE_FUNC + const FFT& fft() const { return m_fft; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& expression() const { + return m_xpr; + } + + protected: + typename XprType::Nested m_xpr; + const FFT m_fft; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> { + typedef TensorFFTOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename std::complex ComplexScalar; + typedef typename TensorEvaluator::Dimensions InputDimensions; + typedef internal::traits XprTraits; + typedef typename XprTraits::Scalar InputScalar; + typedef typename internal::conditional::type OutputScalar; + typedef OutputScalar CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = true, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) { + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + eigen_assert(input_dims[i] > 0); + m_dimensions[i] = input_dims[i]; + } + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; + } + } else { + m_strides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; + } + } + m_size = m_dimensions.TotalSize(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { + return m_dimensions; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) { + m_impl.evalSubExprsIfNeeded(NULL); + if (data) { + evalToBuf(data); + return false; + } else { + m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size); + evalToBuf(m_data); + return true; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + if (m_data) { + m_device.deallocate(m_data); + m_data = NULL; + } + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const { + return m_data[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType + packet(Index index) const { + return internal::ploadt(m_data + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } + + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) { + const bool write_to_out = internal::is_same::value; + ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size); + + for (Index i = 0; i < m_size; ++i) { + buf[i] = MakeComplex::value>()(m_impl.coeff(i)); + } + + for (size_t i = 0; i < m_fft.size(); ++i) { + Index dim = m_fft[i]; + eigen_assert(dim >= 0 && dim < NumDims); + Index line_len = m_dimensions[dim]; + eigen_assert(line_len >= 1); + ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len); + const bool is_power_of_two = isPowerOfTwo(line_len); + const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len); + const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite); + + ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite); + ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1)); + if (!is_power_of_two) { + // Compute twiddle factors + // t_n = exp(sqrt(-1) * pi * n^2 / line_len) + // for n = 0, 1,..., line_len-1. + // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2 + pos_j_base_powered[0] = ComplexScalar(1, 0); + if (line_len > 1) { + const RealScalar pi_over_len(EIGEN_PI / line_len); + const ComplexScalar pos_j_base = ComplexScalar( + std::cos(pi_over_len), std::sin(pi_over_len)); + pos_j_base_powered[1] = pos_j_base; + if (line_len > 2) { + const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base; + for (int j = 2; j < line_len + 1; ++j) { + pos_j_base_powered[j] = pos_j_base_powered[j - 1] * + pos_j_base_powered[j - 1] / + pos_j_base_powered[j - 2] * pos_j_base_sq; + } + } + } + } + + for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) { + const Index base_offset = getBaseOffsetFromIndex(partial_index, dim); + + // get data into line_buf + const Index stride = m_strides[dim]; + if (stride == 1) { + memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar)); + } else { + Index offset = base_offset; + for (int j = 0; j < line_len; ++j, offset += stride) { + line_buf[j] = buf[offset]; + } + } + + // processs the line + if (is_power_of_two) { + processDataLineCooleyTukey(line_buf, line_len, log_len); + } + else { + processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered); + } + + // write back + if (FFTDir == FFT_FORWARD && stride == 1) { + memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar)); + } else { + Index offset = base_offset; + const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0); + for (int j = 0; j < line_len; ++j, offset += stride) { + buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor; + } + } + } + m_device.deallocate(line_buf); + if (!is_power_of_two) { + m_device.deallocate(a); + m_device.deallocate(b); + m_device.deallocate(pos_j_base_powered); + } + } + + if(!write_to_out) { + for (Index i = 0; i < m_size; ++i) { + data[i] = PartOf()(buf[i]); + } + m_device.deallocate(buf); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) { + eigen_assert(x > 0); + return !(x & (x - 1)); + } + + // The composite number for padding, used in Bluestein's FFT algorithm + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) { + Index i = 2; + while (i < 2 * n - 1) i *= 2; + return i; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) { + Index log2m = 0; + while (m >>= 1) log2m++; + return log2m; + } + + // Call Cooley Tukey algorithm directly, data length must be power of 2 + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) { + eigen_assert(isPowerOfTwo(line_len)); + scramble_FFT(line_buf, line_len); + compute_1D_Butterfly(line_buf, line_len, log_len); + } + + // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) { + Index n = line_len; + Index m = good_composite; + ComplexScalar* data = line_buf; + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + a[i] = data[i] * numext::conj(pos_j_base_powered[i]); + } + else { + a[i] = data[i] * pos_j_base_powered[i]; + } + } + for (Index i = n; i < m; ++i) { + a[i] = ComplexScalar(0, 0); + } + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[i]; + } + else { + b[i] = numext::conj(pos_j_base_powered[i]); + } + } + for (Index i = n; i < m - n; ++i) { + b[i] = ComplexScalar(0, 0); + } + for (Index i = m - n; i < m; ++i) { + if(FFTDir == FFT_FORWARD) { + b[i] = pos_j_base_powered[m-i]; + } + else { + b[i] = numext::conj(pos_j_base_powered[m-i]); + } + } + + scramble_FFT(a, m); + compute_1D_Butterfly(a, m, log_len); + + scramble_FFT(b, m); + compute_1D_Butterfly(b, m, log_len); + + for (Index i = 0; i < m; ++i) { + a[i] *= b[i]; + } + + scramble_FFT(a, m); + compute_1D_Butterfly(a, m, log_len); + + //Do the scaling after ifft + for (Index i = 0; i < m; ++i) { + a[i] /= m; + } + + for (Index i = 0; i < n; ++i) { + if(FFTDir == FFT_FORWARD) { + data[i] = a[i] * numext::conj(pos_j_base_powered[i]); + } + else { + data[i] = a[i] * pos_j_base_powered[i]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) { + eigen_assert(isPowerOfTwo(n)); + Index j = 1; + for (Index i = 1; i < n; ++i){ + if (j > i) { + std::swap(data[j-1], data[i-1]); + } + Index m = n >> 1; + while (m >= 2 && j > m) { + j -= m; + m >>= 1; + } + j += m; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) { + ComplexScalar tmp = data[1]; + data[1] = data[0] - data[1]; + data[0] += tmp; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) { + ComplexScalar tmp[4]; + tmp[0] = data[0] + data[1]; + tmp[1] = data[0] - data[1]; + tmp[2] = data[2] + data[3]; + if (Dir == FFT_FORWARD) { + tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]); + } else { + tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]); + } + data[0] = tmp[0] + tmp[2]; + data[1] = tmp[1] + tmp[3]; + data[2] = tmp[0] - tmp[2]; + data[3] = tmp[1] - tmp[3]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) { + ComplexScalar tmp_1[8]; + ComplexScalar tmp_2[8]; + + tmp_1[0] = data[0] + data[1]; + tmp_1[1] = data[0] - data[1]; + tmp_1[2] = data[2] + data[3]; + if (Dir == FFT_FORWARD) { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1); + } else { + tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1); + } + tmp_1[4] = data[4] + data[5]; + tmp_1[5] = data[4] - data[5]; + tmp_1[6] = data[6] + data[7]; + if (Dir == FFT_FORWARD) { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1); + } else { + tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1); + } + tmp_2[0] = tmp_1[0] + tmp_1[2]; + tmp_2[1] = tmp_1[1] + tmp_1[3]; + tmp_2[2] = tmp_1[0] - tmp_1[2]; + tmp_2[3] = tmp_1[1] - tmp_1[3]; + tmp_2[4] = tmp_1[4] + tmp_1[6]; +// SQRT2DIV2 = sqrt(2)/2 +#define SQRT2DIV2 0.7071067811865476 + if (Dir == FFT_FORWARD) { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2); + } else { + tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2); + tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1); + tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2); + } + data[0] = tmp_2[0] + tmp_2[4]; + data[1] = tmp_2[1] + tmp_2[5]; + data[2] = tmp_2[2] + tmp_2[6]; + data[3] = tmp_2[3] + tmp_2[7]; + data[4] = tmp_2[0] - tmp_2[4]; + data[5] = tmp_2[1] - tmp_2[5]; + data[6] = tmp_2[2] - tmp_2[6]; + data[7] = tmp_2[3] - tmp_2[7]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge( + ComplexScalar* data, Index n, Index n_power_of_2) { + // Original code: + // RealScalar wtemp = std::sin(M_PI/n); + // RealScalar wpi = -std::sin(2 * M_PI/n); + const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2]; + const RealScalar wpi = (Dir == FFT_FORWARD) + ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2] + : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2]; + + const ComplexScalar wp(wtemp, wpi); + const ComplexScalar wp_one = wp + ComplexScalar(1, 0); + const ComplexScalar wp_one_2 = wp_one * wp_one; + const ComplexScalar wp_one_3 = wp_one_2 * wp_one; + const ComplexScalar wp_one_4 = wp_one_3 * wp_one; + const Index n2 = n / 2; + ComplexScalar w(1.0, 0.0); + for (Index i = 0; i < n2; i += 4) { + ComplexScalar temp0(data[i + n2] * w); + ComplexScalar temp1(data[i + 1 + n2] * w * wp_one); + ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2); + ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3); + w = w * wp_one_4; + + data[i + n2] = data[i] - temp0; + data[i] += temp0; + + data[i + 1 + n2] = data[i + 1] - temp1; + data[i + 1] += temp1; + + data[i + 2 + n2] = data[i + 2] - temp2; + data[i + 2] += temp2; + + data[i + 3 + n2] = data[i + 3] - temp3; + data[i + 3] += temp3; + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly( + ComplexScalar* data, Index n, Index n_power_of_2) { + eigen_assert(isPowerOfTwo(n)); + if (n > 8) { + compute_1D_Butterfly(data, n / 2, n_power_of_2 - 1); + compute_1D_Butterfly(data + n / 2, n / 2, n_power_of_2 - 1); + butterfly_1D_merge(data, n, n_power_of_2); + } else if (n == 8) { + butterfly_8(data); + } else if (n == 4) { + butterfly_4(data); + } else if (n == 2) { + butterfly_2(data); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const { + Index result = 0; + + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > omitted_dim; --i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + else { + for (Index i = 0; i < omitted_dim; ++i) { + const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim]; + const Index idx = index / partial_m_stride; + index -= idx * partial_m_stride; + result += idx * m_strides[i]; + } + result += index; + } + // Value of index_coords[omitted_dim] is not determined to this step + return result; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const { + Index result = base + offset * m_strides[omitted_dim] ; + return result; + } + + protected: + Index m_size; + const FFT& m_fft; + Dimensions m_dimensions; + array m_strides; + TensorEvaluator m_impl; + CoeffReturnType* m_data; + const Device& m_device; + + // This will support a maximum FFT size of 2^32 for each dimension + // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2; + const RealScalar m_sin_PI_div_n_LUT[32] = { + RealScalar(0.0), + RealScalar(-2), + RealScalar(-0.999999999999999), + RealScalar(-0.292893218813453), + RealScalar(-0.0761204674887130), + RealScalar(-0.0192147195967696), + RealScalar(-0.00481527332780311), + RealScalar(-0.00120454379482761), + RealScalar(-3.01181303795779e-04), + RealScalar(-7.52981608554592e-05), + RealScalar(-1.88247173988574e-05), + RealScalar(-4.70619042382852e-06), + RealScalar(-1.17654829809007e-06), + RealScalar(-2.94137117780840e-07), + RealScalar(-7.35342821488550e-08), + RealScalar(-1.83835707061916e-08), + RealScalar(-4.59589268710903e-09), + RealScalar(-1.14897317243732e-09), + RealScalar(-2.87243293150586e-10), + RealScalar( -7.18108232902250e-11), + RealScalar(-1.79527058227174e-11), + RealScalar(-4.48817645568941e-12), + RealScalar(-1.12204411392298e-12), + RealScalar(-2.80511028480785e-13), + RealScalar(-7.01277571201985e-14), + RealScalar(-1.75319392800498e-14), + RealScalar(-4.38298482001247e-15), + RealScalar(-1.09574620500312e-15), + RealScalar(-2.73936551250781e-16), + RealScalar(-6.84841378126949e-17), + RealScalar(-1.71210344531737e-17), + RealScalar(-4.28025861329343e-18) + }; + + // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i)); + const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = { + RealScalar(0.0), + RealScalar(0.0), + RealScalar(-1.00000000000000e+00), + RealScalar(-7.07106781186547e-01), + RealScalar(-3.82683432365090e-01), + RealScalar(-1.95090322016128e-01), + RealScalar(-9.80171403295606e-02), + RealScalar(-4.90676743274180e-02), + RealScalar(-2.45412285229123e-02), + RealScalar(-1.22715382857199e-02), + RealScalar(-6.13588464915448e-03), + RealScalar(-3.06795676296598e-03), + RealScalar(-1.53398018628477e-03), + RealScalar(-7.66990318742704e-04), + RealScalar(-3.83495187571396e-04), + RealScalar(-1.91747597310703e-04), + RealScalar(-9.58737990959773e-05), + RealScalar(-4.79368996030669e-05), + RealScalar(-2.39684498084182e-05), + RealScalar(-1.19842249050697e-05), + RealScalar(-5.99211245264243e-06), + RealScalar(-2.99605622633466e-06), + RealScalar(-1.49802811316901e-06), + RealScalar(-7.49014056584716e-07), + RealScalar(-3.74507028292384e-07), + RealScalar(-1.87253514146195e-07), + RealScalar(-9.36267570730981e-08), + RealScalar(-4.68133785365491e-08), + RealScalar(-2.34066892682746e-08), + RealScalar(-1.17033446341373e-08), + RealScalar(-5.85167231706864e-09), + RealScalar(-2.92583615853432e-09) + }; +}; + +} // end namespace Eigen + +#endif // EIGEN_HAS_CONSTEXPR + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h new file mode 100644 index 0000000..fcee5f6 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -0,0 +1,389 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H +#define EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H + +namespace Eigen { + +/** \class TensorFixedSize + * \ingroup CXX11_Tensor_Module + * + * \brief The fixed sized version of the tensor class. + * + * The fixed sized equivalent of + * Eigen::Tensor t(3, 5, 7); + * is + * Eigen::TensorFixedSize> t; + */ + +template +class TensorFixedSize : public TensorBase > +{ + public: + typedef TensorFixedSize Self; + typedef TensorBase > Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef Scalar_ Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + + static const int Options = Options_; + + enum { + IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0), + Layout = Options_ & RowMajor ? RowMajor : ColMajor, + CoordAccess = true, + RawAccess = true + }; + + typedef Dimensions_ Dimensions; + static const std::size_t NumIndices = Dimensions::count; + + protected: + TensorStorage m_storage; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rank() const { return NumIndices; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index dimension(std::size_t n) const { return m_storage.dimensions()[n]; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_storage.dimensions(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_storage.size(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar *data() { return m_storage.data(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar *data() const { return m_storage.data(); } + + // This makes EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + // work, because that uses base().coeffRef() - and we don't yet + // implement a similar class hierarchy + inline Self& base() { return *this; } + inline const Self& base() const { return *this; } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeff(Index firstIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeff(array{{firstIndex, otherIndices...}}); + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(const array& indices) const + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& coeff() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index firstIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(array{{firstIndex, otherIndices...}}); + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(const array& indices) + { + eigen_internal_assert(checkIndexRange(indices)); + return m_storage.data()[linearizedIndex(indices)]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_storage.data()[index]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& coeffRef() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return m_storage.data()[0]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) const + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return this->operator()(array{{firstIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + if (Options&RowMajor) { + const Index index = i1 + i0 * m_storage.dimensions()[1]; + return m_storage.data()[index]; + } else { + const Index index = i0 + i1 * m_storage.dimensions()[0]; + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + if (Options&RowMajor) { + const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const + { + if (Options&RowMajor) { + const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + if (Options&RowMajor) { + const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); + return m_storage.data()[index]; + } + } +#endif + + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const + { + eigen_assert(checkIndexRange(indices)); + return coeff(indices); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return coeff(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator[](Index index) const + { + // The bracket operator is only for vectors, use the parenthesis operator instead. + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeff(index); + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, IndexTypes... otherIndices) + { + // The number of indices used to access a tensor coefficient must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 1 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + return operator()(array{{firstIndex, otherIndices...}}); + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + if (Options&RowMajor) { + const Index index = i1 + i0 * m_storage.dimensions()[1]; + return m_storage.data()[index]; + } else { + const Index index = i0 + i1 * m_storage.dimensions()[0]; + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + if (Options&RowMajor) { + const Index index = i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * i2); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + if (Options&RowMajor) { + const Index index = i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0)); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * i3)); + return m_storage.data()[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + if (Options&RowMajor) { + const Index index = i4 + m_storage.dimensions()[4] * (i3 + m_storage.dimensions()[3] * (i2 + m_storage.dimensions()[2] * (i1 + m_storage.dimensions()[1] * i0))); + return m_storage.data()[index]; + } else { + const Index index = i0 + m_storage.dimensions()[0] * (i1 + m_storage.dimensions()[1] * (i2 + m_storage.dimensions()[2] * (i3 + m_storage.dimensions()[3] * i4))); + return m_storage.data()[index]; + } + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(const array& indices) + { + eigen_assert(checkIndexRange(indices)); + return coeffRef(indices); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index index) + { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE); + return coeffRef(); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator[](Index index) + { + // The bracket operator is only for vectors, use the parenthesis operator instead + EIGEN_STATIC_ASSERT(NumIndices == 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + return coeffRef(index); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize() + : m_storage() + { + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const Self& other) + : m_storage(other.m_storage) + { + } + +#if EIGEN_HAS_RVALUE_REFERENCES + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFixedSize(Self&& other) + : m_storage(other.m_storage) + { + } +#endif + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize(const TensorBase& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other.derived()); + internal::TensorExecutor::run(assign, DefaultDevice()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize& operator=(const TensorFixedSize& other) + { + // FIXME: check that the dimensions of other match the dimensions of *this. + // Unfortunately this isn't possible yet when the rhs is an expression. + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorFixedSize& operator=(const OtherDerived& other) + { + // FIXME: check that the dimensions of other match the dimensions of *this. + // Unfortunately this isn't possible yet when the rhs is an expression. + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE bool checkIndexRange(const array& /*indices*/) const + { + using internal::array_apply_and_reduce; + using internal::array_zip_and_reduce; + using internal::greater_equal_zero_op; + using internal::logical_and_op; + using internal::lesser_op; + + return true; + // check whether the indices are all >= 0 + /* array_apply_and_reduce(indices) && + // check whether the indices fit in the dimensions + array_zip_and_reduce(indices, m_storage.dimensions());*/ + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index linearizedIndex(const array& indices) const + { + if (Options&RowMajor) { + return m_storage.dimensions().IndexOfRowMajor(indices); + } else { + return m_storage.dimensions().IndexOfColMajor(indices); + } + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FIXED_SIZE_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h new file mode 100644 index 0000000..bbd5eb3 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H +#define EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H + +namespace Eigen { + +/** \class TensorForcedEval + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. + * + * + */ +/// template class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_ is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. +namespace internal { +template class MakePointer_> +struct traits > +{ + // Type promotion to handle the case where the types of the lhs and the rhs are different. + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; + + enum { + Flags = 0 + }; + template struct MakePointer { + // Intermediate typedef to workaround MSVC issue. + typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + }; +}; + +template class MakePointer_> +struct eval, Eigen::Dense> +{ + typedef const TensorForcedEvalOp& type; +}; + +template class MakePointer_> +struct nested, 1, typename eval >::type> +{ + typedef TensorForcedEvalOp type; +}; + +} // end namespace internal + + + +template class MakePointer_> +class TensorForcedEvalOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorForcedEvalOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; +}; + + +template class MakePointer_> +struct TensorEvaluator, Device> +{ + typedef TensorForcedEvalOp XprType; + typedef typename ArgType::Scalar Scalar; + typedef typename TensorEvaluator::Dimensions Dimensions; + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = true, + PacketAccess = (PacketSize > 1), + Layout = TensorEvaluator::Layout, + RawAccess = true + }; + + EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) + /// op_ is used for sycl + : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) + { } + + EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + const Index numValues = internal::array_prod(m_impl.dimensions()); + m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); + // Should initialize the memory in case we're dealing with non POD types. + if (NumTraits::RequireInitialization) { + for (Index i = 0; i < numValues; ++i) { + new(m_buffer+i) CoeffReturnType(); + } + } + typedef TensorEvalToOp< const typename internal::remove_const::type > EvalTo; + EvalTo evalToTmp(m_buffer, m_op); + const bool PacketAccess = internal::IsVectorizable::value; + internal::TensorExecutor::type, PacketAccess>::run(evalToTmp, m_device); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_device.deallocate(m_buffer); + m_buffer = NULL; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_buffer[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return internal::ploadt(m_buffer + index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC typename MakePointer::Type data() const { return m_buffer; } + + /// required by sycl in order to extract the sycl accessor + const TensorEvaluator& impl() { return m_impl; } + /// used by sycl in order to build the sycl buffer + const Device& device() const{return m_device;} + private: + TensorEvaluator m_impl; + const ArgType m_op; + const Device& m_device; + typename MakePointer::Type m_buffer; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FORCED_EVAL_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h new file mode 100644 index 0000000..52b803d --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -0,0 +1,109 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H + +namespace Eigen { + +// MakePointer class is used as a container of the adress space of the pointer +// on the host and on the device. From the host side it generates the T* pointer +// and when EIGEN_USE_SYCL is used it construct a buffer with a map_allocator to +// T* m_data on the host. It is always called on the device. +// Specialisation of MakePointer class for creating the sycl buffer with +// map_allocator. +template struct MakePointer { + typedef T* Type; +}; + +template class MakePointer_ = MakePointer> class TensorMap; +template class Tensor; +template class TensorFixedSize; +template class TensorRef; +template class TensorBase; + +template class TensorCwiseNullaryOp; +template class TensorCwiseUnaryOp; +template class TensorCwiseBinaryOp; +template class TensorCwiseTernaryOp; +template class TensorSelectOp; +template class MakePointer_ = MakePointer > class TensorReductionOp; +template class TensorIndexTupleOp; +template class TensorTupleReducerOp; +template class TensorConcatenationOp; +template class TensorContractionOp; +template class TensorConversionOp; +template class TensorConvolutionOp; +template class TensorFFTOp; +template class TensorPatchOp; +template class TensorImagePatchOp; +template class TensorVolumePatchOp; +template class TensorBroadcastingOp; +template class TensorChippingOp; +template class TensorReshapingOp; +template class TensorLayoutSwapOp; +template class TensorSlicingOp; +template class TensorReverseOp; +template class TensorPaddingOp; +template class TensorShufflingOp; +template class TensorStridingOp; +template class TensorStridingSlicingOp; +template class TensorInflationOp; +template class TensorGeneratorOp; +template class TensorAssignOp; +template class TensorScanOp; + +template class TensorCustomUnaryOp; +template class TensorCustomBinaryOp; + +template class MakePointer_ = MakePointer> class TensorEvalToOp; +template class MakePointer_ = MakePointer> class TensorForcedEvalOp; + +template class TensorDevice; +template struct TensorEvaluator; + +struct DefaultDevice; +struct ThreadPoolDevice; +struct GpuDevice; +struct SyclDevice; + +enum FFTResultType { + RealPart = 0, + ImagPart = 1, + BothParts = 2 +}; + +enum FFTDirection { + FFT_FORWARD = 0, + FFT_REVERSE = 1 +}; + + +namespace internal { + +template +struct IsVectorizable { + static const bool value = TensorEvaluator::PacketAccess; +}; + +template +struct IsVectorizable { + static const bool value = TensorEvaluator::PacketAccess && + TensorEvaluator::IsAligned; +}; + +template ::value> +class TensorExecutor; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FORWARD_DECLARATIONS_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h new file mode 100644 index 0000000..d73f6dc --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -0,0 +1,489 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H +#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H + +namespace Eigen { +namespace internal { + + +/** \internal + * \brief Template functor to compute the modulo between an array and a scalar. + */ +template +struct scalar_mod_op { + EIGEN_DEVICE_FUNC scalar_mod_op(const Scalar& divisor) : m_divisor(divisor) {} + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a % m_divisor; } + const Scalar m_divisor; +}; +template +struct functor_traits > +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; + + +/** \internal + * \brief Template functor to compute the modulo between 2 arrays. + */ +template +struct scalar_mod2_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op); + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } +}; +template +struct functor_traits > +{ enum { Cost = scalar_div_cost::value, PacketAccess = false }; }; + +template +struct scalar_fmod_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar + operator()(const Scalar& a, const Scalar& b) const { + return numext::fmod(a, b); + } +}; +template +struct functor_traits > { + enum { Cost = 13, // Reciprocal throughput of FPREM on Haswell. + PacketAccess = false }; +}; + + +/** \internal + * \brief Template functor to compute the sigmoid of a scalar + * \sa class CwiseUnaryOp, ArrayBase::sigmoid() + */ +template +struct scalar_sigmoid_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& x) const { + const T one = T(1); + return one / (one + numext::exp(-x)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(const Packet& x) const { + const Packet one = pset1(T(1)); + return pdiv(one, padd(one, pexp(pnegate(x)))); + } +}; + +template +struct functor_traits > { + enum { + Cost = NumTraits::AddCost * 2 + NumTraits::MulCost * 6, + PacketAccess = packet_traits::HasAdd && packet_traits::HasDiv && + packet_traits::HasNegate && packet_traits::HasExp + }; +}; + + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + +// Standard reduction functors +template struct SumReducer +{ + static const bool PacketAccess = packet_traits::HasAdd; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + internal::scalar_sum_op sum_op; + *accum = sum_op(*accum, t); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = padd(*accum, p); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(0); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_sum_op sum_op; + return sum_op(saccum, predux(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd + }; +}; + + +template struct MeanReducer +{ + static const bool PacketAccess = packet_traits::HasAdd && !NumTraits::IsInteger; + static const bool IsStateful = true; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + MeanReducer() : scalarCount_(0), packetCount_(0) { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { + internal::scalar_sum_op sum_op; + *accum = sum_op(*accum, t); + scalarCount_++; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) { + (*accum) = padd(*accum, p); + packetCount_++; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(0); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum / scalarCount_; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return pdiv(vaccum, pset1(packetCount_)); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_sum_op sum_op; + return sum_op(saccum, predux(vaccum)) / (scalarCount_ + packetCount_ * unpacket_traits::size); + } + + protected: + DenseIndex scalarCount_; + DenseIndex packetCount_; +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasAdd + }; +}; + + +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::lowest(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return -Eigen::NumTraits::infinity(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::highest(); + } +}; +template +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits::infinity(); + } +}; + + +template struct MaxReducer +{ + static const bool PacketAccess = packet_traits::HasMax; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t > *accum) { *accum = t; } + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = pmax(*accum, p); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return MinMaxBottomValue::IsInteger>::bottom_value(); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + return numext::maxi(saccum, predux_max(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMax + }; +}; + + +template struct MinReducer +{ + static const bool PacketAccess = packet_traits::HasMin; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t < *accum) { *accum = t; } + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = pmin(*accum, p); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return MinMaxBottomValue::IsInteger>::bottom_value(); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + return numext::mini(saccum, predux_min(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = PacketType::HasMin + }; +}; + + +template struct ProdReducer +{ + static const bool PacketAccess = packet_traits::HasMul; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + internal::scalar_product_op prod_op; + (*accum) = prod_op(*accum, t); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { + (*accum) = pmul(*accum, p); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + internal::scalar_cast_op conv; + return conv(1); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1(initialize()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + return accum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return vaccum; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + internal::scalar_product_op prod_op; + return prod_op(saccum, predux_mul(vaccum)); + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::MulCost, + PacketAccess = PacketType::HasMul + }; +}; + + +struct AndReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum && t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + +struct OrReducer { + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { + *accum = *accum || t; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool initialize() const { + return false; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool finalize(bool accum) const { + return accum; + } +}; + +template +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + +// Argmin/Argmax reducers +template struct ArgMaxTupleReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { + if (t.second > accum->second) { *accum = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits::lowest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + + +template struct ArgMinTupleReducer +{ + static const bool PacketAccess = false; + static const bool IsStateful = false; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T& t, T* accum) const { + if (t.second < accum->second) { *accum = t; } + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return T(0, NumTraits::highest()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T& accum) const { + return accum; + } +}; + +template +struct reducer_traits, Device> { + enum { + Cost = NumTraits::AddCost, + PacketAccess = false + }; +}; + + +template +class GaussianGenerator { + public: + static const bool PacketAccess = false; + + EIGEN_DEVICE_FUNC GaussianGenerator(const array& means, + const array& std_devs) + : m_means(means) + { + for (size_t i = 0; i < NumDims; ++i) { + m_two_sigmas[i] = std_devs[i] * std_devs[i] * 2; + } + } + + EIGEN_DEVICE_FUNC T operator()(const array& coordinates) const { + T tmp = T(0); + for (size_t i = 0; i < NumDims; ++i) { + T offset = coordinates[i] - m_means[i]; + tmp += offset * offset / m_two_sigmas[i]; + } + return numext::exp(-tmp); + } + + private: + array m_means; + array m_two_sigmas; +}; + +template +struct functor_traits > { + enum { + Cost = NumDims * (2 * NumTraits::AddCost + NumTraits::MulCost + + functor_traits >::Cost) + + functor_traits >::Cost, + PacketAccess = GaussianGenerator::PacketAccess + }; +}; + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h new file mode 100644 index 0000000..eb1d493 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h @@ -0,0 +1,185 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H +#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H + +namespace Eigen { + +/** \class TensorGenerator + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor generator class. + * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorGeneratorOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorGeneratorOp type; +}; + +} // end namespace internal + + + +template +class TensorGeneratorOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorGeneratorOp(const XprType& expr, const Generator& generator) + : m_xpr(expr), m_generator(generator) {} + + EIGEN_DEVICE_FUNC + const Generator& generator() const { return m_generator; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Generator m_generator; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorGeneratorOp XprType; + typedef typename XprType::Index Index; + typedef typename TensorEvaluator::Dimensions Dimensions; + static const int NumDims = internal::array_size::value; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + enum { + IsAligned = false, + PacketAccess = (internal::unpacket_traits::size > 1), + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_generator(op.generator()) + { + TensorEvaluator impl(op.expression(), device); + m_dimensions = impl.dimensions(); + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_strides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1]; + } + } else { + m_strides[NumDims - 1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + array coords; + extract_coordinates(index, coords); + return m_generator(coords); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool) const { + // TODO(rmlarsen): This is just a placeholder. Define interface to make + // generators return their cost. + return TensorOpCost(0, 0, TensorOpCost::AddCost() + + TensorOpCost::MulCost()); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void extract_coordinates(Index index, array& coords) const { + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_strides[i]; + index -= idx * m_strides[i]; + coords[i] = idx; + } + coords[0] = index; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_strides[i]; + index -= idx * m_strides[i]; + coords[i] = idx; + } + coords[NumDims-1] = index; + } + } + + Dimensions m_dimensions; + array m_strides; + Generator m_generator; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h new file mode 100644 index 0000000..665b861 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H +#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H + +namespace Eigen { + +/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) to the given tensors. + * + * This function computes the regularized incomplete beta function (integral). + * + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const + TensorCwiseTernaryOp, + const ADerived, const BDerived, const XDerived> + betainc(const ADerived& a, const BDerived& b, const XDerived& x) { + return TensorCwiseTernaryOp< + internal::scalar_betainc_op, const ADerived, + const BDerived, const XDerived>( + a, b, x, internal::scalar_betainc_op()); +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h new file mode 100644 index 0000000..a901c5d --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_IO_H +#define EIGEN_CXX11_TENSOR_TENSOR_IO_H + +namespace Eigen { + +namespace internal { + +// Print the tensor as a 2d matrix +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions()); + static const int layout = Tensor::Layout; + Map > matrix(const_cast(tensor.data()), first_dim, total_size/first_dim); + os << matrix; + } + } +}; + + +// Print the tensor as a vector +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + typedef typename internal::remove_const::type Scalar; + typedef typename Tensor::Index Index; + const Index total_size = internal::array_prod(tensor.dimensions()); + if (total_size > 0) { + Map > array(const_cast(tensor.data()), total_size); + os << array; + } + } +}; + + +// Print the tensor as a scalar +template +struct TensorPrinter { + static void run (std::ostream& os, const Tensor& tensor) { + os << tensor.coeff(0); + } +}; +} + +template +std::ostream& operator << (std::ostream& os, const TensorBase& expr) { + typedef TensorEvaluator, DefaultDevice> Evaluator; + typedef typename Evaluator::Dimensions Dimensions; + + // Evaluate the expression if needed + TensorForcedEvalOp eval = expr.eval(); + Evaluator tensor(eval, DefaultDevice()); + tensor.evalSubExprsIfNeeded(NULL); + + // Print the result + static const int rank = internal::array_size::value; + internal::TensorPrinter::run(os, tensor); + + // Cleanup. + tensor.cleanup(); + return os; +} + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_IO_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h new file mode 100644 index 0000000..566856e --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -0,0 +1,509 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H +#define EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H + +namespace Eigen { + +/** \class TensorImagePatch + * \ingroup CXX11_Tensor_Module + * + * \brief Patch extraction specialized for image processing. + * This assumes that the input has a least 3 dimensions ordered as follow: + * 1st dimension: channels (of size d) + * 2nd dimension: rows (of size r) + * 3rd dimension: columns (of size c) + * There can be additional dimensions such as time (for video) or batch (for + * bulk processing after the first 3. + * Calling the image patch code with patch_rows and patch_cols is equivalent + * to calling the regular patch extraction code with parameters d, patch_rows, + * patch_cols, and 1 for all the additional dimensions. + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename internal::remove_const::type Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions + 1; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorImagePatchOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorImagePatchOp type; +}; + +} // end namespace internal + +template +class TensorImagePatchOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + PaddingType padding_type, Scalar padding_value) + : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_row_strides(row_strides), m_col_strides(col_strides), + m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(false), m_padding_top(0), m_padding_bottom(0), m_padding_left(0), m_padding_right(0), + m_padding_type(padding_type), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorImagePatchOp(const XprType& expr, DenseIndex patch_rows, DenseIndex patch_cols, + DenseIndex row_strides, DenseIndex col_strides, + DenseIndex in_row_strides, DenseIndex in_col_strides, + DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, + DenseIndex padding_top, DenseIndex padding_bottom, + DenseIndex padding_left, DenseIndex padding_right, + Scalar padding_value) + : m_xpr(expr), m_patch_rows(patch_rows), m_patch_cols(patch_cols), + m_row_strides(row_strides), m_col_strides(col_strides), + m_in_row_strides(in_row_strides), m_in_col_strides(in_col_strides), + m_row_inflate_strides(row_inflate_strides), m_col_inflate_strides(col_inflate_strides), + m_padding_explicit(true), m_padding_top(padding_top), m_padding_bottom(padding_bottom), + m_padding_left(padding_left), m_padding_right(padding_right), + m_padding_type(PADDING_VALID), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC + DenseIndex patch_rows() const { return m_patch_rows; } + EIGEN_DEVICE_FUNC + DenseIndex patch_cols() const { return m_patch_cols; } + EIGEN_DEVICE_FUNC + DenseIndex row_strides() const { return m_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_strides() const { return m_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_row_strides() const { return m_in_row_strides; } + EIGEN_DEVICE_FUNC + DenseIndex in_col_strides() const { return m_in_col_strides; } + EIGEN_DEVICE_FUNC + DenseIndex row_inflate_strides() const { return m_row_inflate_strides; } + EIGEN_DEVICE_FUNC + DenseIndex col_inflate_strides() const { return m_col_inflate_strides; } + EIGEN_DEVICE_FUNC + bool padding_explicit() const { return m_padding_explicit; } + EIGEN_DEVICE_FUNC + DenseIndex padding_top() const { return m_padding_top; } + EIGEN_DEVICE_FUNC + DenseIndex padding_bottom() const { return m_padding_bottom; } + EIGEN_DEVICE_FUNC + DenseIndex padding_left() const { return m_padding_left; } + EIGEN_DEVICE_FUNC + DenseIndex padding_right() const { return m_padding_right; } + EIGEN_DEVICE_FUNC + PaddingType padding_type() const { return m_padding_type; } + EIGEN_DEVICE_FUNC + Scalar padding_value() const { return m_padding_value; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const DenseIndex m_patch_rows; + const DenseIndex m_patch_cols; + const DenseIndex m_row_strides; + const DenseIndex m_col_strides; + const DenseIndex m_in_row_strides; + const DenseIndex m_in_col_strides; + const DenseIndex m_row_inflate_strides; + const DenseIndex m_col_inflate_strides; + const bool m_padding_explicit; + const DenseIndex m_padding_top; + const DenseIndex m_padding_bottom; + const DenseIndex m_padding_left; + const DenseIndex m_padding_right; + const PaddingType m_padding_type; + const Scalar m_padding_value; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorImagePatchOp XprType; + typedef typename XprType::Index Index; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims + 1; + typedef DSizes Dimensions; + typedef typename internal::remove_const::type Scalar; + typedef TensorEvaluator, + Device> Self; + typedef TensorEvaluator Impl; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + EIGEN_STATIC_ASSERT((NumDims >= 4), YOU_MADE_A_PROGRAMMING_MISTAKE); + + m_paddingValue = op.padding_value(); + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + // Caches a few variables. + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputDepth = input_dims[0]; + m_inputRows = input_dims[1]; + m_inputCols = input_dims[2]; + } else { + m_inputDepth = input_dims[NumInputDims-1]; + m_inputRows = input_dims[NumInputDims-2]; + m_inputCols = input_dims[NumInputDims-3]; + } + + m_row_strides = op.row_strides(); + m_col_strides = op.col_strides(); + + // Input strides and effective input/patch size + m_in_row_strides = op.in_row_strides(); + m_in_col_strides = op.in_col_strides(); + m_row_inflate_strides = op.row_inflate_strides(); + m_col_inflate_strides = op.col_inflate_strides(); + // The "effective" input rows and input cols are the input rows and cols + // after inflating them with zeros. + // For examples, a 2x3 matrix with row_inflate_strides and + // col_inflate_strides of 2 comes from: + // A B C + // D E F + // + // to a matrix is 3 x 5: + // + // A . B . C + // . . . . . + // D . E . F + + m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1; + m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1; + m_patch_rows_eff = op.patch_rows() + (op.patch_rows() - 1) * (m_in_row_strides - 1); + m_patch_cols_eff = op.patch_cols() + (op.patch_cols() - 1) * (m_in_col_strides - 1); + + if (op.padding_explicit()) { + m_outputRows = numext::ceil((m_input_rows_eff + op.padding_top() + op.padding_bottom() - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff + op.padding_left() + op.padding_right() - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + m_rowPaddingTop = op.padding_top(); + m_colPaddingLeft = op.padding_left(); + } else { + // Computing padding from the type + switch (op.padding_type()) { + case PADDING_VALID: + m_outputRows = numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) / static_cast(m_row_strides)); + m_outputCols = numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) / static_cast(m_col_strides)); + // Calculate the padding + m_rowPaddingTop = numext::maxi(0, ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2); + m_colPaddingLeft = numext::maxi(0, ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2); + break; + case PADDING_SAME: + m_outputRows = numext::ceil(m_input_rows_eff / static_cast(m_row_strides)); + m_outputCols = numext::ceil(m_input_cols_eff / static_cast(m_col_strides)); + // Calculate the padding + m_rowPaddingTop = ((m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff) / 2; + m_colPaddingLeft = ((m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff) / 2; + break; + default: + eigen_assert(false && "unexpected padding"); + } + } + eigen_assert(m_outputRows > 0); + eigen_assert(m_outputCols > 0); + + // Dimensions for result of extraction. + if (static_cast(Layout) == static_cast(ColMajor)) { + // ColMajor + // 0: depth + // 1: patch_rows + // 2: patch_cols + // 3: number of patches + // 4 and beyond: anything else (such as batch). + m_dimensions[0] = input_dims[0]; + m_dimensions[1] = op.patch_rows(); + m_dimensions[2] = op.patch_cols(); + m_dimensions[3] = m_outputRows * m_outputCols; + for (int i = 4; i < NumDims; ++i) { + m_dimensions[i] = input_dims[i-1]; + } + } else { + // RowMajor + // NumDims-1: depth + // NumDims-2: patch_rows + // NumDims-3: patch_cols + // NumDims-4: number of patches + // NumDims-5 and beyond: anything else (such as batch). + m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; + m_dimensions[NumDims-2] = op.patch_rows(); + m_dimensions[NumDims-3] = op.patch_cols(); + m_dimensions[NumDims-4] = m_outputRows * m_outputCols; + for (int i = NumDims-5; i >= 0; --i) { + m_dimensions[i] = input_dims[i]; + } + } + + // Strides for moving the patch in various dimensions. + if (static_cast(Layout) == static_cast(ColMajor)) { + m_colStride = m_dimensions[1]; + m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0]; + m_otherStride = m_patchStride * m_dimensions[3]; + } else { + m_colStride = m_dimensions[NumDims-2]; + m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1]; + m_otherStride = m_patchStride * m_dimensions[NumDims-4]; + } + + // Strides for navigating through the input tensor. + m_rowInputStride = m_inputDepth; + m_colInputStride = m_inputDepth * m_inputRows; + m_patchInputStride = m_inputDepth * m_inputRows * m_inputCols; + + // Fast representations of different variables. + m_fastOtherStride = internal::TensorIntDivisor(m_otherStride); + m_fastPatchStride = internal::TensorIntDivisor(m_patchStride); + m_fastColStride = internal::TensorIntDivisor(m_colStride); + m_fastInflateRowStride = internal::TensorIntDivisor(m_row_inflate_strides); + m_fastInflateColStride = internal::TensorIntDivisor(m_col_inflate_strides); + m_fastInputColsEff = internal::TensorIntDivisor(m_input_cols_eff); + + // Number of patches in the width dimension. + m_fastOutputRows = internal::TensorIntDivisor(m_outputRows); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[0]); + } else { + m_fastOutputDepth = internal::TensorIntDivisor(m_dimensions[NumDims-1]); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + // Patch index corresponding to the passed in index. + const Index patchIndex = index / m_fastPatchStride; + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth; + + // Other ways to index this element. + const Index otherIndex = (NumDims == 4) ? 0 : index / m_fastOtherStride; + const Index patch2DIndex = (NumDims == 4) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride; + + // Calculate col index in the input original tensor. + const Index colIndex = patch2DIndex / m_fastOutputRows; + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft; + const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInflateColStride) : 0); + if (inputCol < 0 || inputCol >= m_input_cols_eff || + ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) { + return Scalar(m_paddingValue); + } + + // Calculate row index in the original input tensor. + const Index rowIndex = patch2DIndex - colIndex * m_outputRows; + const Index rowOffset = patchOffset - colOffset * m_colStride; + const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop; + const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInflateRowStride) : 0); + if (inputRow < 0 || inputRow >= m_input_rows_eff || + ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) { + return Scalar(m_paddingValue); + } + + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + + const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex * m_patchInputStride; + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1) { + return packetWithPossibleZero(index); + } + + const Index indices[2] = {index, index + PacketSize - 1}; + const Index patchIndex = indices[0] / m_fastPatchStride; + if (patchIndex != indices[1] / m_fastPatchStride) { + return packetWithPossibleZero(index); + } + const Index otherIndex = (NumDims == 4) ? 0 : indices[0] / m_fastOtherStride; + eigen_assert(otherIndex == indices[1] / m_fastOtherStride); + + // Find the offset of the element wrt the location of the first element. + const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth, + (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth}; + + const Index patch2DIndex = (NumDims == 4) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride; + eigen_assert(patch2DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride); + + const Index colIndex = patch2DIndex / m_fastOutputRows; + const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride}; + + // Calculate col indices in the original input tensor. + const Index inputCols[2] = {colIndex * m_col_strides + colOffsets[0] - + m_colPaddingLeft, colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft}; + if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputCols[0] == inputCols[1]) { + const Index rowIndex = patch2DIndex - colIndex * m_outputRows; + const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride}; + eigen_assert(rowOffsets[0] <= rowOffsets[1]); + // Calculate col indices in the original input tensor. + const Index inputRows[2] = {rowIndex * m_row_strides + rowOffsets[0] - + m_rowPaddingTop, rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop}; + + if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) { + return internal::pset1(Scalar(m_paddingValue)); + } + + if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { + // no padding + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 0 : NumDims - 1; + const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index]; + const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride; + return m_impl.template packet(inputIndex); + } + } + + return packetWithPossibleZero(index); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + const TensorEvaluator& impl() const { return m_impl; } + + Index rowPaddingTop() const { return m_rowPaddingTop; } + Index colPaddingLeft() const { return m_colPaddingLeft; } + Index outputRows() const { return m_outputRows; } + Index outputCols() const { return m_outputCols; } + Index userRowStride() const { return m_row_strides; } + Index userColStride() const { return m_col_strides; } + Index userInRowStride() const { return m_in_row_strides; } + Index userInColStride() const { return m_in_col_strides; } + Index rowInflateStride() const { return m_row_inflate_strides; } + Index colInflateStride() const { return m_col_inflate_strides; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost + costPerCoeff(bool vectorized) const { + // We conservatively estimate the cost for the code path where the computed + // index is inside the original image and + // TensorEvaluator::CoordAccess is false. + const double compute_cost = 3 * TensorOpCost::DivCost() + + 6 * TensorOpCost::MulCost() + + 8 * TensorOpCost::MulCost(); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + Dimensions m_dimensions; + + Index m_otherStride; + Index m_patchStride; + Index m_colStride; + Index m_row_strides; + Index m_col_strides; + + Index m_in_row_strides; + Index m_in_col_strides; + Index m_row_inflate_strides; + Index m_col_inflate_strides; + + Index m_input_rows_eff; + Index m_input_cols_eff; + Index m_patch_rows_eff; + Index m_patch_cols_eff; + + internal::TensorIntDivisor m_fastOtherStride; + internal::TensorIntDivisor m_fastPatchStride; + internal::TensorIntDivisor m_fastColStride; + internal::TensorIntDivisor m_fastInflateRowStride; + internal::TensorIntDivisor m_fastInflateColStride; + internal::TensorIntDivisor m_fastInputColsEff; + + Index m_rowInputStride; + Index m_colInputStride; + Index m_patchInputStride; + + Index m_inputDepth; + Index m_inputRows; + Index m_inputCols; + + Index m_outputRows; + Index m_outputCols; + + Index m_rowPaddingTop; + Index m_colPaddingLeft; + + internal::TensorIntDivisor m_fastOutputRows; + internal::TensorIntDivisor m_fastOutputDepth; + + Scalar m_paddingValue; + + TensorEvaluator m_impl; +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_IMAGE_PATCH_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h new file mode 100644 index 0000000..3209fec --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIndexList.h @@ -0,0 +1,725 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H +#define EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H + + +#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES + +#define EIGEN_HAS_INDEX_LIST + +namespace Eigen { + +/** \internal + * + * \class TensorIndexList + * \ingroup CXX11_Tensor_Module + * + * \brief Set of classes used to encode a set of Tensor dimensions/indices. + * + * The indices in the list can be known at compile time or at runtime. A mix + * of static and dynamic indices can also be provided if needed. The tensor + * code will attempt to take advantage of the indices that are known at + * compile time to optimize the code it generates. + * + * This functionality requires a c++11 compliant compiler. If your compiler + * is older you need to use arrays of indices instead. + * + * Several examples are provided in the cxx11_tensor_index_list.cpp file. + * + * \sa Tensor + */ + +template +struct type2index { + static const DenseIndex value = n; + EIGEN_DEVICE_FUNC constexpr operator DenseIndex() const { return n; } + EIGEN_DEVICE_FUNC void set(DenseIndex val) { + eigen_assert(val == n); + } +}; + +// This can be used with IndexPairList to get compile-time constant pairs, +// such as IndexPairList, type2indexpair<3,4>>(). +template +struct type2indexpair { + static const DenseIndex first = f; + static const DenseIndex second = s; + + constexpr EIGEN_DEVICE_FUNC operator IndexPair() const { + return IndexPair(f, s); + } + + EIGEN_DEVICE_FUNC void set(const IndexPair& val) { + eigen_assert(val.first == f); + eigen_assert(val.second == s); + } +}; + + +template struct NumTraits > +{ + typedef DenseIndex Real; + enum { + IsComplex = 0, + RequireInitialization = false, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; + + EIGEN_DEVICE_FUNC static inline Real epsilon() { return 0; } + EIGEN_DEVICE_FUNC static inline Real dummy_precision() { return 0; } + EIGEN_DEVICE_FUNC static inline Real highest() { return n; } + EIGEN_DEVICE_FUNC static inline Real lowest() { return n; } +}; + +namespace internal { +template +EIGEN_DEVICE_FUNC void update_value(T& val, DenseIndex new_val) { + val = new_val; +} +template +EIGEN_DEVICE_FUNC void update_value(type2index& val, DenseIndex new_val) { + val.set(new_val); +} + +template +EIGEN_DEVICE_FUNC void update_value(T& val, IndexPair new_val) { + val = new_val; +} +template +EIGEN_DEVICE_FUNC void update_value(type2indexpair& val, IndexPair new_val) { + val.set(new_val); +} + + +template +struct is_compile_time_constant { + static constexpr bool value = false; +}; + +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; + +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; +template +struct is_compile_time_constant& > { + static constexpr bool value = true; +}; + + +template +struct IndexTuple; + +template +struct IndexTuple { + EIGEN_DEVICE_FUNC constexpr IndexTuple() : head(), others() { } + EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v, const O... o) : head(v), others(o...) { } + + constexpr static int count = 1 + sizeof...(O); + T head; + IndexTuple others; + typedef T Head; + typedef IndexTuple Other; +}; + +template + struct IndexTuple { + EIGEN_DEVICE_FUNC constexpr IndexTuple() : head() { } + EIGEN_DEVICE_FUNC constexpr IndexTuple(const T& v) : head(v) { } + + constexpr static int count = 1; + T head; + typedef T Head; +}; + + +template +struct IndexTupleExtractor; + +template +struct IndexTupleExtractor { + + typedef typename IndexTupleExtractor::ValType ValType; + + EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + + EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple& val) { + return IndexTupleExtractor::get_val(val.others); + } + template + EIGEN_DEVICE_FUNC static void set_val(IndexTuple& val, V& new_val) { + IndexTupleExtractor::set_val(val.others, new_val); + } + +}; + +template + struct IndexTupleExtractor<0, T, O...> { + + typedef T ValType; + + EIGEN_DEVICE_FUNC static constexpr ValType& get_val(IndexTuple& val) { + return val.head; + } + EIGEN_DEVICE_FUNC static constexpr const ValType& get_val(const IndexTuple& val) { + return val.head; + } + template + EIGEN_DEVICE_FUNC static void set_val(IndexTuple& val, V& new_val) { + val.head = new_val; + } +}; + + + +template +EIGEN_DEVICE_FUNC constexpr typename IndexTupleExtractor::ValType& array_get(IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template +EIGEN_DEVICE_FUNC constexpr const typename IndexTupleExtractor::ValType& array_get(const IndexTuple& tuple) { + return IndexTupleExtractor::get_val(tuple); +} +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; +template + struct array_size > { + static const size_t value = IndexTuple::count; +}; + + + + +template +struct tuple_coeff { + template + EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex i, const IndexTuple& t) { + // return array_get(t) * (i == Idx) + tuple_coeff::get(i, t) * (i != Idx); + return (i == Idx ? array_get(t) : tuple_coeff::get(i, t)); + } + template + EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple& t, const ValueT& value) { + if (i == Idx) { + update_value(array_get(t), value); + } else { + tuple_coeff::set(i, t, value); + } + } + + template + EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple& t) { + return ((i == Idx) & is_compile_time_constant::ValType>::value) || + tuple_coeff::value_known_statically(i, t); + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && + tuple_coeff::values_up_to_known_statically(t); + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple& t) { + return is_compile_time_constant::ValType>::value && + is_compile_time_constant::ValType>::value && + array_get(t) > array_get(t) && + tuple_coeff::values_up_to_statically_known_to_increase(t); + } +}; + +template +struct tuple_coeff<0, ValueT> { + template + EIGEN_DEVICE_FUNC static constexpr ValueT get(const DenseIndex /*i*/, const IndexTuple& t) { + // eigen_assert (i == 0); // gcc fails to compile assertions in constexpr + return array_get<0>(t)/* * (i == 0)*/; + } + template + EIGEN_DEVICE_FUNC static void set(const DenseIndex i, IndexTuple& t, const ValueT value) { + eigen_assert (i == 0); + update_value(array_get<0>(t), value); + } + template + EIGEN_DEVICE_FUNC static constexpr bool value_known_statically(const DenseIndex i, const IndexTuple&) { + return is_compile_time_constant::ValType>::value & (i == 0); + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_known_statically(const IndexTuple&) { + return is_compile_time_constant::ValType>::value; + } + + template + EIGEN_DEVICE_FUNC static constexpr bool values_up_to_statically_known_to_increase(const IndexTuple&) { + return true; + } +}; +} // namespace internal + + + +template +struct IndexList : internal::IndexTuple { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex operator[] (const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr DenseIndex get(const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const DenseIndex value) { + return internal::tuple_coeff >::value-1, DenseIndex>::set(i, *this, value); + } + + EIGEN_DEVICE_FUNC constexpr IndexList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + EIGEN_DEVICE_FUNC constexpr IndexList(FirstType& first, OtherTypes... other) : internal::IndexTuple(first, other...) { } + EIGEN_DEVICE_FUNC constexpr IndexList() : internal::IndexTuple() { } + + EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::value_known_statically(i, *this); + } + EIGEN_DEVICE_FUNC constexpr bool all_values_known_statically() const { + return internal::tuple_coeff >::value-1, DenseIndex>::values_up_to_known_statically(*this); + } + + EIGEN_DEVICE_FUNC constexpr bool values_statically_known_to_increase() const { + return internal::tuple_coeff >::value-1, DenseIndex>::values_up_to_statically_known_to_increase(*this); + } +}; + + +template +constexpr IndexList make_index_list(FirstType val1, OtherTypes... other_vals) { + return IndexList(val1, other_vals...); +} + + +template +struct IndexPairList : internal::IndexTuple { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr IndexPair operator[] (const DenseIndex i) const { + return internal::tuple_coeff >::value-1, IndexPair>::get(i, *this); + } + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC void set(const DenseIndex i, const IndexPair value) { + return internal::tuple_coeff>::value-1, IndexPair >::set(i, *this, value); + } + + EIGEN_DEVICE_FUNC constexpr IndexPairList(const internal::IndexTuple& other) : internal::IndexTuple(other) { } + EIGEN_DEVICE_FUNC constexpr IndexPairList() : internal::IndexTuple() { } + + EIGEN_DEVICE_FUNC constexpr bool value_known_statically(const DenseIndex i) const { + return internal::tuple_coeff >::value-1, DenseIndex>::value_known_statically(i, *this); + } +}; + +namespace internal { + +template size_t array_prod(const IndexList& sizes) { + size_t result = 1; + for (int i = 0; i < array_size >::value; ++i) { + result *= sizes[i]; + } + return result; +} + +template struct array_size > { + static const size_t value = array_size >::value; +}; +template struct array_size > { + static const size_t value = array_size >::value; +}; + +template struct array_size > { + static const size_t value = std::tuple_size >::value; +}; +template struct array_size > { + static const size_t value = std::tuple_size >::value; +}; + +template EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(IndexList& a) { + return IndexTupleExtractor::get_val(a); +} +template EIGEN_DEVICE_FUNC constexpr DenseIndex array_get(const IndexList& a) { + return IndexTupleExtractor::get_val(a); +} + +template +struct index_known_statically_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex) { + return false; + } +}; + +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) { + return IndexList().value_known_statically(i); + } +}; + +template +struct index_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i) { + return IndexList().value_known_statically(i); + } +}; + + +template +struct all_indices_known_statically_impl { + static constexpr bool run() { + return false; + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return IndexList().all_values_known_statically(); + } +}; + +template +struct all_indices_known_statically_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return IndexList().all_values_known_statically(); + } +}; + + +template +struct indices_statically_known_to_increase_impl { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return false; + } +}; + +template + struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); + } +}; + +template + struct indices_statically_known_to_increase_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run() { + return Eigen::IndexList().values_statically_known_to_increase(); + } +}; + + +template +struct index_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) == value); + } +}; + +template +struct index_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) == value); + } +}; + + +template +struct index_statically_ne_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) != value); + } +}; + +template +struct index_statically_ne_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) != value); + } +}; + + +template +struct index_statically_gt_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) > value); + } +}; + +template +struct index_statically_gt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) > value); + } +}; + + + +template +struct index_statically_lt_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) < value); + } +}; + +template +struct index_statically_lt_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexList().value_known_statically(i) & + (IndexList().get(i) < value); + } +}; + + + +template +struct index_pair_first_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_first_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).first == value); + } +}; + +template +struct index_pair_first_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).first == value); + } +}; + + + +template +struct index_pair_second_statically_eq_impl { + EIGEN_DEVICE_FUNC static constexpr bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_second_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).second == value); + } +}; + +template +struct index_pair_second_statically_eq_impl > { + EIGEN_DEVICE_FUNC static constexpr bool run(const DenseIndex i, const DenseIndex value) { + return IndexPairList().value_known_statically(i) & + (IndexPairList().operator[](i).second == value); + } +}; + + +} // end namespace internal +} // end namespace Eigen + +#else + +namespace Eigen { +namespace internal { + +template +struct index_known_statically_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(const DenseIndex) { + return false; + } +}; + +template +struct all_indices_known_statically_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return false; + } +}; + +template +struct indices_statically_known_to_increase_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run() { + return false; + } +}; + +template +struct index_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_ne_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_gt_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_statically_lt_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_first_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + +template +struct index_pair_second_statically_eq_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool run(DenseIndex, DenseIndex) { + return false; + } +}; + + + +} // end namespace internal +} // end namespace Eigen + +#endif + + +namespace Eigen { +namespace internal { +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_known_statically(DenseIndex i) { + return index_known_statically_impl::run(i); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool all_indices_known_statically() { + return all_indices_known_statically_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool indices_statically_known_to_increase() { + return indices_statically_known_to_increase_impl::run(); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_eq(DenseIndex i, DenseIndex value) { + return index_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_ne(DenseIndex i, DenseIndex value) { + return index_statically_ne_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_gt(DenseIndex i, DenseIndex value) { + return index_statically_gt_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_statically_lt(DenseIndex i, DenseIndex value) { + return index_statically_lt_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_first_statically_eq(DenseIndex i, DenseIndex value) { + return index_pair_first_statically_eq_impl::run(i, value); +} + +template +static EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bool index_pair_second_statically_eq(DenseIndex i, DenseIndex value) { + return index_pair_second_statically_eq_impl::run(i, value); +} + +} // end namespace internal +} // end namespace Eigen + + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INDEX_LIST_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h new file mode 100644 index 0000000..f391fb9 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInflation.h @@ -0,0 +1,229 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Ke Yang +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H +#define EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H + +namespace Eigen { + +/** \class TensorInflation + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor inflation class. + * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorInflationOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorInflationOp type; +}; + +} // end namespace internal + +template +class TensorInflationOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorInflationOp(const XprType& expr, const Strides& strides) + : m_xpr(expr), m_strides(strides) {} + + EIGEN_DEVICE_FUNC + const Strides& strides() const { return m_strides; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const Strides m_strides; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorInflationOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/ false, + PacketAccess = TensorEvaluator::PacketAccess, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_strides(op.strides()) + { + m_dimensions = m_impl.dimensions(); + // Expand each dimension to the inflated dimension. + for (int i = 0; i < NumDims; ++i) { + m_dimensions[i] = (m_dimensions[i] - 1) * op.strides()[i] + 1; + } + + // Remember the strides for fast division. + for (int i = 0; i < NumDims; ++i) { + m_fastStrides[i] = internal::TensorIntDivisor(m_strides[i]); + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + m_inputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + } + } else { // RowMajor + m_outputStrides[NumDims-1] = 1; + m_inputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + // Computes the input index given the output index. Returns true if the output + // index doesn't fall into a hole. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool getInputIndex(Index index, Index* inputIndex) const + { + eigen_assert(index < dimensions().TotalSize()); + *inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (idx != idx / m_fastStrides[i] * m_strides[i]) { + return false; + } + *inputIndex += idx / m_strides[i] * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (index != index / m_fastStrides[0] * m_strides[0]) { + return false; + } + *inputIndex += index / m_strides[0]; + return true; + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i]; + if (idx != idx / m_fastStrides[i] * m_strides[i]) { + return false; + } + *inputIndex += idx / m_strides[i] * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (index != index / m_fastStrides[NumDims-1] * m_strides[NumDims-1]) { + return false; + } + *inputIndex += index / m_strides[NumDims - 1]; + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + Index inputIndex = 0; + if (getInputIndex(index, &inputIndex)) { + return m_impl.coeff(inputIndex); + } else { + return Scalar(0); + } + } + + // TODO(yangke): optimize this function so that we can detect and produce + // all-zero packets + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (3 * TensorOpCost::DivCost() + + 3 * TensorOpCost::MulCost() + + 2 * TensorOpCost::AddCost()); + const double input_size = m_impl.dimensions().TotalSize(); + const double output_size = m_dimensions.TotalSize(); + if (output_size == 0) + return TensorOpCost(); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(sizeof(CoeffReturnType) * input_size / output_size, 0, + compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + const Strides m_strides; + array, NumDims> m_fastStrides; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INFLATION_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h new file mode 100644 index 0000000..33edc49 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorInitializer.h @@ -0,0 +1,82 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H +#define EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H + +#if EIGEN_HAS_VARIADIC_TEMPLATES + +#include + +namespace Eigen { + +/** \class TensorInitializer + * \ingroup CXX11_Tensor_Module + * + * \brief Helper template to initialize Tensors from std::initializer_lists. + */ +namespace internal { + +template +struct Initializer { + typedef std::initializer_list< + typename Initializer::InitList> InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>* indices, + const InitList& vals) { + int i = 0; + for (auto v : vals) { + (*indices)[traits::NumDimensions - N] = i++; + Initializer::run(tensor, indices, v); + } + } +}; + +template +struct Initializer { + typedef std::initializer_list::Scalar> InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>* indices, + const InitList& vals) { + int i = 0; + // There is likely a faster way to do that than iterating. + for (auto v : vals) { + (*indices)[traits::NumDimensions - 1] = i++; + tensor.coeffRef(*indices) = v; + } + } +}; + +template +struct Initializer { + typedef typename traits::Scalar InitList; + + static void run(TensorEvaluator& tensor, + Eigen::array::Index, traits::NumDimensions>*, + const InitList& v) { + tensor.coeffRef(0) = v; + } +}; + + +template +void initialize_tensor(TensorEvaluator& tensor, + const typename Initializer::NumDimensions>::InitList& vals) { + Eigen::array::Index, traits::NumDimensions> indices; + Initializer::NumDimensions>::run(tensor, &indices, vals); +} + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_HAS_VARIADIC_TEMPLATES + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INITIALIZER_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h new file mode 100644 index 0000000..ede3939 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h @@ -0,0 +1,253 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H +#define EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H + + +namespace Eigen { + +/** \internal + * + * \class TensorIntDiv + * \ingroup CXX11_Tensor_Module + * + * \brief Fast integer division by a constant. + * + * See the paper from Granlund and Montgomery for explanation. + * (at http://dx.doi.org/10.1145/773473.178249) + * + * \sa Tensor + */ + +namespace internal { + +namespace { + + // Note: result is undefined if val == 0 + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + typename internal::enable_if::type count_leading_zeros(const T val) + { +#ifdef __CUDA_ARCH__ + return __clz(val); +#elif EIGEN_COMP_MSVC + unsigned long index; + _BitScanReverse(&index, val); + return 31 - index; +#else + EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); + return __builtin_clz(static_cast(val)); +#endif + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + typename internal::enable_if::type count_leading_zeros(const T val) + { +#ifdef __CUDA_ARCH__ + return __clzll(val); +#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64 + unsigned long index; + _BitScanReverse64(&index, val); + return 63 - index; +#elif EIGEN_COMP_MSVC + // MSVC's _BitScanReverse64 is not available for 32bits builds. + unsigned int lo = (unsigned int)(val&0xffffffff); + unsigned int hi = (unsigned int)((val>>32)&0xffffffff); + int n; + if(hi==0) + n = 32 + count_leading_zeros(lo); + else + n = count_leading_zeros(hi); + return n; +#else + EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE); + return __builtin_clzll(static_cast(val)); +#endif + } + + template + struct UnsignedTraits { + typedef typename conditional::type type; + }; + + template + struct DividerTraits { + typedef typename UnsignedTraits::type type; + static const int N = sizeof(T) * 8; + }; + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t muluh(const uint32_t a, const T b) { +#if defined(__CUDA_ARCH__) + return __umulhi(a, b); +#else + return (static_cast(a) * b) >> 32; +#endif + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t muluh(const uint64_t a, const T b) { +#if defined(__CUDA_ARCH__) + return __umul64hi(a, b); +#elif defined(__SIZEOF_INT128__) + __uint128_t v = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b); + return static_cast(v >> 64); +#else + return (TensorUInt128, uint64_t>(a) * TensorUInt128, uint64_t>(b)).upper(); +#endif + } + + template + struct DividerHelper { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint32_t computeMultiplier(const int log_div, const T divider) { + EIGEN_STATIC_ASSERT(N == 32, YOU_MADE_A_PROGRAMMING_MISTAKE); + return static_cast((static_cast(1) << (N+log_div)) / divider - (static_cast(1) << N) + 1); + } + }; + + template + struct DividerHelper<64, T> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE uint64_t computeMultiplier(const int log_div, const T divider) { +#if defined(__SIZEOF_INT128__) && !defined(__CUDA_ARCH__) + return static_cast((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1); +#else + const uint64_t shift = 1ULL << log_div; + TensorUInt128 result = TensorUInt128 >(shift, 0) / TensorUInt128, uint64_t>(divider) + - TensorUInt128, static_val<0> >(1, 0) + + TensorUInt128, static_val<1> >(1); + return static_cast(result); +#endif + } + }; +} + + +template +struct TensorIntDivisor { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { + multiplier = 0; + shift1 = 0; + shift2 = 0; + } + + // Must have 0 < divider < 2^31. This is relaxed to + // 0 < divider < 2^63 when using 64-bit indices on platforms that support + // the __uint128_t type. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor(const T divider) { + const int N = DividerTraits::N; + eigen_assert(static_cast::type>(divider) < NumTraits::highest()/2); + eigen_assert(divider > 0); + + // fast ln2 + const int leading_zeros = count_leading_zeros(static_cast(divider)); + int log_div = N - leading_zeros; + // if divider is a power of two then log_div is 1 more than it should be. + if ((static_cast::type>(1) << (log_div-1)) == static_cast::type>(divider)) + log_div--; + + multiplier = DividerHelper::computeMultiplier(log_div, divider); + shift1 = log_div > 1 ? 1 : log_div; + shift2 = log_div > 1 ? log_div-1 : 0; + } + + // Must have 0 <= numerator. On platforms that dont support the __uint128_t + // type numerator should also be less than 2^32-1. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T divide(const T numerator) const { + eigen_assert(static_cast::type>(numerator) < NumTraits::highest()/2); + //eigen_assert(numerator >= 0); // this is implicitly asserted by the line above + + UnsignedType t1 = muluh(multiplier, numerator); + UnsignedType t = (static_cast(numerator) - t1) >> shift1; + return (t1 + t) >> shift2; + } + + private: + typedef typename DividerTraits::type UnsignedType; + UnsignedType multiplier; + int32_t shift1; + int32_t shift2; +}; + + +// Optimized version for signed 32 bit integers. +// Derived from Hacker's Delight. +// Only works for divisors strictly greater than one +template <> +class TensorIntDivisor { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorIntDivisor() { + magic = 0; + shift = 0; + } + // Must have 2 <= divider + EIGEN_DEVICE_FUNC TensorIntDivisor(int32_t divider) { + eigen_assert(divider >= 2); + calcMagic(divider); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int divide(const int32_t n) const { +#ifdef __CUDA_ARCH__ + return (__umulhi(magic, n) >> shift); +#else + uint64_t v = static_cast(magic) * static_cast(n); + return (static_cast(v >> 32) >> shift); +#endif + } + +private: + // Compute the magic numbers. See Hacker's Delight section 10 for an in + // depth explanation. + EIGEN_DEVICE_FUNC void calcMagic(int32_t d) { + const unsigned two31 = 0x80000000; // 2**31. + unsigned ad = d; + unsigned t = two31 + (ad >> 31); + unsigned anc = t - 1 - t%ad; // Absolute value of nc. + int p = 31; // Init. p. + unsigned q1 = two31/anc; // Init. q1 = 2**p/|nc|. + unsigned r1 = two31 - q1*anc; // Init. r1 = rem(2**p, |nc|). + unsigned q2 = two31/ad; // Init. q2 = 2**p/|d|. + unsigned r2 = two31 - q2*ad; // Init. r2 = rem(2**p, |d|). + unsigned delta = 0; + do { + p = p + 1; + q1 = 2*q1; // Update q1 = 2**p/|nc|. + r1 = 2*r1; // Update r1 = rem(2**p, |nc|). + if (r1 >= anc) { // (Must be an unsigned + q1 = q1 + 1; // comparison here). + r1 = r1 - anc;} + q2 = 2*q2; // Update q2 = 2**p/|d|. + r2 = 2*r2; // Update r2 = rem(2**p, |d|). + if (r2 >= ad) { // (Must be an unsigned + q2 = q2 + 1; // comparison here). + r2 = r2 - ad;} + delta = ad - r2; + } while (q1 < delta || (q1 == delta && r1 == 0)); + + magic = (unsigned)(q2 + 1); + shift = p - 32; + } + + uint32_t magic; + int32_t shift; +}; + + +template +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator / (const T& numerator, const TensorIntDivisor& divisor) { + return divisor.divide(numerator); +} + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_INTDIV_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h new file mode 100644 index 0000000..cd0109e --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorLayoutSwap.h @@ -0,0 +1,209 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H +#define EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H + +namespace Eigen { + +/** \class TensorLayoutSwap + * \ingroup CXX11_Tensor_Module + * + * \brief Swap the layout from col-major to row-major, or row-major + * to col-major, and invert the order of the dimensions. + * + * Beware: the dimensions are reversed by this operation. If you want to + * preserve the ordering of the dimensions, you need to combine this + * operation with a shuffle. + * + * \example: + * Tensor input(2, 4); + * Tensor output = input.swap_layout(); + * eigen_assert(output.dimension(0) == 4); + * eigen_assert(output.dimension(1) == 2); + * + * array shuffle(1, 0); + * output = input.swap_layout().shuffle(shuffle); + * eigen_assert(output.dimension(0) == 2); + * eigen_assert(output.dimension(1) == 4); + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = traits::NumDimensions; + static const int Layout = (traits::Layout == ColMajor) ? RowMajor : ColMajor; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorLayoutSwapOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorLayoutSwapOp type; +}; + +} // end namespace internal + + + +template +class TensorLayoutSwapOp : public TensorBase, WriteAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorLayoutSwapOp(const XprType& expr) + : m_xpr(expr) {} + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const TensorLayoutSwapOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorLayoutSwapOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorLayoutSwapOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value; + typedef DSizes Dimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? RowMajor : ColMajor, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + for(int i = 0; i < NumDims; ++i) { + m_dimensions[i] = m_impl.dimensions()[NumDims-1-i]; + } + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + return m_impl.evalSubExprsIfNeeded(data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_impl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return m_impl.data(); } + + const TensorEvaluator& impl() const { return m_impl; } + + protected: + TensorEvaluator m_impl; + Dimensions m_dimensions; +}; + + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorLayoutSwapOp XprType; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = (static_cast(TensorEvaluator::Layout) == static_cast(ColMajor)) ? RowMajor : ColMajor, + CoordAccess = false // to be implemented + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(index); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + this->m_impl.template writePacket(index, x); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_LAYOUT_SWAP_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h new file mode 100644 index 0000000..ee0078b --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMacros.h @@ -0,0 +1,54 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_MACROS_H + + +/** use this macro in sfinae selection in templated functions + * + * template::value , int >::type = 0 + * > + * void foo(){} + * + * becomes => + * + * template::value ) + * > + * void foo(){} + */ + +// SFINAE requires variadic templates +#ifndef __CUDACC__ +#if EIGEN_HAS_VARIADIC_TEMPLATES + // SFINAE doesn't work for gcc <= 4.7 + #ifdef EIGEN_COMP_GNUC + #if EIGEN_GNUC_AT_LEAST(4,8) + #define EIGEN_HAS_SFINAE + #endif + #else + #define EIGEN_HAS_SFINAE + #endif +#endif +#endif + +#define EIGEN_SFINAE_ENABLE_IF( __condition__ ) \ + typename internal::enable_if< ( __condition__ ) , int >::type = 0 + + +#if EIGEN_HAS_CONSTEXPR +#define EIGEN_CONSTEXPR constexpr +#else +#define EIGEN_CONSTEXPR +#endif + + +#endif diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h new file mode 100644 index 0000000..a8e5575 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -0,0 +1,321 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_MAP_H +#define EIGEN_CXX11_TENSOR_TENSOR_MAP_H + +namespace Eigen { + +/** \class TensorMap + * \ingroup CXX11_Tensor_Module + * + * \brief A tensor expression mapping an existing array of data. + * + */ +/// template class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_ is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. +template class MakePointer_> class TensorMap : public TensorBase > +{ + public: + typedef TensorMap Self; + typedef typename PlainObjectType::Base Base; + typedef typename Eigen::internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + typedef typename Base::CoeffReturnType CoeffReturnType; + + /* typedef typename internal::conditional< + bool(internal::is_lvalue::value), + Scalar *, + const Scalar *>::type + PointerType;*/ + typedef typename MakePointer_::Type PointerType; + typedef PointerType PointerArgType; + + static const int Options = Options_; + + static const Index NumIndices = PlainObjectType::NumIndices; + typedef typename PlainObjectType::Dimensions Dimensions; + + enum { + IsAligned = ((int(Options_)&Aligned)==Aligned), + Layout = PlainObjectType::Layout, + CoordAccess = true, + RawAccess = true + }; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr) : m_data(dataPtr), m_dimensions() { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((0 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension, IndexTypes... otherDimensions) : m_data(dataPtr), m_dimensions(firstDimension, otherDimensions...) { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((sizeof...(otherDimensions) + 1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index firstDimension) : m_data(dataPtr), m_dimensions(firstDimension) { + // The number of dimensions used to construct a tensor must be equal to the rank of the tensor. + EIGEN_STATIC_ASSERT((1 == NumIndices || NumIndices == Dynamic), YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2) : m_data(dataPtr), m_dimensions(dim1, dim2) { + EIGEN_STATIC_ASSERT(2 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3) { + EIGEN_STATIC_ASSERT(3 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4) { + EIGEN_STATIC_ASSERT(4 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, Index dim1, Index dim2, Index dim3, Index dim4, Index dim5) : m_data(dataPtr), m_dimensions(dim1, dim2, dim3, dim4, dim5) { + EIGEN_STATIC_ASSERT(5 == NumIndices || NumIndices == Dynamic, YOU_MADE_A_PROGRAMMING_MISTAKE) + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const array& dimensions) + : m_data(dataPtr), m_dimensions(dimensions) + { } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PointerArgType dataPtr, const Dimensions& dimensions) + : m_data(dataPtr), m_dimensions(dimensions) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorMap(PlainObjectType& tensor) + : m_data(tensor.data()), m_dimensions(tensor.dimensions()) + { } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index rank() const { return m_dimensions.rank(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index dimension(Index n) const { return m_dimensions[n]; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE PointerType data() { return m_data; } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const PointerType data() const { return m_data; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const + { + // eigen_assert(checkIndexRange(indices)); + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(indices); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(indices); + return m_data[index]; + } + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()() const + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index index) const + { + eigen_internal_assert(index >= 0 && index < size()); + return m_data[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) const + { + EIGEN_STATIC_ASSERT(sizeof...(otherIndices) + 2 == NumIndices, YOU_MADE_A_PROGRAMMING_MISTAKE) + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i1 + i0 * m_dimensions[1]; + return m_data[index]; + } else { + const Index index = i0 + i1 * m_dimensions[0]; + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) const + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); + return m_data[index]; + } + } +#endif + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(const array& indices) + { + // eigen_assert(checkIndexRange(indices)); + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(indices); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(indices); + return m_data[index]; + } + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()() + { + EIGEN_STATIC_ASSERT(NumIndices == 0, YOU_MADE_A_PROGRAMMING_MISTAKE) + return m_data[0]; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index index) + { + eigen_internal_assert(index >= 0 && index < size()); + return m_data[index]; + } + +#if EIGEN_HAS_VARIADIC_TEMPLATES + template EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index firstIndex, Index secondIndex, IndexTypes... otherIndices) + { + static_assert(sizeof...(otherIndices) + 2 == NumIndices || NumIndices == Dynamic, "Number of indices used to access a tensor coefficient must be equal to the rank of the tensor."); + const std::size_t NumDims = sizeof...(otherIndices) + 2; + if (PlainObjectType::Options&RowMajor) { + const Index index = m_dimensions.IndexOfRowMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } else { + const Index index = m_dimensions.IndexOfColMajor(array{{firstIndex, secondIndex, otherIndices...}}); + return m_data[index]; + } + } +#else + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i1 + i0 * m_dimensions[1]; + return m_data[index]; + } else { + const Index index = i0 + i1 * m_dimensions[0]; + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * i2); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0)); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * i3)); + return m_data[index]; + } + } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar& operator()(Index i0, Index i1, Index i2, Index i3, Index i4) + { + if (PlainObjectType::Options&RowMajor) { + const Index index = i4 + m_dimensions[4] * (i3 + m_dimensions[3] * (i2 + m_dimensions[2] * (i1 + m_dimensions[1] * i0))); + return m_data[index]; + } else { + const Index index = i0 + m_dimensions[0] * (i1 + m_dimensions[1] * (i2 + m_dimensions[2] * (i3 + m_dimensions[3] * i4))); + return m_data[index]; + } + } +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Self& operator=(const Self& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Self& operator=(const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + private: + typename MakePointer_::Type m_data; + Dimensions m_dimensions; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_MAP_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h new file mode 100644 index 0000000..615559d --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h @@ -0,0 +1,218 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_META_H +#define EIGEN_CXX11_TENSOR_TENSOR_META_H + +namespace Eigen { + +template struct Cond {}; + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +const T1& choose(Cond, const T1& first, const T2&) { + return first; +} + +template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +const T2& choose(Cond, const T1&, const T2& second) { + return second; +} + + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T divup(const X x, const Y y) { + return static_cast((x + y - 1) / y); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T divup(const T x, const T y) { + return static_cast((x + y - 1) / y); +} + +template struct max_n_1 { + static const size_t size = n; +}; +template <> struct max_n_1<0> { + static const size_t size = 1; +}; + + +// Default packet types +template +struct PacketType : internal::packet_traits { + typedef typename internal::packet_traits::type type; +}; + +// For CUDA packet types when using a GpuDevice +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16) +template <> +struct PacketType { + typedef half2 type; + static const int size = 2; + enum { + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasArg = 0, + HasAbs2 = 0, + HasMin = 1, + HasMax = 1, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0, + + HasDiv = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasExp = 1, + HasLog = 1, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 1, + }; +}; +#endif + +#if defined(EIGEN_USE_SYCL) +template + struct PacketType { + typedef T type; + static const int size = 1; + enum { + HasAdd = 0, + HasSub = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasArg = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0, + HasBlend = 0 + }; +}; +#endif + + +// Tuple mimics std::pair but works on e.g. nvcc. +template struct Tuple { + public: + U first; + V second; + + typedef U first_type; + typedef V second_type; + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple() : first(), second() {} + + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple(const U& f, const V& s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Tuple& operator= (const Tuple& rhs) { + if (&rhs == this) return *this; + first = rhs.first; + second = rhs.second; + return *this; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void swap(Tuple& rhs) { + using numext::swap; + swap(first, rhs.first); + swap(second, rhs.second); + } +}; + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator==(const Tuple& x, const Tuple& y) { + return (x.first == y.first && x.second == y.second); +} + +template +EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +bool operator!=(const Tuple& x, const Tuple& y) { + return !(x == y); +} + + +// Can't use std::pairs on cuda devices +template struct IndexPair { + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair() : first(0), second(0) {} + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE IndexPair(Idx f, Idx s) : first(f), second(s) {} + + EIGEN_DEVICE_FUNC void set(IndexPair val) { + first = val.first; + second = val.second; + } + + Idx first; + Idx second; +}; + + +#ifdef EIGEN_HAS_SFINAE +namespace internal { + + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType& idx, numeric_list) { + return { idx[Is]... }; + } + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType&, numeric_list) { + return array(); + } + + /** Make an array (for index/dimensions) out of a custom index */ + template + EIGEN_CONSTEXPR EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + array customIndices2Array(IndexType& idx) { + return customIndices2Array(idx, typename gen_numeric_list::type{}); + } + + + template + struct is_base_of + { + + typedef char (&yes)[1]; + typedef char (&no)[2]; + + template + struct Host + { + operator BB*() const; + operator DD*(); + }; + + template + static yes check(D*, T); + static no check(B*, int); + + static const bool value = sizeof(check(Host(), int())) == sizeof(yes); + }; + +} +#endif + + + +} // namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_META_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h new file mode 100644 index 0000000..d34f1e3 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h @@ -0,0 +1,888 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H +#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H + +namespace Eigen { + +/** \class TensorReshaping + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reshaping class. + * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorReshapingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorReshapingOp type; +}; + +} // end namespace internal + + + +template +class TensorReshapingOp : public TensorBase, WriteAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims) + : m_xpr(expr), m_dims(dims) {} + + EIGEN_DEVICE_FUNC + const NewDimensions& dimensions() const { return m_dims; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; + const NewDimensions m_dims; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorReshapingOp XprType; + typedef NewDimensions Dimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_dimensions(op.dimensions()) + { + // The total size of the reshaped tensor must be equal to the total size + // of the input tensor. + eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions())); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + return m_impl.evalSubExprsIfNeeded(data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + return m_impl.template packet(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast(m_impl.data()); } + + EIGEN_DEVICE_FUNC const TensorEvaluator& impl() const { return m_impl; } + + protected: + TensorEvaluator m_impl; + NewDimensions m_dimensions; +}; + + +// Eval as lvalue +template + struct TensorEvaluator, Device> + : public TensorEvaluator, Device> + +{ + typedef TensorEvaluator, Device> Base; + typedef TensorReshapingOp XprType; + typedef NewDimensions Dimensions; + + enum { + IsAligned = TensorEvaluator::IsAligned, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = TensorEvaluator::RawAccess + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(index); + } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + this->m_impl.template writePacket(index, x); + } +}; + + +/** \class TensorSlicing + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor slicing class. + * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorSlicingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorSlicingOp type; +}; + +} // end namespace internal + + + +template +class TensorSlicingOp : public TensorBase > +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes) + : m_xpr(expr), m_indices(indices), m_sizes(sizes) {} + + EIGEN_DEVICE_FUNC + const StartIndices& startIndices() const { return m_indices; } + EIGEN_DEVICE_FUNC + const Sizes& sizes() const { return m_sizes; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run(assign, DefaultDevice()); + return *this; + } + + + protected: + typename XprType::Nested m_xpr; + const StartIndices m_indices; + const Sizes m_sizes; +}; + + +// Fixme: figure out the exact threshold +namespace { +template struct MemcpyTriggerForSlicing { + EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { } + EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; } + + private: + Index threshold_; +}; + +// It is very expensive to start the memcpy kernel on GPU: we therefore only +// use it for large copies. +#ifdef EIGEN_USE_GPU +template struct MemcpyTriggerForSlicing { + EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { } + EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; } +}; +#endif +} + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets and sizes. + IsAligned = /*TensorEvaluator::IsAligned*/false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices()) + { + for (std::size_t i = 0; i < internal::array_size::value; ++i) { + eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]); + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + const Sizes& output_dims = op.sizes(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + } + + // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i]); + } + } else { + m_inputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + } + + // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed. + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; + m_fastOutputStrides[i] = internal::TensorIntDivisor(m_outputStrides[i]); + } + } + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Sizes Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) { + m_impl.evalSubExprsIfNeeded(NULL); + if (!NumTraits::type>::RequireInitialization && data && m_impl.data()) { + Index contiguous_values = 1; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + contiguous_values *= dimensions()[i]; + if (dimensions()[i] != m_impl.dimensions()[i]) { + break; + } + } + } else { + for (int i = NumDims-1; i >= 0; --i) { + contiguous_values *= dimensions()[i]; + if (dimensions()[i] != m_impl.dimensions()[i]) { + break; + } + } + } + // Use memcpy if it's going to be faster than using the regular evaluation. + const MemcpyTriggerForSlicing trigger(m_device); + if (trigger(contiguous_values)) { + Scalar* src = (Scalar*)m_impl.data(); + for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) { + Index offset = srcCoeff(i); + m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar)); + } + return false; + } + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + const int packetSize = internal::unpacket_traits::size; + EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+packetSize-1 < internal::array_prod(dimensions())); + + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / m_fastOutputStrides[i]; + const Index idx1 = indices[1] / m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[0]); + inputIndices[1] += (indices[1] + m_offsets[0]); + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / m_fastOutputStrides[i]; + const Index idx1 = indices[1] / m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i]; + inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i]; + indices[0] -= idx0 * m_outputStrides[i]; + indices[1] -= idx1 * m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + m_offsets[NumDims-1]); + inputIndices[1] += (indices[1] + m_offsets[NumDims-1]); + } + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + PacketReturnType rslt = m_impl.template packet(inputIndices[0]); + return rslt; + } + else { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[packetSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < packetSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); + } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { + Scalar* result = m_impl.data(); + if (result) { + Index offset = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) { + if (m_dimensions[i] != m_impl.dimensions()[i]) { + offset += m_offsets[i] * m_inputStrides[i]; + for (int j = i+1; j < NumDims; ++j) { + if (m_dimensions[j] > 1) { + return NULL; + } + offset += m_offsets[j] * m_inputStrides[j]; + } + break; + } + } + } else { + for (int i = NumDims - 1; i >= 0; --i) { + if (m_dimensions[i] != m_impl.dimensions()[i]) { + offset += m_offsets[i] * m_inputStrides[i]; + for (int j = i-1; j >= 0; --j) { + if (m_dimensions[j] > 1) { + return NULL; + } + offset += m_offsets[j] * m_inputStrides[j]; + } + break; + } + } + } + return result + offset; + } + return NULL; + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + inputIndex += (index + m_offsets[0]); + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += (idx + m_offsets[i]) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + inputIndex += (index + m_offsets[NumDims-1]); + } + return inputIndex; + } + + array m_outputStrides; + array, NumDims> m_fastOutputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + const Device& m_device; + Dimensions m_dimensions; + const StartIndices m_offsets; +}; + + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = /*TensorEvaluator::IsAligned*/false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Sizes Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + void writePacket(Index index, const PacketReturnType& x) + { + const int packetSize = internal::unpacket_traits::size; + Index inputIndices[] = {0, 0}; + Index indices[] = {index, index + packetSize - 1}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; + const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; + inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; + indices[0] -= idx0 * this->m_outputStrides[i]; + indices[1] -= idx1 * this->m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + this->m_offsets[0]); + inputIndices[1] += (indices[1] + this->m_offsets[0]); + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx0 = indices[0] / this->m_fastOutputStrides[i]; + const Index idx1 = indices[1] / this->m_fastOutputStrides[i]; + inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i]; + inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i]; + indices[0] -= idx0 * this->m_outputStrides[i]; + indices[1] -= idx1 * this->m_outputStrides[i]; + } + inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]); + inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]); + } + if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + this->m_impl.template writePacket(inputIndices[0], x); + } + else { + EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + internal::pstore(values, x); + this->m_impl.coeffRef(inputIndices[0]) = values[0]; + this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1]; + for (int i = 1; i < packetSize-1; ++i) { + this->coeffRef(index+i) = values[i]; + } + } + } +}; + + + +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = array_size::value; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorStridingSlicingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorStridingSlicingOp type; +}; + +} // end namespace internal + + +template +class TensorStridingSlicingOp : public TensorBase > +{ + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename internal::nested::type Nested; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp( + const XprType& expr, const StartIndices& startIndices, + const StopIndices& stopIndices, const Strides& strides) + : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices), + m_strides(strides) {} + + EIGEN_DEVICE_FUNC + const StartIndices& startIndices() const { return m_startIndices; } + EIGEN_DEVICE_FUNC + const StartIndices& stopIndices() const { return m_stopIndices; } + EIGEN_DEVICE_FUNC + const StartIndices& strides() const { return m_strides; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run( + assign, DefaultDevice()); + return *this; + } + + template + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other) + { + typedef TensorAssignOp Assign; + Assign assign(*this, other); + internal::TensorExecutor::run( + assign, DefaultDevice()); + return *this; + } + + protected: + typename XprType::Nested m_xpr; + const StartIndices m_startIndices; + const StopIndices m_stopIndices; + const Strides m_strides; +}; + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorStridingSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + // Alignment can't be guaranteed at compile time since it depends on the + // slice offsets and sizes. + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_device(device), m_strides(op.strides()) + { + // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero + DSizes startIndicesClamped, stopIndicesClamped; + for (size_t i = 0; i < internal::array_size::value; ++i) { + eigen_assert(m_strides[i] != 0 && "0 stride is invalid"); + if(m_strides[i]>0){ + startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]); + stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]); + }else{ + /* implies m_strides[i]<0 by assert */ + startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1); + stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1); + } + m_startIndices[i] = startIndicesClamped[i]; + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + + // check for degenerate intervals and compute output tensor shape + bool degenerate = false;; + for(int i = 0; i < NumDims; i++){ + Index interval = stopIndicesClamped[i] - startIndicesClamped[i]; + if(interval == 0 || ((interval<0) != (m_strides[i]<0))){ + m_dimensions[i] = 0; + degenerate = true; + }else{ + m_dimensions[i] = interval / m_strides[i] + + (interval % m_strides[i] != 0 ? 1 : 0); + eigen_assert(m_dimensions[i] >= 0); + } + } + Strides output_dims = m_dimensions; + + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = m_strides[0]; + m_offsets[0] = startIndicesClamped[0]; + Index previousDimProduct = 1; + for (int i = 1; i < NumDims; ++i) { + previousDimProduct *= input_dims[i-1]; + m_inputStrides[i] = previousDimProduct * m_strides[i]; + m_offsets[i] = startIndicesClamped[i] * previousDimProduct; + } + + // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed. + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1]; + // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash + m_fastOutputStrides[i] = internal::TensorIntDivisor(degenerate ? 1 : m_outputStrides[i]); + } + } else { + m_inputStrides[NumDims-1] = m_strides[NumDims-1]; + m_offsets[NumDims-1] = startIndicesClamped[NumDims-1]; + Index previousDimProduct = 1; + for (int i = NumDims - 2; i >= 0; --i) { + previousDimProduct *= input_dims[i+1]; + m_inputStrides[i] = previousDimProduct * m_strides[i]; + m_offsets[i] = startIndicesClamped[i] * previousDimProduct; + } + + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1]; + // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash + m_fastOutputStrides[i] = internal::TensorIntDivisor(degenerate ? 1 : m_outputStrides[i]); + } + } + m_block_total_size_max = numext::maxi(static_cast(1), + device.lastLevelCacheSize() / + sizeof(Scalar)); + } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNonConst; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Strides Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + return m_impl.coeff(srcCoeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const { + return NULL; + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const + { + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i >= 0; --i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i] + m_offsets[i]; + index -= idx * m_outputStrides[i]; + } + } else { + for (int i = 0; i < NumDims; ++i) { + const Index idx = index / m_fastOutputStrides[i]; + inputIndex += idx * m_inputStrides[i] + m_offsets[i]; + index -= idx * m_outputStrides[i]; + } + } + return inputIndex; + } + + static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) { + return numext::maxi(min, numext::mini(max,value)); + } + + array m_outputStrides; + array, NumDims> m_fastOutputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + const Device& m_device; + DSizes m_startIndices; // clamped startIndices + DSizes m_dimensions; + DSizes m_offsets; // offset in a flattened shape + const Strides m_strides; + std::size_t m_block_total_size_max; +}; + +// Eval as lvalue +template +struct TensorEvaluator, Device> + : public TensorEvaluator, Device> +{ + typedef TensorEvaluator, Device> Base; + typedef TensorStridingSlicingOp XprType; + static const int NumDims = internal::array_size::value; + + enum { + IsAligned = false, + PacketAccess = false, + BlockAccess = false, + Layout = TensorEvaluator::Layout, + CoordAccess = TensorEvaluator::CoordAccess, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : Base(op, device) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename internal::remove_const::type ScalarNonConst; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + typedef Strides Dimensions; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) + { + return this->m_impl.coeffRef(this->srcCoeff(index)); + } +}; + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h new file mode 100644 index 0000000..647bcf1 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h @@ -0,0 +1,397 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H +#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H + +namespace Eigen { + +/** \class TensorPadding + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor padding class. + * At the moment only padding with a constant value is supported. + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorPaddingOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorPaddingOp type; +}; + +} // end namespace internal + + + +template +class TensorPaddingOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value) + : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {} + + EIGEN_DEVICE_FUNC + const PaddingDimensions& padding() const { return m_padding_dims; } + EIGEN_DEVICE_FUNC + Scalar padding_value() const { return m_padding_value; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const PaddingDimensions m_padding_dims; + const Scalar m_padding_value; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorPaddingOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::value; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = true, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = true, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()) + { + // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead + // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector + // of 1 element first and then pad. + EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + + // Compute dimensions + m_dimensions = m_impl.dimensions(); + for (int i = 0; i < NumDims; ++i) { + m_dimensions[i] += m_padding[i].first + m_padding[i].second; + } + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputStrides[0] = 1; + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1]; + } else { + m_inputStrides[NumDims - 1] = 1; + m_outputStrides[NumDims] = 1; + for (int i = NumDims - 2; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1]; + } + m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0]; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar*) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + eigen_assert(index < dimensions().TotalSize()); + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 1; i > 0; --i) { + const Index idx = index / m_outputStrides[i]; + if (isPaddingAtIndexForDim(idx, i)) { + return m_paddingValue; + } + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (isPaddingAtIndexForDim(index, 0)) { + return m_paddingValue; + } + inputIndex += (index - m_padding[0].first); + } else { + for (int i = 0; i < NumDims - 1; ++i) { + const Index idx = index / m_outputStrides[i+1]; + if (isPaddingAtIndexForDim(idx, i)) { + return m_paddingValue; + } + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i+1]; + } + if (isPaddingAtIndexForDim(index, NumDims-1)) { + return m_paddingValue; + } + inputIndex += (index - m_padding[NumDims-1].first); + } + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + if (static_cast(Layout) == static_cast(ColMajor)) { + return packetColMajor(index); + } + return packetRowMajor(index); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + TensorOpCost cost = m_impl.costPerCoeff(vectorized); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims; ++i) + updateCostPerDimension(cost, i, i == 0); + } else { + for (int i = NumDims - 1; i >= 0; --i) + updateCostPerDimension(cost, i, i == NumDims - 1); + } + return cost; + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + private: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim( + Index index, int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return (!internal::index_pair_first_statically_eq(dim_index, 0) && + index < m_padding[dim_index].first) || + (!internal::index_pair_second_statically_eq(dim_index, 0) && + index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#else + return (index < m_padding[dim_index].first) || + (index >= m_dimensions[dim_index] - m_padding[dim_index].second); +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero( + int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_first_statically_eq(dim_index, 0); +#else + EIGEN_UNUSED_VARIABLE(dim_index); + return false; +#endif + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero( + int dim_index) const { +#if defined(EIGEN_HAS_INDEX_LIST) + return internal::index_pair_second_statically_eq(dim_index, 0); +#else + EIGEN_UNUSED_VARIABLE(dim_index); + return false; +#endif + } + + + void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const { + const double in = static_cast(m_impl.dimensions()[i]); + const double out = in + m_padding[i].first + m_padding[i].second; + if (out == 0) + return; + const double reduction = in / out; + cost *= reduction; + if (first) { + cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost() + + reduction * (1 * TensorOpCost::AddCost())); + } else { + cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost() + + 2 * TensorOpCost::MulCost() + + reduction * (2 * TensorOpCost::MulCost() + + 1 * TensorOpCost::DivCost())); + } + } + + protected: + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index initialIndex = index; + Index inputIndex = 0; + for (int i = NumDims - 1; i > 0; --i) { + const Index first = index; + const Index last = index + PacketSize - 1; + const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i]; + const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i]; + const Index lastPaddedRight = m_outputStrides[i+1]; + + if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + const Index idx = index / m_outputStrides[i]; + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i]; + } + else { + // Every other case + return packetWithPossibleZero(initialIndex); + } + } + + const Index last = index + PacketSize - 1; + const Index first = index; + const Index lastPaddedLeft = m_padding[0].first; + const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second); + const Index lastPaddedRight = m_outputStrides[1]; + + if (!isLeftPaddingCompileTimeZero(0) && last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(0) && first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + inputIndex += (index - m_padding[0].first); + return m_impl.template packet(inputIndex); + } + // Every other case + return packetWithPossibleZero(initialIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + const Index initialIndex = index; + Index inputIndex = 0; + + for (int i = 0; i < NumDims - 1; ++i) { + const Index first = index; + const Index last = index + PacketSize - 1; + const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1]; + const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1]; + const Index lastPaddedRight = m_outputStrides[i]; + + if (!isLeftPaddingCompileTimeZero(i) && last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(i) && first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + const Index idx = index / m_outputStrides[i+1]; + inputIndex += (idx - m_padding[i].first) * m_inputStrides[i]; + index -= idx * m_outputStrides[i+1]; + } + else { + // Every other case + return packetWithPossibleZero(initialIndex); + } + } + + const Index last = index + PacketSize - 1; + const Index first = index; + const Index lastPaddedLeft = m_padding[NumDims-1].first; + const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second); + const Index lastPaddedRight = m_outputStrides[NumDims-1]; + + if (!isLeftPaddingCompileTimeZero(NumDims-1) && last < lastPaddedLeft) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if (!isRightPaddingCompileTimeZero(NumDims-1) && first >= firstPaddedRight && last < lastPaddedRight) { + // all the coefficient are in the padding zone. + return internal::pset1(m_paddingValue); + } + else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (first >= lastPaddedLeft && last < firstPaddedRight)) { + // all the coefficient are between the 2 padding zones. + inputIndex += (index - m_padding[NumDims-1].first); + return m_impl.template packet(inputIndex); + } + // Every other case + return packetWithPossibleZero(initialIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const + { + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + TensorEvaluator m_impl; + PaddingDimensions m_padding; + + Scalar m_paddingValue; +}; + + + + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h new file mode 100644 index 0000000..886a254 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -0,0 +1,269 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H +#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H + +namespace Eigen { + +/** \class TensorPatch + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor patch class. + * + * + */ +namespace internal { +template +struct traits > : public traits +{ + typedef typename XprType::Scalar Scalar; + typedef traits XprTraits; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + typedef typename remove_reference::type _Nested; + static const int NumDimensions = XprTraits::NumDimensions + 1; + static const int Layout = XprTraits::Layout; +}; + +template +struct eval, Eigen::Dense> +{ + typedef const TensorPatchOp& type; +}; + +template +struct nested, 1, typename eval >::type> +{ + typedef TensorPatchOp type; +}; + +} // end namespace internal + + + +template +class TensorPatchOp : public TensorBase, ReadOnlyAccessors> +{ + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims) + : m_xpr(expr), m_patch_dims(patch_dims) {} + + EIGEN_DEVICE_FUNC + const PatchDim& patch_dims() const { return m_patch_dims; } + + EIGEN_DEVICE_FUNC + const typename internal::remove_all::type& + expression() const { return m_xpr; } + + protected: + typename XprType::Nested m_xpr; + const PatchDim m_patch_dims; +}; + + +// Eval as rvalue +template +struct TensorEvaluator, Device> +{ + typedef TensorPatchOp XprType; + typedef typename XprType::Index Index; + static const int NumDims = internal::array_size::Dimensions>::value + 1; + typedef DSizes Dimensions; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + + enum { + IsAligned = false, + PacketAccess = TensorEvaluator::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, + RawAccess = false + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device) + { + Index num_patches = 1; + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + const PatchDim& patch_dims = op.patch_dims(); + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = 0; i < NumDims-1; ++i) { + m_dimensions[i] = patch_dims[i]; + num_patches *= (input_dims[i] - patch_dims[i] + 1); + } + m_dimensions[NumDims-1] = num_patches; + + m_inputStrides[0] = 1; + m_patchStrides[0] = 1; + for (int i = 1; i < NumDims-1; ++i) { + m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1]; + m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1); + } + m_outputStrides[0] = 1; + for (int i = 1; i < NumDims; ++i) { + m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1]; + } + } else { + for (int i = 0; i < NumDims-1; ++i) { + m_dimensions[i+1] = patch_dims[i]; + num_patches *= (input_dims[i] - patch_dims[i] + 1); + } + m_dimensions[0] = num_patches; + + m_inputStrides[NumDims-2] = 1; + m_patchStrides[NumDims-2] = 1; + for (int i = NumDims-3; i >= 0; --i) { + m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1]; + m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1); + } + m_outputStrides[NumDims-1] = 1; + for (int i = NumDims-2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1]; + } + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { + m_impl.evalSubExprsIfNeeded(NULL); + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + Index output_stride_index = (static_cast(Layout) == static_cast(ColMajor)) ? NumDims - 1 : 0; + // Find the location of the first element of the patch. + Index patchIndex = index / m_outputStrides[output_stride_index]; + // Find the offset of the element wrt the location of the first element. + Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index]; + Index inputIndex = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 2; i > 0; --i) { + const Index patchIdx = patchIndex / m_patchStrides[i]; + patchIndex -= patchIdx * m_patchStrides[i]; + const Index offsetIdx = patchOffset / m_outputStrides[i]; + patchOffset -= offsetIdx * m_outputStrides[i]; + inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; + } + } else { + for (int i = 0; i < NumDims - 2; ++i) { + const Index patchIdx = patchIndex / m_patchStrides[i]; + patchIndex -= patchIdx * m_patchStrides[i]; + const Index offsetIdx = patchOffset / m_outputStrides[i+1]; + patchOffset -= offsetIdx * m_outputStrides[i+1]; + inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i]; + } + } + inputIndex += (patchIndex + patchOffset); + return m_impl.coeff(inputIndex); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); + + Index output_stride_index = (static_cast(Layout) == static_cast(ColMajor)) ? NumDims - 1 : 0; + Index indices[2] = {index, index + PacketSize - 1}; + Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index], + indices[1] / m_outputStrides[output_stride_index]}; + Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index], + indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]}; + + Index inputIndices[2] = {0, 0}; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumDims - 2; i > 0; --i) { + const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], + patchIndices[1] / m_patchStrides[i]}; + patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; + patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; + + const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i], + patchOffsets[1] / m_outputStrides[i]}; + patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i]; + patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i]; + + inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; + inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; + } + } else { + for (int i = 0; i < NumDims - 2; ++i) { + const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i], + patchIndices[1] / m_patchStrides[i]}; + patchIndices[0] -= patchIdx[0] * m_patchStrides[i]; + patchIndices[1] -= patchIdx[1] * m_patchStrides[i]; + + const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1], + patchOffsets[1] / m_outputStrides[i+1]}; + patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1]; + patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1]; + + inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i]; + inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i]; + } + } + inputIndices[0] += (patchIndices[0] + patchOffsets[0]); + inputIndices[1] += (patchIndices[1] + patchOffsets[1]); + + if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { + PacketReturnType rslt = m_impl.template packet(inputIndices[0]); + return rslt; + } + else { + EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; + values[0] = m_impl.coeff(inputIndices[0]); + values[PacketSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < PacketSize-1; ++i) { + values[i] = coeff(index+i); + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (TensorOpCost::DivCost() + + TensorOpCost::MulCost() + + 2 * TensorOpCost::AddCost()); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + + protected: + Dimensions m_dimensions; + array m_outputStrides; + array m_inputStrides; + array m_patchStrides; + + TensorEvaluator m_impl; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h new file mode 100644 index 0000000..1655a81 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorRandom.h @@ -0,0 +1,276 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H +#define EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H + +namespace Eigen { +namespace internal { + +namespace { + +EIGEN_DEVICE_FUNC uint64_t get_random_seed() { +#ifdef __CUDA_ARCH__ + // We don't support 3d kernels since we currently only use 1 and + // 2d kernels. + assert(threadIdx.z == 0); + return clock64() + + blockIdx.x * blockDim.x + threadIdx.x + + gridDim.x * blockDim.x * (blockIdx.y * blockDim.y + threadIdx.y); + +#elif defined _WIN32 + // Use the current time as a baseline. + SYSTEMTIME st; + GetSystemTime(&st); + int time = st.wSecond + 1000 * st.wMilliseconds; + // Mix in a random number to make sure that we get different seeds if + // we try to generate seeds faster than the clock resolution. + // We need 2 random values since the generator only generate 16 bits at + // a time (https://msdn.microsoft.com/en-us/library/398ax69y.aspx) + int rnd1 = ::rand(); + int rnd2 = ::rand(); + uint64_t rnd = (rnd1 | rnd2 << 16) ^ time; + return rnd; + +#elif defined __APPLE__ + // Same approach as for win32, except that the random number generator + // is better (// https://developer.apple.com/legacy/library/documentation/Darwin/Reference/ManPages/man3/random.3.html#//apple_ref/doc/man/3/random). + uint64_t rnd = ::random() ^ mach_absolute_time(); + return rnd; + +#else + // Augment the current time with pseudo random number generation + // to ensure that we get different seeds if we try to generate seeds + // faster than the clock resolution. + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + uint64_t rnd = ::random() ^ ts.tv_nsec; + return rnd; +#endif +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned PCG_XSH_RS_generator(uint64_t* state) { + // TODO: Unify with the implementation in the non blocking thread pool. + uint64_t current = *state; + // Update the internal state + *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; + // Generate the random output (using the PCG-XSH-RS scheme) + return static_cast((current ^ (current >> 22)) >> (22 + (current >> 61))); +} + +static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE uint64_t PCG_XSH_RS_state(uint64_t seed) { + seed = seed ? seed : get_random_seed(); + return seed * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL; +} + +} // namespace + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeUniform(uint64_t* state) { + unsigned rnd = PCG_XSH_RS_generator(state); + return static_cast(rnd); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +Eigen::half RandomToTypeUniform(uint64_t* state) { + Eigen::half result; + // Generate 10 random bits for the mantissa + unsigned rnd = PCG_XSH_RS_generator(state); + result.x = static_cast(rnd & 0x3ffu); + // Set the exponent + result.x |= (static_cast(15) << 10); + // Return the final result + return result - Eigen::half(1.0f); +} + + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float RandomToTypeUniform(uint64_t* state) { + typedef union { + uint32_t raw; + float fp; + } internal; + internal result; + // Generate 23 random bits for the mantissa mantissa + const unsigned rnd = PCG_XSH_RS_generator(state); + result.raw = rnd & 0x7fffffu; + // Set the exponent + result.raw |= (static_cast(127) << 23); + // Return the final result + return result.fp - 1.0f; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double RandomToTypeUniform(uint64_t* state) { + typedef union { + uint64_t raw; + double dp; + } internal; + internal result; + result.raw = 0; + // Generate 52 random bits for the mantissa + // First generate the upper 20 bits + unsigned rnd1 = PCG_XSH_RS_generator(state) & 0xfffffu; + // The generate the lower 32 bits + unsigned rnd2 = PCG_XSH_RS_generator(state); + result.raw = (static_cast(rnd1) << 32) | rnd2; + // Set the exponent + result.raw |= (static_cast(1023) << 52); + // Return the final result + return result.dp - 1.0; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* state) { + return std::complex(RandomToTypeUniform(state), + RandomToTypeUniform(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeUniform >(uint64_t* state) { + return std::complex(RandomToTypeUniform(state), + RandomToTypeUniform(state)); +} + +template class UniformRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UniformRandomGenerator( + const UniformRandomGenerator& other) { + m_state = other.m_state; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeUniform(&local_state); + m_state = local_state; + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeUniform(&local_state); + } + m_state = local_state; + return internal::pload(values); + } + + private: + mutable uint64_t m_state; +}; + +template +struct functor_traits > { + enum { + // Rough estimate for floating point, multiplied by ceil(sizeof(T) / sizeof(float)). + Cost = 12 * NumTraits::AddCost * + ((sizeof(Scalar) + sizeof(float) - 1) / sizeof(float)), + PacketAccess = UniformRandomGenerator::PacketAccess + }; +}; + + + +template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +T RandomToTypeNormal(uint64_t* state) { + // Use the ratio of uniform method to generate numbers following a normal + // distribution. See for example Numerical Recipes chapter 7.3.9 for the + // details. + T u, v, q; + do { + u = RandomToTypeUniform(state); + v = T(1.7156) * (RandomToTypeUniform(state) - T(0.5)); + const T x = u - T(0.449871); + const T y = numext::abs(v) + T(0.386595); + q = x*x + y * (T(0.196)*y - T(0.25472)*x); + } while (q > T(0.27597) && + (q > T(0.27846) || v*v > T(-4) * numext::log(u) * u*u)); + + return v/u; +} + +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state) { + return std::complex(RandomToTypeNormal(state), + RandomToTypeNormal(state)); +} +template <> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +std::complex RandomToTypeNormal >(uint64_t* state) { + return std::complex(RandomToTypeNormal(state), + RandomToTypeNormal(state)); +} + + +template class NormalRandomGenerator { + public: + static const bool PacketAccess = true; + + // Uses the given "seed" if non-zero, otherwise uses a random seed. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator(uint64_t seed = 0) { + m_state = PCG_XSH_RS_state(seed); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NormalRandomGenerator( + const NormalRandomGenerator& other) { + m_state = other.m_state; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + T operator()(Index i) const { + uint64_t local_state = m_state + i; + T result = RandomToTypeNormal(&local_state); + m_state = local_state; + return result; + } + + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + Packet packetOp(Index i) const { + const int packetSize = internal::unpacket_traits::size; + EIGEN_ALIGN_MAX T values[packetSize]; + uint64_t local_state = m_state + i; + for (int j = 0; j < packetSize; ++j) { + values[j] = RandomToTypeNormal(&local_state); + } + m_state = local_state; + return internal::pload(values); + } + + private: + mutable uint64_t m_state; +}; + + +template +struct functor_traits > { + enum { + // On average, we need to generate about 3 random numbers + // 15 mul, 8 add, 1.5 logs + Cost = 3 * functor_traits >::Cost + + 15 * NumTraits::AddCost + 8 * NumTraits::AddCost + + 3 * functor_traits >::Cost / 2, + PacketAccess = NormalRandomGenerator::PacketAccess + }; +}; + + +} // end namespace internal +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_RANDOM_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h new file mode 100644 index 0000000..41d0d00 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -0,0 +1,781 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// Copyright (C) 2016 Mehdi Goli, Codeplay Software Ltd +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H + +namespace Eigen { + +/** \class TensorReduction + * \ingroup CXX11_Tensor_Module + * + * \brief Tensor reduction class. + * + */ + +namespace internal { + template class MakePointer_ > + struct traits > + : traits +{ + typedef traits XprTraits; + typedef typename XprTraits::Scalar Scalar; + typedef typename XprTraits::StorageKind StorageKind; + typedef typename XprTraits::Index Index; + typedef typename XprType::Nested Nested; + static const int NumDimensions = XprTraits::NumDimensions - array_size::value; + static const int Layout = XprTraits::Layout; + + template struct MakePointer { + // Intermediate typedef to workaround MSVC issue. + typedef MakePointer_ MakePointerT; + typedef typename MakePointerT::Type Type; + }; +}; + +template class MakePointer_> +struct eval, Eigen::Dense> +{ + typedef const TensorReductionOp& type; +}; + +template class MakePointer_> +struct nested, 1, typename eval >::type> +{ + typedef TensorReductionOp type; +}; + + +template struct DimInitializer { + template EIGEN_DEVICE_FUNC + static void run(const InputDims& input_dims, + const array::value>& reduced, + OutputDims* output_dims, ReducedDims* reduced_dims) { + const int NumInputDims = internal::array_size::value; + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (reduced[i]) { + (*reduced_dims)[reduceIndex] = input_dims[i]; + ++reduceIndex; + } else { + (*output_dims)[outputIndex] = input_dims[i]; + ++outputIndex; + } + } + } +}; + +template <> struct DimInitializer > { + template EIGEN_DEVICE_FUNC + static void run(const InputDims& input_dims, const array&, + Sizes<>*, array* reduced_dims) { + const int NumInputDims = internal::array_size::value; + for (int i = 0; i < NumInputDims; ++i) { + (*reduced_dims)[i] = input_dims[i]; + } + } +}; + + +template +struct are_inner_most_dims { + static const bool value = false; +}; +template +struct preserve_inner_most_dims { + static const bool value = false; +}; + +#if EIGEN_HAS_CONSTEXPR && EIGEN_HAS_VARIADIC_TEMPLATES +template +struct are_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_eq(0, 0); + static const bool tmp3 = index_statically_eq(array_size::value-1, array_size::value-1); + static const bool value = tmp1 & tmp2 & tmp3; +}; +template +struct are_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_eq(0, NumTensorDims - array_size::value); + static const bool tmp3 = index_statically_eq(array_size::value - 1, NumTensorDims - 1); + static const bool value = tmp1 & tmp2 & tmp3; + +}; +template +struct preserve_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_gt(0, 0); + static const bool value = tmp1 & tmp2; + +}; +template +struct preserve_inner_most_dims{ + static const bool tmp1 = indices_statically_known_to_increase(); + static const bool tmp2 = index_statically_lt(array_size::value - 1, NumTensorDims - 1); + static const bool value = tmp1 & tmp2; +}; +#endif + + +template +struct GenericDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { + EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + for (int j = 0; j < self.m_reducedDims[DimIndex]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; + GenericDimReducer::reduce(self, input, reducer, accum); + } + } +}; +template +struct GenericDimReducer<0, Self, Op> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::CoeffReturnType* accum) { + for (int j = 0; j < self.m_reducedDims[0]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; + reducer.reduce(self.m_impl.coeff(input), accum); + } + } +}; +template +struct GenericDimReducer<-1, Self, Op> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index index, Op& reducer, typename Self::CoeffReturnType* accum) { + reducer.reduce(self.m_impl.coeff(index), accum); + } +}; + +template +struct InnerMostDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { + typename Self::CoeffReturnType accum = reducer.initialize(); + for (typename Self::Index j = 0; j < numValuesToReduce; ++j) { + reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); + } + return reducer.finalize(accum); + } +}; + +template +struct InnerMostDimReducer { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { + const int packetSize = internal::unpacket_traits::size; + const typename Self::Index VectorizedSize = (numValuesToReduce / packetSize) * packetSize; + typename Self::PacketReturnType p = reducer.template initializePacket(); + for (typename Self::Index j = 0; j < VectorizedSize; j += packetSize) { + reducer.reducePacket(self.m_impl.template packet(firstIndex + j), &p); + } + typename Self::CoeffReturnType accum = reducer.initialize(); + for (typename Self::Index j = VectorizedSize; j < numValuesToReduce; ++j) { + reducer.reduce(self.m_impl.coeff(firstIndex + j), &accum); + } + return reducer.finalizeBoth(accum, p); + } +}; + +template +struct InnerMostDimPreserver { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { + eigen_assert(false && "should never be called"); + } +}; + +template +struct InnerMostDimPreserver { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { + EIGEN_STATIC_ASSERT((DimIndex > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + for (typename Self::Index j = 0; j < self.m_reducedDims[DimIndex]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[DimIndex]; + InnerMostDimPreserver::reduce(self, input, reducer, accum); + } + } +}; + +template +struct InnerMostDimPreserver<0, Self, Op, true> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self& self, typename Self::Index firstIndex, Op& reducer, typename Self::PacketReturnType* accum) { + for (typename Self::Index j = 0; j < self.m_reducedDims[0]; ++j) { + const typename Self::Index input = firstIndex + j * self.m_reducedStrides[0]; + reducer.reducePacket(self.m_impl.template packet(input), accum); + } + } +}; +template +struct InnerMostDimPreserver<-1, Self, Op, true> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { + eigen_assert(false && "should never be called"); + } +}; + +// Default full reducer +template +struct FullReducer { + static const bool HasOptimizedImplementation = false; + + static EIGEN_DEVICE_FUNC void run(const Self& self, Op& reducer, const Device&, typename Self::CoeffReturnType* output) { + const typename Self::Index num_coeffs = array_prod(self.m_impl.dimensions()); + *output = InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + } +}; + + +#ifdef EIGEN_USE_THREADS +// Multithreaded full reducers +template +struct FullReducerShard { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Self& self, typename Self::Index firstIndex, + typename Self::Index numValuesToReduce, Op& reducer, + typename Self::CoeffReturnType* output) { + *output = InnerMostDimReducer::reduce( + self, firstIndex, numValuesToReduce, reducer); + } +}; + +// Multithreaded full reducer +template +struct FullReducer { + static const bool HasOptimizedImplementation = !Op::IsStateful; + static const int PacketSize = + unpacket_traits::size; + + // launch one reducer per thread and accumulate the result. + static void run(const Self& self, Op& reducer, const ThreadPoolDevice& device, + typename Self::CoeffReturnType* output) { + typedef typename Self::Index Index; + const Index num_coeffs = array_prod(self.m_impl.dimensions()); + if (num_coeffs == 0) { + *output = reducer.finalize(reducer.initialize()); + return; + } + const TensorOpCost cost = + self.m_impl.costPerCoeff(Vectorizable) + + TensorOpCost(0, 0, internal::functor_traits::Cost, Vectorizable, + PacketSize); + const int num_threads = TensorCostModel::numThreads( + num_coeffs, cost, device.numThreads()); + if (num_threads == 1) { + *output = + InnerMostDimReducer::reduce(self, 0, num_coeffs, reducer); + return; + } + const Index blocksize = + std::floor(static_cast(num_coeffs) / num_threads); + const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0; + eigen_assert(num_coeffs >= numblocks * blocksize); + + Barrier barrier(internal::convert_index(numblocks)); + MaxSizeVector shards(numblocks, reducer.initialize()); + for (Index i = 0; i < numblocks; ++i) { + device.enqueue_with_barrier(&barrier, &FullReducerShard::run, + self, i * blocksize, blocksize, reducer, + &shards[i]); + } + typename Self::CoeffReturnType finalShard; + if (numblocks * blocksize < num_coeffs) { + finalShard = InnerMostDimReducer::reduce( + self, numblocks * blocksize, num_coeffs - numblocks * blocksize, + reducer); + } else { + finalShard = reducer.initialize(); + } + barrier.Wait(); + + for (Index i = 0; i < numblocks; ++i) { + reducer.reduce(shards[i], &finalShard); + } + *output = reducer.finalize(finalShard); + } +}; + +#endif + + +// Default inner reducer +template +struct InnerReducer { + static const bool HasOptimizedImplementation = false; + + EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + eigen_assert(false && "Not implemented"); + return true; + } +}; + +// Default outer reducer +template +struct OuterReducer { + static const bool HasOptimizedImplementation = false; + + EIGEN_DEVICE_FUNC static bool run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) { + eigen_assert(false && "Not implemented"); + return true; + } +}; + + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +template +__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); + + +#ifdef EIGEN_HAS_CUDA_FP16 +template +__global__ void ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); +template +__global__ void FullReductionKernelHalfFloat(R, const S, I, half*, half2*); +template +__global__ void InnerReductionKernelHalfFloat(R, const S, I, I, half*); + +#endif + +template +__global__ void InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); + +template +__global__ void OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); +#endif + +} // end namespace internal + + +template class MakePointer_> +class TensorReductionOp : public TensorBase, ReadOnlyAccessors> { + public: + typedef typename Eigen::internal::traits::Scalar Scalar; + typedef typename Eigen::NumTraits::Real RealScalar; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename Eigen::internal::nested::type Nested; + typedef typename Eigen::internal::traits::StorageKind StorageKind; + typedef typename Eigen::internal::traits::Index Index; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims) + { } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer) + { } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const XprType& expression() const { return m_expr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Dims& dims() const { return m_dims; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + const Op& reducer() const { return m_reducer; } + + protected: + typename XprType::Nested m_expr; + const Dims m_dims; + const Op m_reducer; +}; + + +// Eval as rvalue +template class MakePointer_, typename Device> +struct TensorEvaluator, Device> +{ + typedef TensorReductionOp XprType; + typedef typename XprType::Index Index; + typedef ArgType ChildType; + typedef typename TensorEvaluator::Dimensions InputDimensions; + static const int NumInputDims = internal::array_size::value; + static const int NumReducedDims = internal::array_size::value; + static const int NumOutputDims = NumInputDims - NumReducedDims; + typedef typename internal::conditional, DSizes >::type Dimensions; + typedef typename XprType::Scalar Scalar; + typedef TensorEvaluator, Device> Self; + static const bool InputPacketAccess = TensorEvaluator::PacketAccess; + typedef typename internal::remove_const::type CoeffReturnType; + typedef typename PacketType::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits::size; + + enum { + IsAligned = false, + PacketAccess = Self::InputPacketAccess && Op::PacketAccess, + Layout = TensorEvaluator::Layout, + CoordAccess = false, // to be implemented + RawAccess = false + }; + + static const bool ReducingInnerMostDims = internal::are_inner_most_dims::value; + static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims::value; + static const bool RunningFullReduction = (NumOutputDims==0); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) + : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device), m_xpr_dims(op.dims()) + { + EIGEN_STATIC_ASSERT((NumInputDims >= NumReducedDims), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((!ReducingInnerMostDims | !PreservingInnerMostDims | (NumReducedDims == NumInputDims)), + YOU_MADE_A_PROGRAMMING_MISTAKE); + + // Build the bitmap indicating if an input dimension is reduced or not. + for (int i = 0; i < NumInputDims; ++i) { + m_reduced[i] = false; + } + for (int i = 0; i < NumReducedDims; ++i) { + eigen_assert(op.dims()[i] >= 0); + eigen_assert(op.dims()[i] < NumInputDims); + m_reduced[op.dims()[i]] = true; + } + + const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); + internal::DimInitializer::run(input_dims, m_reduced, &m_dimensions, &m_reducedDims); + + // Precompute output strides. + if (NumOutputDims > 0) { + if (static_cast(Layout) == static_cast(ColMajor)) { + m_outputStrides[0] = 1; + for (int i = 1; i < NumOutputDims; ++i) { + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + } + } else { + m_outputStrides.back() = 1; + for (int i = NumOutputDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + } + } + } + + // Precompute input strides. + if (NumInputDims > 0) { + array input_strides; + if (static_cast(Layout) == static_cast(ColMajor)) { + input_strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + input_strides[i] = input_strides[i-1] * input_dims[i-1]; + } + } else { + input_strides.back() = 1; + for (int i = NumInputDims - 2; i >= 0; --i) { + input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; + } + } + + int outputIndex = 0; + int reduceIndex = 0; + for (int i = 0; i < NumInputDims; ++i) { + if (m_reduced[i]) { + m_reducedStrides[reduceIndex] = input_strides[i]; + ++reduceIndex; + } else { + m_preservedStrides[outputIndex] = input_strides[i]; + ++outputIndex; + } + } + } + + // Special case for full reductions + if (NumOutputDims == 0) { + m_preservedStrides[0] = internal::array_prod(input_dims); + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } + + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool evalSubExprsIfNeeded(typename MakePointer_::Type data) { + m_impl.evalSubExprsIfNeeded(NULL); + + // Use the FullReducer if possible. + if ((RunningFullReduction && RunningOnSycl) ||(RunningFullReduction && + internal::FullReducer::HasOptimizedImplementation && + ((RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) || + !RunningOnGPU))) { + bool need_assign = false; + if (!data) { + m_result = static_cast(m_device.allocate(sizeof(CoeffReturnType))); + data = m_result; + need_assign = true; + } + Op reducer(m_reducer); + internal::FullReducer::run(*this, reducer, m_device, data); + return need_assign; + } + else if(RunningOnSycl){ + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + Op reducer(m_reducer); + internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve); + return (m_result != NULL); + } + + // Attempt to use an optimized reduction. + else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) { + bool reducing_inner_dims = true; + for (int i = 0; i < NumReducedDims; ++i) { + if (static_cast(Layout) == static_cast(ColMajor)) { + reducing_inner_dims &= m_reduced[i]; + } else { + reducing_inner_dims &= m_reduced[NumInputDims - 1 - i]; + } + } + if (internal::InnerReducer::HasOptimizedImplementation && + (reducing_inner_dims || ReducingInnerMostDims)) { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { + data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + else { + return true; + } + } + Op reducer(m_reducer); + if (internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } + } + + bool preserving_inner_dims = true; + for (int i = 0; i < NumReducedDims; ++i) { + if (static_cast(Layout) == static_cast(ColMajor)) { + preserving_inner_dims &= m_reduced[NumInputDims - 1 - i]; + } else { + preserving_inner_dims &= m_reduced[i]; + } + } + if (internal::OuterReducer::HasOptimizedImplementation && + preserving_inner_dims) { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); + if (!data) { + if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { + data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + else { + return true; + } + } + Op reducer(m_reducer); + if (internal::OuterReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } + } + } + return true; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { + m_impl.cleanup(); + if (m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const + { + if ((RunningOnSycl || RunningFullReduction || RunningOnGPU) && m_result) { + return *(m_result + index); + } + Op reducer(m_reducer); + if (ReducingInnerMostDims || RunningFullReduction) { + const Index num_values_to_reduce = + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + return internal::InnerMostDimReducer::reduce(*this, firstInput(index), + num_values_to_reduce, reducer); + } else { + typename Self::CoeffReturnType accum = reducer.initialize(); + internal::GenericDimReducer::reduce(*this, firstInput(index), reducer, &accum); + return reducer.finalize(accum); + } + } + + // TODO(bsteiner): provide a more efficient implementation. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const + { + EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions()))); + + if (RunningOnGPU && m_result) { + return internal::pload(m_result + index); + } + + EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; + if (ReducingInnerMostDims) { + const Index num_values_to_reduce = + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + const Index firstIndex = firstInput(index); + for (Index i = 0; i < PacketSize; ++i) { + Op reducer(m_reducer); + values[i] = internal::InnerMostDimReducer::reduce(*this, firstIndex + i * num_values_to_reduce, + num_values_to_reduce, reducer); + } + } else if (PreservingInnerMostDims) { + const Index firstIndex = firstInput(index); + const int innermost_dim = (static_cast(Layout) == static_cast(ColMajor)) ? 0 : NumOutputDims - 1; + // TBD: extend this the the n innermost dimensions that we preserve. + if (((firstIndex % m_dimensions[innermost_dim]) + PacketSize - 1) < m_dimensions[innermost_dim]) { + Op reducer(m_reducer); + typename Self::PacketReturnType accum = reducer.template initializePacket(); + internal::InnerMostDimPreserver::reduce(*this, firstIndex, reducer, &accum); + return reducer.finalizePacket(accum); + } else { + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index + i); + } + } + } else { + for (int i = 0; i < PacketSize; ++i) { + values[i] = coeff(index + i); + } + } + PacketReturnType rslt = internal::pload(values); + return rslt; + } + + // Must be called after evalSubExprsIfNeeded(). + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + if (RunningFullReduction && m_result) { + return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); + } else { + const Index num_values_to_reduce = internal::array_prod(m_reducedDims); + const double compute_cost = num_values_to_reduce * internal::functor_traits::Cost; + return m_impl.costPerCoeff(vectorized) * num_values_to_reduce + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + } + + EIGEN_DEVICE_FUNC typename MakePointer_::Type data() const { return m_result; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_impl; } + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} + /// added for sycl in order to re-construct the reduction eval on the device for the sub-kernel + const Dims& xprDims() const {return m_xpr_dims;} + + + private: + template friend struct internal::GenericDimReducer; + template friend struct internal::InnerMostDimReducer; + template friend struct internal::InnerMostDimPreserver; + template friend struct internal::FullReducer; +#ifdef EIGEN_USE_THREADS + template friend struct internal::FullReducerShard; +#endif +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + template friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*); +#ifdef EIGEN_HAS_CUDA_FP16 + template friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*); + template friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*); + template friend void internal::InnerReductionKernelHalfFloat(R, const S, I, I, half*); +#endif + template friend void internal::InnerReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); + + template friend void internal::OuterReductionKernel(R, const S, I, I, typename S::CoeffReturnType*); +#endif + + template friend struct internal::InnerReducer; + + // Returns the Index in the input tensor of the first value that needs to be + // used to compute the reduction at output index "index". + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const { + if (ReducingInnerMostDims) { + if (static_cast(Layout) == static_cast(ColMajor)) { + return index * m_preservedStrides[0]; + } else { + return index * m_preservedStrides[NumPreservedStrides - 1]; + } + } + // TBD: optimize the case where we preserve the innermost dimensions. + Index startInput = 0; + if (static_cast(Layout) == static_cast(ColMajor)) { + for (int i = NumOutputDims - 1; i > 0; --i) { + // This is index_i in the output tensor. + const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (PreservingInnerMostDims) { + eigen_assert(m_preservedStrides[0] == 1); + startInput += index; + } else { + startInput += index * m_preservedStrides[0]; + } + } else { + for (int i = 0; i < NumOutputDims - 1; ++i) { + // This is index_i in the output tensor. + const Index idx = index / m_outputStrides[i]; + startInput += idx * m_preservedStrides[i]; + index -= idx * m_outputStrides[i]; + } + if (PreservingInnerMostDims) { + eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1); + startInput += index; + } else { + startInput += index * m_preservedStrides[NumPreservedStrides - 1]; + } + } + return startInput; + } + + // Bitmap indicating if an input dimension is reduced or not. + array m_reduced; + // Dimensions of the output of the operation. + Dimensions m_dimensions; + // Precomputed strides for the output tensor. + array m_outputStrides; + // Subset of strides of the input tensor for the non-reduced dimensions. + // Indexed by output dimensions. + static const int NumPreservedStrides = max_n_1::size; + array m_preservedStrides; + + // Subset of strides of the input tensor for the reduced dimensions. + // Indexed by reduced dimensions. + array m_reducedStrides; + // Size of the input dimensions that are reduced. + // Indexed by reduced dimensions. + array m_reducedDims; + + // Evaluator for the input expression. + TensorEvaluator m_impl; + + // Operation to apply for computing the reduction. + Op m_reducer; + + // For full reductions +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) + static const bool RunningOnGPU = internal::is_same::value; + static const bool RunningOnSycl = false; +#elif defined(EIGEN_USE_SYCL) +static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; +static const bool RunningOnGPU = false; +#else + static const bool RunningOnGPU = false; + static const bool RunningOnSycl = false; +#endif + typename MakePointer_::Type m_result; + + const Device& m_device; + const Dims& m_xpr_dims; +}; + +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H diff --git a/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h new file mode 100644 index 0000000..65638b6 --- /dev/null +++ b/external/eigen3/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -0,0 +1,750 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H +#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_CUDA_H + +namespace Eigen { +namespace internal { + + +#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) +// Full reducers for GPU, don't vectorize for now + +// Reducer function that enables multiple cuda thread to safely accumulate at the same +// output address. It basically reads the current value of the output variable, and +// attempts to update it with the new value. If in the meantime another cuda thread +// updated the content of the output address it will try again. +template +__device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) { +#if __CUDA_ARCH__ >= 300 + if (sizeof(T) == 4) + { + unsigned int oldval = *reinterpret_cast(output); + unsigned int newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned int readback; + while ((readback = atomicCAS((unsigned int*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else if (sizeof(T) == 8) { + unsigned long long oldval = *reinterpret_cast(output); + unsigned long long newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + unsigned long long readback; + while ((readback = atomicCAS((unsigned long long*)output, oldval, newval)) != oldval) { + oldval = readback; + newval = oldval; + reducer.reduce(accum, reinterpret_cast(&newval)); + if (newval == oldval) { + return; + } + } + } + else { + assert(0 && "Wordsize not supported"); + } +#else + assert(0 && "Shouldn't be called on unsupported device"); +#endif +} + +// We extend atomicExch to support extra data types +template +__device__ inline Type atomicExchCustom(Type* address, Type val) { + return atomicExch(address, val); +} + +template <> +__device__ inline double atomicExchCustom(double* address, double val) { + unsigned long long int* address_as_ull = reinterpret_cast(address); + return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val))); +} + +#ifdef EIGEN_HAS_CUDA_FP16 +template