Browse Source

migrated a lot of the FAST code including interface to Sifakis' fast 3x3 svd with SSE and AVX support (determined during compile)

Former-commit-id: 7d1db3ff46ac0ca2e2e4be4277368254b9b55d2a
Alec Jacobson (jalec 11 years ago
parent
commit
d14905d818
41 changed files with 3660 additions and 26 deletions
  1. 1 0
      Makefile.conf
  2. 1 0
      RELEASE_HISTORY.txt
  3. 1 1
      VERSION.txt
  4. 2 13
      examples/randomly-sample-mesh/example.cpp
  5. 32 0
      include/igl/ARAPEnergyType.h
  6. 257 0
      include/igl/arap_linear_block.cpp
  7. 78 0
      include/igl/arap_linear_block.h
  8. 69 0
      include/igl/arap_rhs.cpp
  9. 39 0
      include/igl/arap_rhs.h
  10. 62 0
      include/igl/columnize.cpp
  11. 41 0
      include/igl/columnize.h
  12. 76 0
      include/igl/covariance_scatter_matrix.cpp
  13. 38 0
      include/igl/covariance_scatter_matrix.h
  14. 1 0
      include/igl/example_fun.cpp
  15. 46 0
      include/igl/group_sum_matrix.cpp
  16. 45 0
      include/igl/group_sum_matrix.h
  17. 42 0
      include/igl/intersect.cpp
  18. 25 0
      include/igl/intersect.h
  19. 11 0
      include/igl/is_planar.cpp
  20. 30 0
      include/igl/is_planar.h
  21. 143 0
      include/igl/lbs_matrix.cpp
  22. 76 0
      include/igl/lbs_matrix.h
  23. 52 0
      include/igl/partition.cpp
  24. 32 0
      include/igl/partition.h
  25. 6 6
      include/igl/random_points_on_mesh.cpp
  26. 6 6
      include/igl/random_points_on_mesh.h
  27. 43 0
      include/igl/svd3x3/Makefile
  28. 879 0
      include/igl/svd3x3/arap_dof.cpp
  29. 244 0
      include/igl/svd3x3/arap_dof.h
  30. 224 0
      include/igl/svd3x3/fit_rotations.cpp
  31. 55 0
      include/igl/svd3x3/fit_rotations.h
  32. 104 0
      include/igl/svd3x3/polar_svd3x3.cpp
  33. 38 0
      include/igl/svd3x3/polar_svd3x3.h
  34. 69 0
      include/igl/svd3x3/svd3x3.cpp
  35. 40 0
      include/igl/svd3x3/svd3x3.h
  36. 102 0
      include/igl/svd3x3/svd3x3_avx.cpp
  37. 40 0
      include/igl/svd3x3/svd3x3_avx.h
  38. 102 0
      include/igl/svd3x3/svd3x3_sse.cpp
  39. 37 0
      include/igl/svd3x3/svd3x3_sse.h
  40. 429 0
      include/igl/uniformly_sample_two_manifold.cpp
  41. 42 0
      include/igl/uniformly_sample_two_manifold.h

+ 1 - 0
Makefile.conf

@@ -35,6 +35,7 @@ ifeq ($(IGL_USERNAME),ajx)
 	IGL_WITH_MATLAB=1
 	IGL_WITH_MATLAB=1
 	IGL_WITH_MOSEK=1
 	IGL_WITH_MOSEK=1
 	IGL_WITH_BBW=1
 	IGL_WITH_BBW=1
+	IGL_WITH_SVD3X3=1
 	IGL_WITH_PNG=1
 	IGL_WITH_PNG=1
 	IGL_WITH_XML=1
 	IGL_WITH_XML=1
 	IGL_WITH_BOOST=1
 	IGL_WITH_BOOST=1

+ 1 - 0
RELEASE_HISTORY.txt

@@ -1,3 +1,4 @@
+0.4.1  Migrated much of the FAST code including extra for Sifakis' 3x3 svd
 0.4.0  Release under MPL2 license
 0.4.0  Release under MPL2 license
 0.3.7  Embree2.0 support
 0.3.7  Embree2.0 support
 0.3.6  boost extra, patches, mosek 7 support, libiglbbw (mosek optional)
 0.3.6  boost extra, patches, mosek 7 support, libiglbbw (mosek optional)

+ 1 - 1
VERSION.txt

@@ -3,4 +3,4 @@
 # Anyone may increment Minor to indicate a small change.
 # Anyone may increment Minor to indicate a small change.
 # Major indicates a large change or large number of changes (upload to website)
 # Major indicates a large change or large number of changes (upload to website)
 # World indicates a substantial change or release
 # World indicates a substantial change or release
-0.4.0
+0.4.1

+ 2 - 13
examples/randomly-sample-mesh/example.cpp

@@ -21,15 +21,13 @@
 #include <igl/Camera.h>
 #include <igl/Camera.h>
 #include <igl/ReAntTweakBar.h>
 #include <igl/ReAntTweakBar.h>
 #include <igl/get_seconds.h>
 #include <igl/get_seconds.h>
-#include <igl/sample_mesh.h>
+#include <igl/random_points_on_mesh.h>
 
 
 #include <Eigen/Core>
 #include <Eigen/Core>
 #include <Eigen/Geometry>
 #include <Eigen/Geometry>
 
 
 #include <GLUT/glut.h>
 #include <GLUT/glut.h>
 
 
-#include <Carbon/Carbon.h>
-
 #include <string>
 #include <string>
 #include <vector>
 #include <vector>
 #include <stack>
 #include <stack>
@@ -413,19 +411,10 @@ void init_samples()
   const int n = V.rows()*10;
   const int n = V.rows()*10;
   SparseMatrix<double> B;
   SparseMatrix<double> B;
   VectorXi FI;
   VectorXi FI;
-  sample_mesh(n,V,F,B,FI);
+  random_points_on_mesh(n,V,F,B,FI);
   S = B*V;
   S = B*V;
 }
 }
 
 
-
-KeyMap keyStates ;
-bool IS_KEYDOWN( uint16_t vKey )
-{
-  uint8_t index = vKey / 32 ;
-  uint8_t shift = vKey % 32 ;
-  return keyStates[index].bigEndianValue & (1 << shift) ;
-}
-
 void undo()
 void undo()
 {
 {
   using namespace std;
   using namespace std;

+ 32 - 0
include/igl/ARAPEnergyType.h

@@ -0,0 +1,32 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_ARAPENERGYTYPE_H
+#define IGL_ARAPENERGYTYPE_H
+namespace igl
+{
+  //     ARAP_ENERGY_TYPE_SPOKES  "As-rigid-as-possible Surface Modeling" by [Sorkine and
+  //       Alexa 2007], rotations defined at vertices affecting incident edges,
+  //       default
+  //     ARAP_ENERGY_TYPE_SPOKES-AND-RIMS  Adapted version of "As-rigid-as-possible Surface
+  //       Modeling" by [Sorkine and Alexa 2007] presented in section 4.2 of or
+  //       "A simple geometric model for elastic deformation" by [Chao et al.
+  //       2010], rotations defined at vertices affecting incident edges and
+  //       opposite edges
+  //     ARAP_ENERGY_TYPE_ELEMENTS  "A local-global approach to mesh parameterization" by
+  //       [Liu et al.  2010] or "A simple geometric model for elastic
+  //       deformation" by [Chao et al.  2010], rotations defined at elements
+  //       (triangles or tets) 
+  enum ARAPEnergyType
+  {
+    ARAP_ENERGY_TYPE_SPOKES = 0,
+    ARAP_ENERGY_TYPE_SPOKES_AND_RIMS = 1,
+    ARAP_ENERGY_TYPE_ELEMENTS = 2,
+    NUM_ARAP_ENERGY_TYPES = 3
+  };
+}
+#endif

+ 257 - 0
include/igl/arap_linear_block.cpp

@@ -0,0 +1,257 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "arap_linear_block.h"
+#include "verbose.h"
+#include "cotangent.h"
+#include <Eigen/Dense>
+
+template <typename MatV, typename MatF, typename Scalar>
+IGL_INLINE void igl::arap_linear_block(
+  const MatV & V,
+  const MatF & F,
+  const int d,
+  const igl::ARAPEnergyType energy,
+  Eigen::SparseMatrix<Scalar> & Kd)
+{
+  using namespace igl;
+  switch(energy)
+  {
+    case ARAP_ENERGY_TYPE_SPOKES:
+      return igl::arap_linear_block_spokes(V,F,d,Kd);
+      break;
+    case ARAP_ENERGY_TYPE_SPOKES_AND_RIMS:
+      return igl::arap_linear_block_spokes_and_rims(V,F,d,Kd);
+      break;
+    case ARAP_ENERGY_TYPE_ELEMENTS:
+      return igl::arap_linear_block_elements(V,F,d,Kd);
+      break;
+    default:
+      verbose("Unsupported energy type: %d\n",energy);
+      assert(false);
+  }
+}
+
+
+template <typename MatV, typename MatF, typename Scalar>
+IGL_INLINE void igl::arap_linear_block_spokes(
+  const MatV & V,
+  const MatF & F,
+  const int d,
+  Eigen::SparseMatrix<Scalar> & Kd)
+{
+  using namespace igl;
+  using namespace std;
+  using namespace Eigen;
+  // simplex size (3: triangles, 4: tetrahedra)
+  int simplex_size = F.cols();
+  // Number of elements
+  int m = F.rows();
+  // Temporary output
+  Matrix<int,Dynamic,2> edges;
+  Kd.resize(V.rows(), V.rows());
+  vector<Triplet<Scalar> > Kd_IJV;
+  if(simplex_size == 3)
+  {
+    // triangles
+    Kd.reserve(7*V.rows());
+    Kd_IJV.reserve(7*V.rows());
+    edges.resize(3,2);
+    edges << 
+      1,2,
+      2,0,
+      0,1;
+  }else if(simplex_size == 4)
+  {
+    // tets
+    Kd.reserve(17*V.rows());
+    Kd_IJV.reserve(17*V.rows());
+    edges.resize(6,2);
+    edges << 
+      1,2,
+      2,0,
+      0,1,
+      3,0,
+      3,1,
+      3,2;
+  }
+  // gather cotangent weights
+  Matrix<Scalar,Dynamic,Dynamic> C;
+  cotangent(V,F,C);
+  // should have weights for each edge
+  assert(C.cols() == edges.rows());
+  // loop over elements
+  for(int i = 0;i<m;i++)
+  {
+    // loop over edges of element
+    for(int e = 0;e<edges.rows();e++)
+    {
+      int source = F(i,edges(e,0));
+      int dest = F(i,edges(e,1));
+      double v = 0.5*C(i,e)*(V(source,d)-V(dest,d));
+      Kd_IJV.push_back(Triplet<Scalar>(source,dest,v));
+      Kd_IJV.push_back(Triplet<Scalar>(dest,source,-v));
+      Kd_IJV.push_back(Triplet<Scalar>(source,source,v));
+      Kd_IJV.push_back(Triplet<Scalar>(dest,dest,-v));
+    }
+  }
+  Kd.setFromTriplets(Kd_IJV.begin(),Kd_IJV.end());
+  Kd.makeCompressed();
+}
+
+template <typename MatV, typename MatF, typename Scalar>
+IGL_INLINE void igl::arap_linear_block_spokes_and_rims(
+  const MatV & V,
+  const MatF & F,
+  const int d,
+  Eigen::SparseMatrix<Scalar> & Kd)
+{
+  using namespace igl;
+  using namespace std;
+  using namespace Eigen;
+  // simplex size (3: triangles, 4: tetrahedra)
+  int simplex_size = F.cols();
+  // Number of elements
+  int m = F.rows();
+  // Temporary output
+  Kd.resize(V.rows(), V.rows());
+  vector<Triplet<Scalar> > Kd_IJV;
+  Matrix<int,Dynamic,2> edges;
+  if(simplex_size == 3)
+  {
+    // triangles
+    Kd.reserve(7*V.rows());
+    Kd_IJV.reserve(7*V.rows());
+    edges.resize(3,2);
+    edges << 
+      1,2,
+      2,0,
+      0,1;
+  }else if(simplex_size == 4)
+  {
+    // tets
+    Kd.reserve(17*V.rows());
+    Kd_IJV.reserve(17*V.rows());
+    edges.resize(6,2);
+    edges << 
+      1,2,
+      2,0,
+      0,1,
+      3,0,
+      3,1,
+      3,2;
+    // Not implemented yet for tets
+    assert(false);
+  }
+  // gather cotangent weights
+  Matrix<Scalar,Dynamic,Dynamic> C;
+  cotangent(V,F,C);
+  // should have weights for each edge
+  assert(C.cols() == edges.rows());
+  // loop over elements
+  for(int i = 0;i<m;i++)
+  {
+    // loop over edges of element
+    for(int e = 0;e<edges.rows();e++)
+    {
+      int source = F(i,edges(e,0));
+      int dest = F(i,edges(e,1));
+      double v = C(i,e)*(V(source,d)-V(dest,d))/3.0;
+      // loop over edges again
+      for(int f = 0;f<edges.rows();f++)
+      {
+        int Rs = F(i,edges(f,0));
+        int Rd = F(i,edges(f,1));
+        if(Rs == source && Rd == dest)
+        {
+          Kd_IJV.push_back(Triplet<Scalar>(Rs,Rd,v));
+          Kd_IJV.push_back(Triplet<Scalar>(Rd,Rs,-v));
+        }else if(Rd == source)
+        {
+          Kd_IJV.push_back(Triplet<Scalar>(Rd,Rs,v));
+        }else if(Rs == dest)
+        {
+          Kd_IJV.push_back(Triplet<Scalar>(Rs,Rd,-v));
+        }
+      }
+      Kd_IJV.push_back(Triplet<Scalar>(source,source,v));
+      Kd_IJV.push_back(Triplet<Scalar>(dest,dest,-v));
+    }
+  }
+  Kd.setFromTriplets(Kd_IJV.begin(),Kd_IJV.end());
+  Kd.makeCompressed();
+}
+
+template <typename MatV, typename MatF, typename Scalar>
+IGL_INLINE void igl::arap_linear_block_elements(
+  const MatV & V,
+  const MatF & F,
+  const int d,
+  Eigen::SparseMatrix<Scalar> & Kd)
+{
+  using namespace igl;
+  using namespace std;
+  using namespace Eigen;
+  // simplex size (3: triangles, 4: tetrahedra)
+  int simplex_size = F.cols();
+  // Number of elements
+  int m = F.rows();
+  // Temporary output
+  Kd.resize(V.rows(), F.rows());
+  vector<Triplet<Scalar> > Kd_IJV;
+  Matrix<int,Dynamic,2> edges;
+  if(simplex_size == 3)
+  {
+    // triangles
+    Kd.reserve(7*V.rows());
+    Kd_IJV.reserve(7*V.rows());
+    edges.resize(3,2);
+    edges << 
+      1,2,
+      2,0,
+      0,1;
+  }else if(simplex_size == 4)
+  {
+    // tets
+    Kd.reserve(17*V.rows());
+    Kd_IJV.reserve(17*V.rows());
+    edges.resize(6,2);
+    edges << 
+      1,2,
+      2,0,
+      0,1,
+      3,0,
+      3,1,
+      3,2;
+  }
+  // gather cotangent weights
+  Matrix<Scalar,Dynamic,Dynamic> C;
+  cotangent(V,F,C);
+  // should have weights for each edge
+  assert(C.cols() == edges.rows());
+  // loop over elements
+  for(int i = 0;i<m;i++)
+  {
+    // loop over edges of element
+    for(int e = 0;e<edges.rows();e++)
+    {
+      int source = F(i,edges(e,0));
+      int dest = F(i,edges(e,1));
+      double v = C(i,e)*(V(source,d)-V(dest,d));
+      Kd_IJV.push_back(Triplet<Scalar>(source,i,v));
+      Kd_IJV.push_back(Triplet<Scalar>(dest,i,-v));
+    }
+  }
+  Kd.setFromTriplets(Kd_IJV.begin(),Kd_IJV.end());
+  Kd.makeCompressed();
+}
+
+
+#ifndef IGL_HEADER_ONLY
+// Explicit template specialization
+template IGL_INLINE void igl::arap_linear_block<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, double>(Eigen::Matrix<double, -1, -1, 0, -1, -1> const&, Eigen::Matrix<int, -1, -1, 0, -1, -1> const&, int, igl::ARAPEnergyType, Eigen::SparseMatrix<double, 0, int>&);
+#endif

+ 78 - 0
include/igl/arap_linear_block.h

@@ -0,0 +1,78 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_ARAP_LINEAR_BLOCK_H
+#define IGL_ARAP_LINEAR_BLOCK_H
+#include "igl_inline.h"
+
+#include <Eigen/Sparse>
+#include <igl/ARAPEnergyType.h>
+
+namespace igl
+{
+  // ARAP_LINEAR_BLOCK constructs a block of the matrix which constructs the
+  // linear terms of a given arap energy. When treating rotations as knowns
+  // (arranged in a column) then this constructs Kd of K such that the linear
+  // portion of the energy is as a column:
+  //   K * R = [Kx Z  ... Ky Z  ... 
+  //            Z  Kx ... Z  Ky ... 
+  //            ... ]
+  // These blocks are also used to build the "covariance scatter matrices".
+  // Here we want to build a scatter matrix that multiplies against positions
+  // (treated as known) producing covariance matrices to fit each rotation.
+  // Notice that in the case of the RHS of the poisson solve the rotations are
+  // known and the positions unknown, and vice versa for rotation fitting.
+  // These linear block just relate the rotations to the positions, linearly in
+  // each.
+  //
+  // Templates:
+  //   MatV  vertex position matrix, e.g. Eigen::MatrixXd
+  //   MatF  face index matrix, e.g. Eigen::MatrixXd
+  //   Scalar  e.g. double
+  // Inputs:
+  //   V  #V by dim list of initial domain positions
+  //   F  #F by #simplex size list of triangle indices into V
+  //   d  coordinate of linear constructor to build
+  //   energy  ARAPEnergyType enum value defining which energy is being used.
+  //     See ARAPEnergyType.h for valid options and explanations.
+  // Outputs:
+  //   Kd  #V by #V/#F block of the linear constructor matrix corresponding to
+  //     coordinate d
+  //
+  template <typename MatV, typename MatF, typename Scalar>
+  IGL_INLINE void arap_linear_block(
+    const MatV & V,
+    const MatF & F,
+    const int d,
+    const igl::ARAPEnergyType energy,
+    Eigen::SparseMatrix<Scalar> & Kd);
+  // Helper functions for each energy type
+  template <typename MatV, typename MatF, typename Scalar>
+  IGL_INLINE void arap_linear_block_spokes(
+    const MatV & V,
+    const MatF & F,
+    const int d,
+    Eigen::SparseMatrix<Scalar> & Kd);
+  template <typename MatV, typename MatF, typename Scalar>
+  IGL_INLINE void arap_linear_block_spokes_and_rims(
+    const MatV & V,
+    const MatF & F,
+    const int d,
+    Eigen::SparseMatrix<Scalar> & Kd);
+  template <typename MatV, typename MatF, typename Scalar>
+  IGL_INLINE void arap_linear_block_elements(
+    const MatV & V,
+    const MatF & F,
+    const int d,
+    Eigen::SparseMatrix<Scalar> & Kd);
+}
+
+#ifdef IGL_HEADER_ONLY
+#  include "arap_linear_block.cpp"
+#endif
+
+#endif

+ 69 - 0
include/igl/arap_rhs.cpp

@@ -0,0 +1,69 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "arap_rhs.h"
+#include "arap_linear_block.h"
+#include "verbose.h"
+#include "repdiag.h"
+#include "cat.h"
+
+IGL_INLINE void igl::arap_rhs(
+  const Eigen::MatrixXd & V, 
+  const Eigen::MatrixXi & F,
+  const igl::ARAPEnergyType energy,
+  Eigen::SparseMatrix<double>& K)
+{
+  using namespace igl;
+  using namespace Eigen;
+  // Number of dimensions
+  int dim = V.cols();
+  //// Number of mesh vertices
+  //int n = V.rows();
+  //// Number of mesh elements
+  //int m = F.rows();
+  //// number of rotations
+  //int nr;
+  switch(energy)
+  {
+    case ARAP_ENERGY_TYPE_SPOKES:
+      //nr = n;
+      break;
+    case ARAP_ENERGY_TYPE_SPOKES_AND_RIMS:
+      //nr = n;
+      break;
+    case ARAP_ENERGY_TYPE_ELEMENTS:
+      //nr = m;
+      break;
+    default:
+      fprintf(
+        stderr,
+        "covariance_scatter_matrix.h: Error: Unsupported arap energy %d\n",
+        energy);
+      return;
+  }
+
+  SparseMatrix<double> KX,KY,KZ;
+  arap_linear_block(V,F,0,energy,KX);
+  arap_linear_block(V,F,1,energy,KY);
+  if(dim == 2)
+  {
+    K = cat(2,repdiag(KX,dim),repdiag(KY,dim));
+  }else if(dim == 3)
+  {
+    arap_linear_block(V,F,2,energy,KZ);
+    K = cat(2,cat(2,repdiag(KX,dim),repdiag(KY,dim)),repdiag(KZ,dim));
+  }else
+  {
+    fprintf(
+     stderr,
+     "covariance_scatter_matrix.h: Error: Unsupported dimension %d\n",
+     dim);
+    return;
+  }
+  
+}
+

+ 39 - 0
include/igl/arap_rhs.h

@@ -0,0 +1,39 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_ARAP_RHS_H
+#define IGL_ARAP_RHS_H
+#include "igl_inline.h"
+
+#include <Eigen/Dense>
+#include <Eigen/Sparse>
+#include <igl/ARAPEnergyType.h>
+
+namespace igl
+{
+  // ARAP_RHS build right-hand side constructor of global poisson solve for
+  // various Arap energies
+  // Inputs:
+  //   V  #V by dim list of initial domain positions
+  //   F  #F by 3 list of triangle indices into V
+  //   energy  igl::ARAPEnergyType enum value defining which energy is being
+  //     used. See igl::ARAPEnergyType.h for valid options and explanations.
+  // Outputs:
+  //   K  #V*dim by #(F|V)*dim*dim matrix such that: 
+  //     b = K * reshape(permute(R,[3 1 2]),size(V|F,1)*size(V,2)*size(V,2),1);
+  //   
+  // See also: arap_linear_block
+  void arap_rhs(
+    const Eigen::MatrixXd & V, 
+    const Eigen::MatrixXi & F,
+    const igl::ARAPEnergyType energy,
+    Eigen::SparseMatrix<double>& K);
+}
+#ifdef IGL_HEADER_ONLY
+#include "arap_rhs.cpp"
+#endif
+#endif

+ 62 - 0
include/igl/columnize.cpp

@@ -0,0 +1,62 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "columnize.h"
+#include <cassert>
+
+template <typename DerivedA, typename DerivedB>
+IGL_INLINE void igl::columnize(
+  const Eigen::PlainObjectBase<DerivedA> & A,
+  const int k,
+  const int dim,
+  Eigen::PlainObjectBase<DerivedB> & B)
+{
+  // Eigen matrices must be 2d so dim must be only 1 or 2
+  assert(dim == 1 || dim == 2);
+
+  // block height, width, and number of blocks
+  int m,n;
+  if(dim == 1)
+  {
+    m = A.rows()/k;
+    assert(m*(int)k == (int)A.rows());
+    n = A.cols();
+  }else// dim == 2
+  {
+    m = A.rows();
+    n = A.cols()/k;
+    assert(n*(int)k == (int)A.cols());
+  }
+
+  // resize output
+  B.resize(A.rows()*A.cols(),1);
+
+  for(int b = 0;b<(int)k;b++)
+  {
+    for(int i = 0;i<m;i++)
+    {
+      for(int j = 0;j<n;j++)
+      {
+        if(dim == 1)
+        {
+          B(j*m*k+i*k+b) = A(i+b*m,j);
+        }else
+        {
+          B(j*m*k+i*k+b) = A(i,b*n+j);
+        }
+      }
+    }
+  }
+}
+
+
+#ifndef IGL_HEADER_ONLY
+// Explicit template specialization
+template void igl::columnize<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<double, -1, 1, 0, -1, 1> >(Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, int, int, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, 1, 0, -1, 1> >&);
+template void igl::columnize<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<double, -1, -1, 0, -1, -1> >(Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, int, int, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> >&);
+template void igl::columnize<Eigen::Matrix<float, -1, -1, 0, -1, -1>, Eigen::Matrix<float, -1, -1, 0, -1, -1> >(Eigen::PlainObjectBase<Eigen::Matrix<float, -1, -1, 0, -1, -1> > const&, int, int, Eigen::PlainObjectBase<Eigen::Matrix<float, -1, -1, 0, -1, -1> >&);
+#endif

+ 41 - 0
include/igl/columnize.h

@@ -0,0 +1,41 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_COLUMNIZE_H
+#define IGL_COLUMNIZE_H
+#include "igl_inline.h"
+
+#include <Eigen/Core>
+namespace igl
+{
+  // "Columnize" a stack of block matrices. If A = [A1,A2,A3,...,Ak] with each A*
+  // an m by n block then this produces the column vector whose entries are 
+  // B(j*m*k+i*k+b) = A(i,b*n+j);
+  // or if A = [A1;A2;...;Ak] then
+  // B(j*m*k+i*k+b) = A(i+b*m,j);
+  //
+  // Templates:
+  //   T  should be a eigen matrix primitive type like int or double
+  // Inputs:
+  //   A  m*k by n (dim: 1) or m by n*k (dim: 2) eigen Matrix of type T values
+  //   k  number of blocks
+  //   dim  dimension in which blocks are stacked
+  // Output
+  //   B  m*n*k eigen vector of type T values,
+  //
+  // See also: transpose_blocks
+  template <typename DerivedA, typename DerivedB>
+  IGL_INLINE void columnize(
+    const Eigen::PlainObjectBase<DerivedA> & A,
+    const int k,
+    const int dim,
+    Eigen::PlainObjectBase<DerivedB> & B);
+}
+#ifdef IGL_HEADER_ONLY
+#  include "columnize.cpp"
+#endif
+#endif

+ 76 - 0
include/igl/covariance_scatter_matrix.cpp

@@ -0,0 +1,76 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "covariance_scatter_matrix.h"
+#include "arap_linear_block.h"
+#include "cotmatrix.h"
+#include "diag.h"
+#include "sum.h"
+#include "edges.h"
+#include "verbose.h"
+#include "cat.h"
+
+IGL_INLINE void igl::covariance_scatter_matrix(
+  const Eigen::MatrixXd & V, 
+  const Eigen::MatrixXi & F,
+  const ARAPEnergyType energy,
+  Eigen::SparseMatrix<double>& CSM)
+{
+  using namespace igl;
+  using namespace Eigen;
+  // number of mesh vertices
+  int n = V.rows();
+  assert(n > F.maxCoeff());
+  // dimension of mesh
+  int dim = V.cols();
+  // Number of mesh elements
+  int m = F.rows();
+
+  // number of rotations
+  int nr;
+  switch(energy)
+  {
+    case ARAP_ENERGY_TYPE_SPOKES:
+      nr = n;
+      break;
+    case ARAP_ENERGY_TYPE_SPOKES_AND_RIMS:
+      nr = n;
+      break;
+    case ARAP_ENERGY_TYPE_ELEMENTS:
+      nr = m;
+      break;
+    default:
+      fprintf(
+        stderr,
+        "covariance_scatter_matrix.h: Error: Unsupported arap energy %d\n",
+        energy);
+      return;
+  }
+
+  SparseMatrix<double> KX,KY,KZ;
+  arap_linear_block(V,F,0,energy,KX);
+  arap_linear_block(V,F,1,energy,KY);
+  SparseMatrix<double> Z(n,nr);
+  if(dim == 2)
+  {
+    CSM = cat(1,cat(2,KX,Z),cat(2,Z,KY)).transpose();
+  }else if(dim == 3)
+  {
+    arap_linear_block(V,F,2,energy,KZ);
+    SparseMatrix<double>ZZ(n,nr*2);
+    CSM = 
+      cat(1,cat(1,cat(2,KX,ZZ),cat(2,cat(2,Z,KY),Z)),cat(2,ZZ,KZ)).transpose();
+  }else
+  {
+    fprintf(
+     stderr,
+     "covariance_scatter_matrix.h: Error: Unsupported dimension %d\n",
+     dim);
+    return;
+  }
+
+}

+ 38 - 0
include/igl/covariance_scatter_matrix.h

@@ -0,0 +1,38 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_COVARIANCE_SCATTER_MATRIX_H
+#define IGL_COVARIANCE_SCATTER_MATRIX_H
+
+#include "igl_inline.h"
+#include "ARAPEnergyType.h"
+#include <Eigen/Dense>
+#include <Eigen/Sparse>
+ 
+namespace igl
+{
+  // Construct the covariance scatter matrix for a given arap energy
+  // Inputs:
+  //   V  #V by dim list of initial domain positions
+  //   F  #F by 3 list of triangle indices into V
+  //   energy  ARAPEnergyType enum value defining which energy is being used.
+  //     See ARAPEnergyType.h for valid options and explanations.
+  // Outputs:
+  //   CSM dim*#V by dim*#V sparse matrix containing special laplacians along
+  //     the diagonal so that when multiplied by V gives covariance matrix
+  //     elements, can be used to speed up covariance matrix computation
+  IGL_INLINE void covariance_scatter_matrix(
+    const Eigen::MatrixXd & V, 
+    const Eigen::MatrixXi & F,
+    const ARAPEnergyType energy,
+    Eigen::SparseMatrix<double>& CSM);
+}
+
+#ifdef IGL_HEADER_ONLY
+#include "covariance_scatter_matrix.cpp"
+#endif
+#endif

+ 1 - 0
include/igl/example_fun.cpp

@@ -17,6 +17,7 @@ IGL_INLINE bool igl::example_fun(const Printable & input)
 }
 }
 
 
 #ifndef IGL_HEADER_ONLY
 #ifndef IGL_HEADER_ONLY
+// Explicit template specialization
 template bool igl::example_fun<double>(const double& input);
 template bool igl::example_fun<double>(const double& input);
 template bool igl::example_fun<int>(const int& input);
 template bool igl::example_fun<int>(const int& input);
 #endif
 #endif

+ 46 - 0
include/igl/group_sum_matrix.cpp

@@ -0,0 +1,46 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "group_sum_matrix.h"
+
+template <typename T>
+IGL_INLINE void igl::group_sum_matrix(
+  const Eigen::Matrix<int,Eigen::Dynamic,1> & G,
+  const int k,
+  Eigen::SparseMatrix<T>& A)
+{
+  // number of vertices
+  int n = G.rows();
+  assert(k > G.maxCoeff());
+
+  A.resize(k,n);
+
+  // builds A such that A(i,j) = 1 where i corresponds to group i and j
+  // corresponds to vertex j
+
+  // Loop over vertices
+  for(int j = 0;j<n;j++)
+  {
+    A.insert(G(j),j) = 1;
+  }
+
+  A.makeCompressed();
+}
+
+template <typename T>
+IGL_INLINE void igl::group_sum_matrix(
+  const Eigen::Matrix<int,Eigen::Dynamic,1> & G,
+  Eigen::SparseMatrix<T>& A)
+{
+  return group_sum_matrix(G,G.maxCoeff()+1,A);
+}
+
+#ifndef IGL_HEADER_ONLY
+// Explicit template instanciation
+template void igl::group_sum_matrix<double>(Eigen::Matrix<int, -1, 1, 0, -1, 1> const&, int, Eigen::SparseMatrix<double, 0, int>&);
+template void igl::group_sum_matrix<double>(Eigen::Matrix<int, -1, 1, 0, -1, 1> const&, Eigen::SparseMatrix<double, 0, int>&);
+#endif

+ 45 - 0
include/igl/group_sum_matrix.h

@@ -0,0 +1,45 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_GROUP_SUM_MATRIX_H
+#define IGL_GROUP_SUM_MATRIX_H
+#include "igl_inline.h"
+
+#include <Eigen/Dense>
+#include <Eigen/Sparse>
+
+namespace igl
+{
+  // GROUP_SUM_MATRIX Builds a matrix A such that A*V computes the sum of
+  // vertices in each group specified by G
+  //
+  // group_sum_matrix(G,k,A);
+  // 
+  // Templates:
+  //   T  should be a eigen sparse matrix primitive type like int or double
+  // Inputs:
+  //   G  #V list of group indices (0 to k-1) for each vertex, such that vertex i 
+  //     is assigned to group G(i)
+  //   k  #groups, good choice is max(G)+1
+  // Outputs:
+  //   A  #groups by #V sparse matrix such that A*V = group_sums
+  //
+  template <typename T>
+  IGL_INLINE void group_sum_matrix(
+    const Eigen::Matrix<int,Eigen::Dynamic,1> & G,
+    const int k,
+    Eigen::SparseMatrix<T>& A);
+  // Wrapper with k = max(G)+1
+  template <typename T>
+  IGL_INLINE void group_sum_matrix(
+    const Eigen::Matrix<int,Eigen::Dynamic,1> & G,
+    Eigen::SparseMatrix<T>& A);
+}
+#ifdef IGL_HEADER_ONLY
+#  include "group_sum_matrix.cpp"
+#endif
+#endif

+ 42 - 0
include/igl/intersect.cpp

@@ -0,0 +1,42 @@
+#include "intersect.h"
+template <class M>
+IGL_INLINE void igl::intersect(const M & A, const M & B, M & C)
+{
+  // Stupid O(size(A) * size(B)) to do it
+  M dyn_C(A.size() > B.size() ? A.size() : B.size(),1);
+  // count of intersects
+  int c = 0;
+  // Loop over A
+  for(int i = 0;i<A.size();i++)
+  {
+    // Loop over B
+    for(int j = 0;j<B.size();j++)
+    {
+      if(A(i) == B(j))
+      {
+        dyn_C(c) = A(i);
+        c++;
+      }
+    }
+  }
+
+  // resize output
+  C.resize(c,1);
+  // Loop over intersects
+  for(int i = 0;i<c;i++)
+  {
+    C(i) = dyn_C(i);
+  }
+}
+
+template <class M>
+IGL_INLINE M igl::intersect(const M & A, const M & B)
+{
+  M C;
+  intersect(A,B,C);
+  return C;
+}
+#ifndef IGL_HEADER_ONLY
+// Explicit template specialization
+template Eigen::Matrix<int, -1, 1, 0, -1, 1> igl::intersect<Eigen::Matrix<int, -1, 1, 0, -1, 1> >(Eigen::Matrix<int, -1, 1, 0, -1, 1> const&, Eigen::Matrix<int, -1, 1, 0, -1, 1> const&); 
+#endif

+ 25 - 0
include/igl/intersect.h

@@ -0,0 +1,25 @@
+#ifndef IGL_INTERSECT_H
+#define IGL_INTERSECT_H
+#include "igl_inline.h"
+#include <Eigen/Core>
+namespace igl
+{
+  // Determine the intersect between two sets of coefficients using ==
+  // Templates:
+  //   M  matrix type that implements indexing by global index M(i)
+  // Inputs:
+  //   A  matrix of coefficients
+  //   B  matrix of coefficients
+  // Output:
+  //   C  matrix of elements appearing in both A and B, C is always resized to
+  //   have a single column
+  template <class M>
+  IGL_INLINE void intersect(const M & A, const M & B, M & C);
+  // Last argument as return
+  template <class M>
+  IGL_INLINE M intersect(const M & A, const M & B);
+}
+#ifdef IGL_HEADER_ONLY
+#include "intersect.cpp"
+#endif
+#endif

+ 11 - 0
include/igl/is_planar.cpp

@@ -0,0 +1,11 @@
+#include "is_planar.h"
+IGL_INLINE bool igl::is_planar(const Eigen::MatrixXd & V)
+{
+  if(V.size() == 0) return false;
+  if(V.cols() == 2) return true;
+  for(int i = 0;i<V.rows();i++)
+  {
+    if(V(i,2) != 0) return false;
+  }
+  return true;
+}

+ 30 - 0
include/igl/is_planar.h

@@ -0,0 +1,30 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_IS_PLANAR_H
+#define IGL_IS_PLANAR_H
+#include "igl_inline.h"
+
+#include <Eigen/Dense>
+
+namespace igl
+{
+  // Determin if a set of points lies on the XY plane
+  //
+  // Inputs:
+  //   V  #V by dim list of vertex positions
+  // Return true if a mesh has constant value of 0 in z coordinate
+  //
+  // Known bugs: Doesn't determine if vertex is flat if it doesn't lie on the
+  // XY plane.
+  IGL_INLINE bool is_planar(const Eigen::MatrixXd & V);
+}
+
+#ifdef IGL_HEADER_ONLY
+#  include "is_planar.cpp"
+#endif
+#endif

+ 143 - 0
include/igl/lbs_matrix.cpp

@@ -0,0 +1,143 @@
+#include "lbs_matrix.h"
+
+IGL_INLINE void igl::lbs_matrix(
+  const Eigen::MatrixXd & V, 
+  const Eigen::MatrixXd & W,
+  Eigen::SparseMatrix<double>& M)
+{
+  // number of mesh vertices
+  int n = V.rows();
+  assert(n == W.rows());
+  // dimension of mesh
+  int dim = V.cols();
+  // number of handles
+  int m = W.cols();
+
+  M.resize(n*dim,m*dim*(dim+1));
+
+  // loop over coordinates of mesh vertices
+  for(int x = 0; x < dim; x++)
+  {
+    // loop over mesh vertices
+    for(int j = 0; j < n; j++)
+    {
+      // loop over handles
+      for(int i = 0; i < m; i++)
+      {
+        // loop over cols of affine transformations
+        for(int c = 0; c < (dim+1); c++)
+        {
+          double value = W(j,i);
+          if(c<dim)
+          {
+            value *= V(j,c);
+          }
+          M.insert(x*n + j,x*m + c*m*dim + i) = value;
+        }
+      }
+    }
+  }
+
+  M.makeCompressed();
+}
+
+IGL_INLINE void igl::lbs_matrix(
+  const Eigen::MatrixXd & V, 
+  const Eigen::MatrixXd & W,
+  Eigen::MatrixXd & M)
+{
+  // number of mesh vertices
+  int n = V.rows();
+  assert(n == W.rows());
+  // dimension of mesh
+  int dim = V.cols();
+  // number of handles
+  int m = W.cols();
+  M.resize(n*dim,m*dim*(dim+1));
+
+  // loop over coordinates of mesh vertices
+  for(int x = 0; x < dim; x++)
+  {
+    // loop over mesh vertices
+    for(int j = 0; j < n; j++)
+    {
+      // loop over handles
+      for(int i = 0; i < m; i++)
+      {
+        // loop over cols of affine transformations
+        for(int c = 0; c < (dim+1); c++)
+        {
+          double value = W(j,i);
+          if(c<dim)
+          {
+            value *= V(j,c);
+          }
+          M(x*n + j,x*m + c*m*dim + i) = value;
+        }
+      }
+    }
+  }
+}
+
+IGL_INLINE void igl::lbs_matrix(
+  const Eigen::MatrixXd & V, 
+  const Eigen::MatrixXd & W,
+  const Eigen::MatrixXi & WI,
+  Eigen::SparseMatrix<double>& M)
+{
+  // number of mesh vertices
+  int n = V.rows();
+  assert(n == W.rows());
+  assert(n == WI.rows());
+  // dimension of mesh
+  int dim = V.cols();
+  // number of handles
+  int m = WI.maxCoeff()+1;
+  // max number of influencing handles
+  int k = W.cols();
+  assert(k == WI.cols());
+
+  M.resize(n*dim,m*dim*(dim+1));
+
+  // loop over coordinates of mesh vertices
+  for(int x = 0; x < dim; x++)
+  {
+    // loop over mesh vertices
+    for(int j = 0; j < n; j++)
+    {
+      // loop over handles
+      for(int i = 0; i < k; i++)
+      {
+        // loop over cols of affine transformations
+        for(int c = 0; c < (dim+1); c++)
+        {
+          double value = W(j,i);
+          if(c<dim)
+          {
+            value *= V(j,c);
+          }
+          if(value != 0)
+          {
+            M.insert(x*n + j,x*m + c*m*dim + WI(j,i)) = value;
+          }
+        }
+      }
+    }
+  }
+
+  M.makeCompressed();
+}
+
+
+IGL_INLINE void igl::lbs_matrix(
+  const Eigen::MatrixXd & V, 
+  const Eigen::MatrixXd & W,
+  const Eigen::MatrixXi & WI,
+  Eigen::MatrixXd & M)
+{
+  // Cheapskate wrapper
+  using namespace Eigen;
+  SparseMatrix<double> sM;
+  lbs_matrix(V,W,WI,sM);
+  M = MatrixXd(sM);
+}

+ 76 - 0
include/igl/lbs_matrix.h

@@ -0,0 +1,76 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_LBS_MATRIX_H
+#define IGL_LBS_MATRIX_H
+#include "igl_inline.h"
+
+#include <Eigen/Dense>
+#include <Eigen/Sparse>
+
+namespace igl
+{
+  // LBS_MATRIX  construct a matrix that when multiplied against a column of
+  // affine transformation entries computes new coordinates of the vertices
+  //
+  // I'm not sure it makes since that the result is stored as a sparse matrix.
+  // The number of non-zeros per row *is* dependent on the number of mesh
+  // vertices and handles.
+  //
+  // Inputs:
+  //   V  #V by dim list of vertex rest positions
+  //   W  #V by #handles list of correspondence weights
+  // Output:
+  //   M  #V * dim by #handles * dim * (dim+1) matrix such that
+  //     new_V(:) = LBS(V,W,A) = reshape(M * A,size(V)), where A is a column
+  //     vectors formed by the entries in each handle's dim by dim+1 
+  //     transformation matrix. Specifcally, A =
+  //       reshape(permute(Astack,[3 1 2]),n*dim*(dim+1),1)
+  //     or A = [Lxx;Lyx;Lxy;Lyy;tx;ty], and likewise for other dim
+  //     if Astack(:,:,i) is the dim by (dim+1) transformation at handle i
+  //
+  IGL_INLINE void lbs_matrix(
+    const Eigen::MatrixXd & V, 
+    const Eigen::MatrixXd & W,
+    Eigen::SparseMatrix<double>& M);
+  IGL_INLINE void lbs_matrix(
+    const Eigen::MatrixXd & V, 
+    const Eigen::MatrixXd & W,
+    Eigen::MatrixXd & M);
+  // Same as LBS_MATRIX above but instead of giving W as a full matrix of weights
+  // (each vertex has #handles weights), a constant number of weights are given
+  // for each vertex.
+  // 
+  // Inputs:
+  //   V  #V by dim list of vertex rest positions
+  //   W  #V by k  list of k correspondence weights per vertex
+  //   WI  #V by k  list of k correspondence weight indices per vertex. Such that
+  //     W(j,WI(i)) gives the ith most significant correspondence weight on vertex j
+  // Output:
+  //   M  #V * dim by #handles * dim * (dim+1) matrix such that
+  //     new_V(:) = LBS(V,W,A) = reshape(M * A,size(V)), where A is a column
+  //     vectors formed by the entries in each handle's dim by dim+1 
+  //     transformation matrix. Specifcally, A =
+  //       reshape(permute(Astack,[3 1 2]),n*dim*(dim+1),1)
+  //     or A = [Lxx;Lyx;Lxy;Lyy;tx;ty], and likewise for other dim
+  //     if Astack(:,:,i) is the dim by (dim+1) transformation at handle i
+  //
+  IGL_INLINE void lbs_matrix(
+    const Eigen::MatrixXd & V, 
+    const Eigen::MatrixXd & W,
+    const Eigen::MatrixXi & WI,
+    Eigen::SparseMatrix<double>& M);
+  IGL_INLINE void lbs_matrix(
+    const Eigen::MatrixXd & V, 
+    const Eigen::MatrixXd & W,
+    const Eigen::MatrixXi & WI,
+    Eigen::MatrixXd & M);
+}
+#ifdef IGL_HEADER_ONLY
+#include "lbs_matrix.cpp"
+#endif
+#endif

+ 52 - 0
include/igl/partition.cpp

@@ -0,0 +1,52 @@
+#include "partition.h"
+#include "mat_min.h"
+
+IGL_INLINE void igl::partition(
+  const Eigen::MatrixXd & W,
+  const int k,
+  Eigen::Matrix<int,Eigen::Dynamic,1> & G,
+  Eigen::Matrix<int,Eigen::Dynamic,1> & S,
+  Eigen::Matrix<double,Eigen::Dynamic,1> & D)
+{
+  // number of mesh vertices
+  int n = W.rows();
+
+  // Resize output
+  G.resize(n);
+  S.resize(k);
+
+  // "Randomly" choose first seed
+  // Pick a vertex farthest from 0
+  int s;
+  (W.array().square().matrix()).rowwise().sum().maxCoeff(&s);
+
+  S(0) = s;
+  // Initialize distance to closest seed
+  D = ((W.rowwise() - W.row(s)).array().square()).matrix().rowwise().sum();
+  G.setZero();
+
+  // greedily choose the remaining k-1 seeds
+  for(int i = 1;i<k;i++)
+  {
+    // Find maximum in D
+    D.maxCoeff(&s);
+    S(i) = s;
+    // distance to this seed
+    Eigen::Matrix<double,Eigen::Dynamic,1> Ds =
+      ((W.rowwise() - W.row(s)).array().square()).matrix().rowwise().sum();
+    // Concatenation of D and Ds: DDs = [D Ds];
+    Eigen::Matrix<double,Eigen::Dynamic,Eigen::Dynamic> DDs;
+    // Make space for two columns
+    DDs.resize(D.rows(),2);
+    DDs.col(0) = D;
+    DDs.col(1) = Ds;
+    // Update D
+    // get minimum of old D and distance to this seed, C == 1 if new distance
+    // was smaller
+    Eigen::Matrix<int,Eigen::Dynamic,1> C;
+    igl::mat_min(DDs,2,D,C);
+    G = (C.array() ==0).select(G,i);
+  }
+
+
+}

+ 32 - 0
include/igl/partition.h

@@ -0,0 +1,32 @@
+#ifndef IGL_PARTITION_H
+#define IGL_PARTITION_H
+#include "igl_inline.h"
+#include <Eigen/Dense>
+
+namespace igl
+{
+  // PARTITION partition vertices into groups based on each
+  // vertex's vector: vertices with similar coordinates (close in 
+  // space) will be put in the same group.
+  //
+  // Inputs:
+  //   W  #W by dim coordinate matrix
+  //   k  desired number of groups default is dim
+  // Output:
+  //   G  #W list of group indices (1 to k) for each vertex, such that vertex i 
+  //     is assigned to group G(i)
+  //   S  k  list of seed vertices
+  //   D  #W list of squared distances for each vertex to it's corresponding
+  //     closest seed
+  IGL_INLINE void partition(
+    const Eigen::MatrixXd & W,
+    const int k,
+    Eigen::Matrix<int,Eigen::Dynamic,1> & G,
+    Eigen::Matrix<int,Eigen::Dynamic,1> & S,
+    Eigen::Matrix<double,Eigen::Dynamic,1> & D);
+}
+
+#ifdef IGL_HEADER_ONLY
+#include "partition.cpp"
+#endif
+#endif

+ 6 - 6
include/igl/sample_mesh.cpp → include/igl/random_points_on_mesh.cpp

@@ -1,4 +1,4 @@
-#include "sample_mesh.h"
+#include "random_points_on_mesh.h"
 #include "doublearea.h"
 #include "doublearea.h"
 #include "cumsum.h"
 #include "cumsum.h"
 #include "histc.h"
 #include "histc.h"
@@ -7,7 +7,7 @@
 #include <cassert>
 #include <cassert>
 
 
 template <typename DerivedV, typename DerivedF, typename DerivedB, typename DerivedFI>
 template <typename DerivedV, typename DerivedF, typename DerivedB, typename DerivedFI>
-IGL_INLINE void igl::sample_mesh(
+IGL_INLINE void igl::random_points_on_mesh(
   const int n,
   const int n,
   const Eigen::PlainObjectBase<DerivedV > & V,
   const Eigen::PlainObjectBase<DerivedV > & V,
   const Eigen::PlainObjectBase<DerivedF > & F,
   const Eigen::PlainObjectBase<DerivedF > & F,
@@ -41,7 +41,7 @@ IGL_INLINE void igl::sample_mesh(
 }
 }
 
 
 template <typename DerivedV, typename DerivedF, typename ScalarB, typename DerivedFI>
 template <typename DerivedV, typename DerivedF, typename ScalarB, typename DerivedFI>
-IGL_INLINE void igl::sample_mesh(
+IGL_INLINE void igl::random_points_on_mesh(
   const int n,
   const int n,
   const Eigen::PlainObjectBase<DerivedV > & V,
   const Eigen::PlainObjectBase<DerivedV > & V,
   const Eigen::PlainObjectBase<DerivedF > & F,
   const Eigen::PlainObjectBase<DerivedF > & F,
@@ -51,7 +51,7 @@ IGL_INLINE void igl::sample_mesh(
   using namespace Eigen;
   using namespace Eigen;
   using namespace std;
   using namespace std;
   Matrix<ScalarB,Dynamic,3> BC;
   Matrix<ScalarB,Dynamic,3> BC;
-  sample_mesh(n,V,F,BC,FI);
+  random_points_on_mesh(n,V,F,BC,FI);
   vector<Triplet<ScalarB> > BIJV;
   vector<Triplet<ScalarB> > BIJV;
   BIJV.reserve(n*3);
   BIJV.reserve(n*3);
   for(int s = 0;s<n;s++)
   for(int s = 0;s<n;s++)
@@ -71,6 +71,6 @@ IGL_INLINE void igl::sample_mesh(
 
 
 #ifndef IGL_HEADER_ONLY
 #ifndef IGL_HEADER_ONLY
 // Explicit template specialization
 // Explicit template specialization
-template void igl::sample_mesh<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, double, Eigen::Matrix<int, -1, -1, 0, -1, -1> >(int, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, Eigen::PlainObjectBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::SparseMatrix<double, 0, int>&, Eigen::PlainObjectBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> >&);
-template void igl::sample_mesh<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, double, Eigen::Matrix<int, -1, 1, 0, -1, 1> >(int, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, Eigen::PlainObjectBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::SparseMatrix<double, 0, int>&, Eigen::PlainObjectBase<Eigen::Matrix<int, -1, 1, 0, -1, 1> >&);
+template void igl::random_points_on_mesh<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, double, Eigen::Matrix<int, -1, -1, 0, -1, -1> >(int, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, Eigen::PlainObjectBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::SparseMatrix<double, 0, int>&, Eigen::PlainObjectBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> >&);
+template void igl::random_points_on_mesh<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, double, Eigen::Matrix<int, -1, 1, 0, -1, 1> >(int, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, Eigen::PlainObjectBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::SparseMatrix<double, 0, int>&, Eigen::PlainObjectBase<Eigen::Matrix<int, -1, 1, 0, -1, 1> >&);
 #endif
 #endif

+ 6 - 6
include/igl/sample_mesh.h → include/igl/random_points_on_mesh.h

@@ -5,8 +5,8 @@
 // This Source Code Form is subject to the terms of the Mozilla Public License 
 // This Source Code Form is subject to the terms of the Mozilla Public License 
 // v. 2.0. If a copy of the MPL was not distributed with this file, You can 
 // v. 2.0. If a copy of the MPL was not distributed with this file, You can 
 // obtain one at http://mozilla.org/MPL/2.0/.
 // obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef IGL_SAMPLE_MESH_H
-#define IGL_SAMPLE_MESH_H
+#ifndef IGL_RANDOM_POINTS_ON_MESH_H
+#define IGL_RANDOM_POINTS_ON_MESH_H
 
 
 #include "igl_inline.h"
 #include "igl_inline.h"
 #include <Eigen/Core>
 #include <Eigen/Core>
@@ -14,7 +14,7 @@
 
 
 namespace igl
 namespace igl
 {
 {
-  // SAMPLE_MESH Randomly sample a mesh (V,F) n times.
+  // RANDOM_POINTS_ON_MESH Randomly sample a mesh (V,F) n times.
   //
   //
   // Inputs:
   // Inputs:
   //   n  number of samples
   //   n  number of samples
@@ -26,14 +26,14 @@ namespace igl
   //   FI  n list of indices into F 
   //   FI  n list of indices into F 
   //
   //
   template <typename DerivedV, typename DerivedF, typename DerivedB, typename DerivedFI>
   template <typename DerivedV, typename DerivedF, typename DerivedB, typename DerivedFI>
-  IGL_INLINE void sample_mesh(
+  IGL_INLINE void random_points_on_mesh(
     const int n,
     const int n,
     const Eigen::PlainObjectBase<DerivedV > & V,
     const Eigen::PlainObjectBase<DerivedV > & V,
     const Eigen::PlainObjectBase<DerivedF > & F,
     const Eigen::PlainObjectBase<DerivedF > & F,
     Eigen::PlainObjectBase<DerivedB > & B,
     Eigen::PlainObjectBase<DerivedB > & B,
     Eigen::PlainObjectBase<DerivedFI > & FI);
     Eigen::PlainObjectBase<DerivedFI > & FI);
   template <typename DerivedV, typename DerivedF, typename ScalarB, typename DerivedFI>
   template <typename DerivedV, typename DerivedF, typename ScalarB, typename DerivedFI>
-  IGL_INLINE void sample_mesh(
+  IGL_INLINE void random_points_on_mesh(
     const int n,
     const int n,
     const Eigen::PlainObjectBase<DerivedV > & V,
     const Eigen::PlainObjectBase<DerivedV > & V,
     const Eigen::PlainObjectBase<DerivedF > & F,
     const Eigen::PlainObjectBase<DerivedF > & F,
@@ -42,7 +42,7 @@ namespace igl
 }
 }
 
 
 #ifdef IGL_HEADER_ONLY
 #ifdef IGL_HEADER_ONLY
-#  include "sample_mesh.cpp"
+#  include "random_points_on_mesh.cpp"
 #endif
 #endif
 
 
 #endif
 #endif

+ 43 - 0
include/igl/svd3x3/Makefile

@@ -0,0 +1,43 @@
+include ../../../Makefile.conf
+
+.PHONY: all
+all: libiglsvd3x3
+debug: libiglsvd3x3
+
+include ../../../Makefile.conf
+all: OPTFLAGS += -O3 -DNDEBUG $(OPENMP)
+debug: OPTFLAGS += -g -Wall
+CFLAGS += $(OPTFLAGS)
+
+.PHONY: libiglsvd3x3
+libiglsvd3x3: obj ../../../lib/libiglsvd3x3.a
+
+CPP_FILES=$(wildcard *.cpp)
+OBJ_FILES=$(addprefix obj/,$(notdir $(CPP_FILES:.cpp=.o)))
+
+# include igl headers
+INC+=-I../../../include/
+
+# EXPECTS THAT CFLAGS IS ALREADY SET APPROPRIATELY 
+
+
+# SVD 
+SINGULAR_VALUE_DECOMPOSITION_INC=\
+	-I../../../external/Singular_Value_Decomposition/
+# Eigen dependency
+EIGEN3_INC=-I$(DEFAULT_PREFIX)/include/eigen3 -I$(DEFAULT_PREFIX)/include/eigen3/unsupported
+INC+=$(EIGEN3_INC) $(SINGULAR_VALUE_DECOMPOSITION_INC)
+
+obj: 
+	mkdir -p obj
+
+../../../lib/libiglsvd3x3.a: $(OBJ_FILES)
+	rm -f $@
+	ar cqs $@ $(OBJ_FILES)
+
+obj/%.o: %.cpp %.h
+	$(GG) $(AFLAGS) $(CFLAGS) -c -o $@ $< $(INC)
+
+clean:
+	rm -f obj/*.o
+	rm -f ../../../lib/libiglsvd3x3.a

+ 879 - 0
include/igl/svd3x3/arap_dof.cpp

@@ -0,0 +1,879 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "arap_dof.h"
+
+#include <igl/cotmatrix.h>
+#include <igl/massmatrix.h>
+#include <igl/speye.h>
+#include <igl/repdiag.h>
+#include <igl/repmat.h>
+#include <igl/slice.h>
+#include <igl/colon.h>
+#include <igl/full.h>
+#include <igl/is_sparse.h>
+#include <igl/mode.h>
+#include <igl/is_symmetric.h>
+#include <igl/group_sum_matrix.h>
+#include <igl/arap_rhs.h>
+#include <igl/covariance_scatter_matrix.h>
+#include <igl/svd3x3/fit_rotations.h>
+
+#include <igl/verbose.h>
+#include <igl/print_ijv.h>
+
+#include <igl/get_seconds_hires.h>
+//#include "MKLEigenInterface.h"
+#include <igl/min_quad_dense.h>
+#include <igl/get_seconds.h>
+#include <igl/columnize.h>
+
+// defined if no early exit is supported, i.e., always take a fixed number of iterations
+#define IGL_ARAP_DOF_FIXED_ITERATIONS_COUNT
+
+// A carefull derivation of this implementation is given in the corresponding
+// matlab function arap_dof.m
+template <typename LbsMatrixType, typename SSCALAR>
+IGL_INLINE bool igl::arap_dof_precomputation(
+  const Eigen::MatrixXd & V, 
+  const Eigen::MatrixXi & F,
+  const LbsMatrixType & M,
+  const Eigen::Matrix<int,Eigen::Dynamic,1> & G,
+  ArapDOFData<LbsMatrixType, SSCALAR> & data)
+{
+  using namespace Eigen;
+  typedef Matrix<SSCALAR, Dynamic, Dynamic> MatrixXS;
+  // number of mesh (domain) vertices
+  int n = V.rows();
+  // cache problem size
+  data.n = n;
+  // dimension of mesh
+  data.dim = V.cols();
+  assert(data.dim == M.rows()/n);
+  assert(data.dim*n == M.rows());
+  if(data.dim == 3)
+  {
+    // Check if z-coordinate is all zeros
+    if(V.col(2).minCoeff() == 0 && V.col(2).maxCoeff() == 0)
+    {
+      data.effective_dim = 2;
+    }
+  }else
+  {
+    data.effective_dim = data.dim;
+  }
+  // Number of handles
+  data.m = M.cols()/data.dim/(data.dim+1);
+  assert(data.m*data.dim*(data.dim+1) == M.cols());
+  //assert(m == C.rows());
+
+  //printf("n=%d; dim=%d; m=%d;\n",n,data.dim,data.m);
+
+  // Build cotangent laplacian
+  SparseMatrix<double> Lcot;
+  //printf("cotmatrix()\n");
+  cotmatrix(V,F,Lcot);
+  // Discrete laplacian (should be minus matlab version)
+  SparseMatrix<double> Lapl = -2.0*Lcot;
+#ifdef EXTREME_VERBOSE
+  cout<<"LaplIJV=["<<endl;print_ijv(Lapl,1);cout<<endl<<"];"<<
+    endl<<"Lapl=sparse(LaplIJV(:,1),LaplIJV(:,2),LaplIJV(:,3),"<<
+    Lapl.rows()<<","<<Lapl.cols()<<");"<<endl;
+#endif
+
+  // Get group sum scatter matrix, when applied sums all entries of the same
+  // group according to G
+  SparseMatrix<double> G_sum;
+  if(G.size() == 0)
+  {
+    speye(n,G_sum);
+  }else
+  {
+    // groups are defined per vertex, convert to per face using mode
+    Eigen::Matrix<int,Eigen::Dynamic,1> GG;
+    if(data.energy == ARAP_ENERGY_TYPE_ELEMENTS)
+    {
+      MatrixXi GF(F.rows(),F.cols());
+      for(int j = 0;j<F.cols();j++)
+      {
+        Matrix<int,Eigen::Dynamic,1> GFj;
+        slice(G,F.col(j),GFj);
+        GF.col(j) = GFj;
+      }
+      mode<int>(GF,2,GG);
+    }else
+    {
+      GG=G;
+    }
+
+    //printf("group_sum_matrix()\n");
+    group_sum_matrix(GG,G_sum);
+  }
+#ifdef EXTREME_VERBOSE
+  cout<<"G_sumIJV=["<<endl;print_ijv(G_sum,1);cout<<endl<<"];"<<
+    endl<<"G_sum=sparse(G_sumIJV(:,1),G_sumIJV(:,2),G_sumIJV(:,3),"<<
+    G_sum.rows()<<","<<G_sum.cols()<<");"<<endl;
+#endif
+
+  // Get covariance scatter matrix, when applied collects the covariance matrices
+  // used to fit rotations to during optimization
+  SparseMatrix<double> CSM;
+  //printf("covariance_scatter_matrix()\n");
+  covariance_scatter_matrix(V,F,data.energy,CSM);
+#ifdef EXTREME_VERBOSE
+  cout<<"CSMIJV=["<<endl;print_ijv(CSM,1);cout<<endl<<"];"<<
+    endl<<"CSM=sparse(CSMIJV(:,1),CSMIJV(:,2),CSMIJV(:,3),"<<
+    CSM.rows()<<","<<CSM.cols()<<");"<<endl;
+#endif
+  
+
+  // Build the covariance matrix "constructor". This is a set of *scatter*
+  // matrices that when multiplied on the right by column of the transformation
+  // matrix entries (the degrees of freedom) L, we get a stack of dim by 1
+  // covariance matrix column, with a column in the stack for each rotation
+  // *group*. The output is a list of matrices because we construct each column
+  // in the stack of covariance matrices with an independent matrix-vector
+  // multiplication.
+  //
+  // We want to build S which is a stack of dim by dim covariance matrices.
+  // Thus S is dim*g by dim, where dim is the number of dimensions and g is the
+  // number of groups. We can precompute dim matrices CSM_M such that column i
+  // in S is computed as S(:,i) = CSM_M{i} * L, where L is a column of the
+  // skinning transformation matrix values. To be clear, the covariance matrix
+  // for group k is then given as the dim by dim matrix pulled from the stack:
+  // S((k-1)*dim + 1:dim,:)
+
+  // Apply group sum to each dimension's block of covariance scatter matrix
+  SparseMatrix<double> G_sum_dim;
+  repdiag(G_sum,data.dim,G_sum_dim);
+  CSM = G_sum_dim * CSM;
+#ifdef EXTREME_VERBOSE
+  cout<<"CSMIJV=["<<endl;print_ijv(CSM,1);cout<<endl<<"];"<<
+    endl<<"CSM=sparse(CSMIJV(:,1),CSMIJV(:,2),CSMIJV(:,3),"<<
+    CSM.rows()<<","<<CSM.cols()<<");"<<endl;
+#endif
+
+  //printf("CSM_M()\n");
+  // Precompute CSM times M for each dimension
+  data.CSM_M.resize(data.dim);
+#ifdef EXTREME_VERBOSE
+  cout<<"data.CSM_M = cell("<<data.dim<<",1);"<<endl;
+#endif
+  // span of integers from 0 to n-1
+  Eigen::Matrix<int,Eigen::Dynamic,1> span_n(n);
+  for(int i = 0;i<n;i++)
+  {
+    span_n(i) = i;
+  }
+
+  // span of integers from 0 to M.cols()-1
+  Eigen::Matrix<int,Eigen::Dynamic,1> span_mlbs_cols(M.cols());
+  for(int i = 0;i<M.cols();i++)
+  {
+    span_mlbs_cols(i) = i;
+  }
+
+  // number of groups
+  int k = CSM.rows()/data.dim;
+  for(int i = 0;i<data.dim;i++)
+  {
+    //printf("CSM_M(): Mi\n");
+    LbsMatrixType M_i;
+    //printf("CSM_M(): slice\n");
+    slice(M,(span_n.array()+i*n).matrix(),span_mlbs_cols,M_i);
+    LbsMatrixType M_i_dim;
+    data.CSM_M[i].resize(k*data.dim,data.m*data.dim*(data.dim+1));
+    assert(data.CSM_M[i].cols() == M.cols());
+    for(int j = 0;j<data.dim;j++)
+    {
+      SparseMatrix<double> CSMj;
+      //printf("CSM_M(): slice\n");
+      slice(
+        CSM,
+        colon<int>(j*k,(j+1)*k-1),
+        colon<int>(j*n,(j+1)*n-1),
+        CSMj);
+      assert(CSMj.rows() == k);
+      assert(CSMj.cols() == n);
+      LbsMatrixType CSMjM_i = CSMj * M_i;
+      if(is_sparse(CSMjM_i))
+      {
+        // Convert to full
+        MatrixXd CSMjM_ifull;
+        //printf("CSM_M(): full\n");
+        full(CSMjM_i,CSMjM_ifull);
+//        printf("CSM_M[%d]: %d %d\n",i,data.CSM_M[i].rows(),data.CSM_M[i].cols());
+//        printf("CSM_M[%d].block(%d*%d=%d,0,%d,%d): %d %d\n",i,j,k,CSMjM_i.rows(),CSMjM_i.cols(),
+//            data.CSM_M[i].block(j*k,0,CSMjM_i.rows(),CSMjM_i.cols()).rows(),
+//            data.CSM_M[i].block(j*k,0,CSMjM_i.rows(),CSMjM_i.cols()).cols());
+//        printf("CSM_MjMi: %d %d\n",i,CSMjM_i.rows(),CSMjM_i.cols());
+//        printf("CSM_MjM_ifull: %d %d\n",i,CSMjM_ifull.rows(),CSMjM_ifull.cols());
+        data.CSM_M[i].block(j*k,0,CSMjM_i.rows(),CSMjM_i.cols()) = CSMjM_ifull;
+      }else
+      {
+        data.CSM_M[i].block(j*k,0,CSMjM_i.rows(),CSMjM_i.cols()) = CSMjM_i;
+      }
+    }
+#ifdef EXTREME_VERBOSE
+    cout<<"CSM_Mi=["<<endl<<data.CSM_M[i]<<endl<<"];"<<endl;
+#endif
+  }
+
+  // precompute arap_rhs matrix
+  //printf("arap_rhs()\n");
+  SparseMatrix<double> K;
+  arap_rhs(V,F,data.energy,K);
+//#ifdef EXTREME_VERBOSE
+//  cout<<"KIJV=["<<endl;print_ijv(K,1);cout<<endl<<"];"<<
+//    endl<<"K=sparse(KIJV(:,1),KIJV(:,2),KIJV(:,3),"<<
+//    K.rows()<<","<<K.cols()<<");"<<endl;
+//#endif
+  // Precompute left muliplication by M and right multiplication by G_sum
+  SparseMatrix<double> G_sumT = G_sum.transpose();
+  SparseMatrix<double> G_sumT_dim_dim;
+  repdiag(G_sumT,data.dim*data.dim,G_sumT_dim_dim);
+  LbsMatrixType MT = M.transpose();
+  // If this is a bottle neck then consider reordering matrix multiplication
+  data.M_KG = -4.0 * (MT * (K * G_sumT_dim_dim));
+//#ifdef EXTREME_VERBOSE
+//  cout<<"data.M_KGIJV=["<<endl;print_ijv(data.M_KG,1);cout<<endl<<"];"<<
+//    endl<<"data.M_KG=sparse(data.M_KGIJV(:,1),data.M_KGIJV(:,2),data.M_KGIJV(:,3),"<<
+//    data.M_KG.rows()<<","<<data.M_KG.cols()<<");"<<endl;
+//#endif
+
+  // Precompute system matrix
+  //printf("A()\n");
+  SparseMatrix<double> A;
+  repdiag(Lapl,data.dim,A);
+  data.Q = MT * (A * M);
+//#ifdef EXTREME_VERBOSE
+//  cout<<"QIJV=["<<endl;print_ijv(data.Q,1);cout<<endl<<"];"<<
+//    endl<<"Q=sparse(QIJV(:,1),QIJV(:,2),QIJV(:,3),"<<
+//    data.Q.rows()<<","<<data.Q.cols()<<");"<<endl;
+//#endif
+
+  // Always do dynamics precomputation so we can hot-switch
+  //if(data.with_dynamics)
+  //{
+    // Build cotangent laplacian
+    SparseMatrix<double> Mass;
+    //printf("massmatrix()\n");
+    massmatrix(V,F,(F.cols()>3?MASSMATRIX_BARYCENTRIC:MASSMATRIX_VORONOI),Mass);
+    //cout<<"MIJV=["<<endl;print_ijv(Mass,1);cout<<endl<<"];"<<
+    //  endl<<"M=sparse(MIJV(:,1),MIJV(:,2),MIJV(:,3),"<<
+    //  Mass.rows()<<","<<Mass.cols()<<");"<<endl;
+    //speye(data.n,Mass);
+    SparseMatrix<double> Mass_rep;
+    repdiag(Mass,data.dim,Mass_rep);
+
+    // Multiply either side by weights matrix (should be dense)
+    data.Mass_tilde = MT * Mass_rep * M;
+    MatrixXd ones(data.dim*data.n,data.dim);
+    for(int i = 0;i<data.n;i++)
+    {
+      for(int d = 0;d<data.dim;d++)
+      {
+        ones(i+d*data.n,d) = 1;
+      }
+    }
+    data.fgrav = MT * (Mass_rep * ones);
+    data.fext = MatrixXS::Zero(MT.rows(),1);
+    //data.fgrav = MT * (ones);
+  //}
+
+
+  // This may/should be superfluous
+  //printf("is_symmetric()\n");
+  if(!is_symmetric(data.Q))
+  {
+    //printf("Fixing symmetry...\n");
+    // "Fix" symmetry
+    LbsMatrixType QT = data.Q.transpose();
+    LbsMatrixType Q_copy = data.Q;
+    data.Q = 0.5*(Q_copy+QT);
+    // Check that ^^^ this really worked. It doesn't always
+    //assert(is_symmetric(*Q));
+  }
+
+  //printf("arap_dof_precomputation() succeeded... so far...\n");
+  verbose("Number of handles: %i\n", data.m);
+  return true;
+}
+
+/////////////////////////////////////////////////////////////////////////
+//
+// STATIC FUNCTIONS (These should be removed or properly defined)
+//
+/////////////////////////////////////////////////////////////////////////
+namespace igl
+{
+  // returns maximal difference of 'blok' from scalar times 3x3 identity:
+  template <typename SSCALAR>
+  inline static SSCALAR maxBlokErr(const Eigen::Matrix3f &blok)
+  {
+    SSCALAR mD;
+    SSCALAR value = blok(0,0);
+    SSCALAR diff1 = fabs(blok(1,1) - value);
+    SSCALAR diff2 = fabs(blok(2,2) - value);
+    if (diff1 > diff2) mD = diff1;
+    else mD = diff2;
+    
+    for (int v=0; v<3; v++)
+    {
+      for (int w=0; w<3; w++)
+      {
+        if (v == w)
+        {
+          continue;
+        }
+        if (mD < fabs(blok(v, w)))
+        {
+          mD = fabs(blok(v, w));
+        }
+      }
+    }
+    
+    return mD;
+  }
+  
+  // converts CSM_M_SSCALAR[0], CSM_M_SSCALAR[1], CSM_M_SSCALAR[2] into one "condensed" matrix CSM while checking we're not 
+  // loosing any information by this process; specifically, returns maximal difference from scaled 3x3 identity blocks,
+  // which should be pretty small number
+  template <typename MatrixXS>
+  static typename MatrixXS::Scalar condense_CSM(const std::vector<MatrixXS> &CSM_M_SSCALAR, int numBones, int dim, MatrixXS &CSM)
+  {
+    const int numRows = CSM_M_SSCALAR[0].rows();
+    assert(CSM_M_SSCALAR[0].cols() == dim*(dim+1)*numBones);
+    assert(CSM_M_SSCALAR[1].cols() == dim*(dim+1)*numBones);
+    assert(CSM_M_SSCALAR[2].cols() == dim*(dim+1)*numBones);
+    assert(CSM_M_SSCALAR[1].rows() == numRows);
+    assert(CSM_M_SSCALAR[2].rows() == numRows);
+  
+    const int numCols = (dim + 1)*numBones;
+    CSM.resize(numRows, numCols);
+  
+    typedef typename MatrixXS::Scalar SSCALAR;
+    SSCALAR maxDiff = 0.0f;
+  
+    for (int r=0; r<numRows; r++)
+    {
+      for (int coord=0; coord<dim+1; coord++)
+      {
+        for (int b=0; b<numBones; b++)
+        {
+          // this is just a test if we really have a multiple of 3x3 identity
+          Eigen::Matrix3f blok;
+          for (int v=0; v<3; v++)
+          {
+            for (int w=0; w<3; w++)
+            {
+              blok(v,w) = CSM_M_SSCALAR[v](r, coord*(numBones*dim) + b + w*numBones);
+            }          
+          }
+  
+          //SSCALAR value[3];
+          //for (int v=0; v<3; v++)
+          //  CSM_M_SSCALAR[v](r, coord*(numBones*dim) + b + v*numBones);
+  
+          SSCALAR mD = maxBlokErr<SSCALAR>(blok);
+          if (mD > maxDiff) maxDiff = mD;
+  
+          // use the first value:
+          CSM(r, coord*numBones + b) = blok(0,0);
+        }
+      }
+    }
+  
+    return maxDiff;
+  }
+  
+  // splits x_0, ... , x_dim coordinates in column vector 'L' into a numBones*(dimp1) x dim matrix 'Lsep';
+  // assumes 'Lsep' has already been preallocated
+  //
+  // is this the same as uncolumnize? no.
+  template <typename MatL, typename MatLsep>
+  static void splitColumns(
+   const MatL &L, 
+   int numBones, 
+   int dim, 
+   int dimp1, 
+   MatLsep &Lsep)
+  {
+    assert(L.cols() == 1);
+    assert(L.rows() == dim*(dimp1)*numBones);
+  
+    assert(Lsep.rows() == (dimp1)*numBones && Lsep.cols() == dim);
+  
+    for (int b=0; b<numBones; b++)
+    {
+      for (int coord=0; coord<dimp1; coord++)
+      {
+        for (int c=0; c<dim; c++)
+        {
+          Lsep(coord*numBones + b, c) = L(coord*numBones*dim + c*numBones + b, 0);
+        }
+      }
+    }
+  }
+  
+  
+  // the inverse of splitColumns, i.e., takes numBones*(dimp1) x dim matrix 'Lsep' and merges the dimensions
+  // into columns vector 'L' (which is assumed to be already allocated):
+  //
+  // is this the same as columnize? no.
+  template <typename MatrixXS>
+  static void mergeColumns(const MatrixXS &Lsep, int numBones, int dim, int dimp1, MatrixXS &L)
+  {
+    assert(L.cols() == 1);
+    assert(L.rows() == dim*(dimp1)*numBones);
+  
+    assert(Lsep.rows() == (dimp1)*numBones && Lsep.cols() == dim);
+  
+    for (int b=0; b<numBones; b++)
+    {
+      for (int coord=0; coord<dimp1; coord++)
+      {
+        for (int c=0; c<dim; c++)
+        {
+          L(coord*numBones*dim + c*numBones + b, 0) = Lsep(coord*numBones + b, c);
+        }
+      }
+    }
+  }
+  
+  // converts "Solve1" the "rotations" part of FullSolve matrix (the first part)
+  // into one "condensed" matrix CSolve1 while checking we're not loosing any
+  // information by this process; specifically, returns maximal difference from
+  // scaled 3x3 identity blocks, which should be pretty small number
+  template <typename MatrixXS>
+  static typename MatrixXS::Scalar condense_Solve1(MatrixXS &Solve1, int numBones, int numGroups, int dim, MatrixXS &CSolve1)
+  {
+    assert(Solve1.rows() == dim*(dim + 1)*numBones);
+    assert(Solve1.cols() == dim*dim*numGroups);
+  
+    typedef typename MatrixXS::Scalar SSCALAR;
+    SSCALAR maxDiff = 0.0f;
+  
+    CSolve1.resize((dim + 1)*numBones, dim*numGroups);  
+    for (int rowCoord=0; rowCoord<dim+1; rowCoord++)
+    {
+      for (int b=0; b<numBones; b++)
+      {
+        for (int colCoord=0; colCoord<dim; colCoord++)
+        {
+          for (int g=0; g<numGroups; g++)
+          {
+            Eigen::Matrix3f blok;
+            for (int r=0; r<3; r++)
+            {
+              for (int c=0; c<3; c++)
+              {
+                blok(r, c) = Solve1(rowCoord*numBones*dim + r*numBones + b, colCoord*numGroups*dim + c*numGroups + g);
+              }
+            }
+  
+            SSCALAR mD = maxBlokErr<SSCALAR>(blok);
+            if (mD > maxDiff) maxDiff = mD;
+  
+            CSolve1(rowCoord*numBones + b, colCoord*numGroups + g) = blok(0,0);
+          }
+        }
+      }
+    }  
+    
+    return maxDiff;
+  }
+}
+
+template <typename LbsMatrixType, typename SSCALAR>
+IGL_INLINE bool igl::arap_dof_recomputation(
+  const Eigen::Matrix<int,Eigen::Dynamic,1> & fixed_dim,
+  const Eigen::SparseMatrix<double> & A_eq,
+  ArapDOFData<LbsMatrixType, SSCALAR> & data)
+{
+  using namespace Eigen;
+  typedef Matrix<SSCALAR, Dynamic, Dynamic> MatrixXS;
+
+  LbsMatrixType * Q;
+  LbsMatrixType Qdyn;
+  if(data.with_dynamics)
+  {
+    // multiply by 1/timestep and to quadratic coefficients matrix
+    // Might be missing a 0.5 here
+    LbsMatrixType Q_copy = data.Q;
+    Qdyn = Q_copy + (1.0/(data.h*data.h))*data.Mass_tilde;
+    Q = &Qdyn;
+
+    // This may/should be superfluous
+    //printf("is_symmetric()\n");
+    if(!is_symmetric(*Q))
+    {
+      //printf("Fixing symmetry...\n");
+      // "Fix" symmetry
+      LbsMatrixType QT = (*Q).transpose();
+      LbsMatrixType Q_copy = *Q;
+      *Q = 0.5*(Q_copy+QT);
+      // Check that ^^^ this really worked. It doesn't always
+      //assert(is_symmetric(*Q));
+    }
+  }else
+  {
+    Q = &data.Q;
+  }
+
+  assert((int)data.CSM_M.size() == data.dim);
+  assert(A_eq.cols() == data.m*data.dim*(data.dim+1));
+  data.fixed_dim = fixed_dim;
+
+  if(fixed_dim.size() > 0)
+  {
+    assert(fixed_dim.maxCoeff() < data.m*data.dim*(data.dim+1));
+    assert(fixed_dim.minCoeff() >= 0);
+  }
+
+#ifdef EXTREME_VERBOSE
+  cout<<"data.fixed_dim=["<<endl<<data.fixed_dim<<endl<<"]+1;"<<endl;
+#endif
+
+  // Compute dense solve matrix (alternative of matrix factorization)
+  //printf("min_quad_dense_precompute()\n");
+  MatrixXd Qfull; full(*Q, Qfull);  
+  MatrixXd A_eqfull; full(A_eq, A_eqfull);
+  MatrixXd M_Solve;
+
+  double timer0_start = get_seconds_hires();
+  bool use_lu = data.effective_dim != 2;
+  //use_lu = false;
+  //printf("use_lu: %s\n",(use_lu?"TRUE":"FALSE"));
+  min_quad_dense_precompute(Qfull, A_eqfull, use_lu,M_Solve);
+  double timer0_end = get_seconds_hires();
+  verbose("Bob timing: %.20f\n", (timer0_end - timer0_start)*1000.0);
+
+  // Precompute full solve matrix:
+  const int fsRows = data.m * data.dim * (data.dim + 1); // 12 * number_of_bones
+  const int fsCols1 = data.M_KG.cols(); // 9 * number_of_posConstraints
+  const int fsCols2 = A_eq.rows(); // number_of_posConstraints
+  data.M_FullSolve.resize(fsRows, fsCols1 + fsCols2);
+  // note the magical multiplicative constant "-0.5", I've no idea why it has
+  // to be there :)
+  data.M_FullSolve << 
+    (-0.5 * M_Solve.block(0, 0, fsRows, fsRows) * data.M_KG).template cast<SSCALAR>(), 
+    M_Solve.block(0, fsRows, fsRows, fsCols2).template cast<SSCALAR>();
+
+  if(data.with_dynamics)
+  {
+    printf(
+      "---------------------------------------------------------------------\n"
+      "\n\n\nWITH DYNAMICS recomputation\n\n\n"
+      "---------------------------------------------------------------------\n"
+      );
+    // Also need to save Π1 before it gets multiplied by Ktilde (aka M_KG)
+    data.Pi_1 = M_Solve.block(0, 0, fsRows, fsRows).template cast<SSCALAR>();
+  }
+
+  // Precompute condensed matrices,
+  // first CSM:
+  std::vector<MatrixXS> CSM_M_SSCALAR;
+  CSM_M_SSCALAR.resize(data.dim);
+  for (int i=0; i<data.dim; i++) CSM_M_SSCALAR[i] = data.CSM_M[i].template cast<SSCALAR>();
+  SSCALAR maxErr1 = condense_CSM(CSM_M_SSCALAR, data.m, data.dim, data.CSM);  
+  verbose("condense_CSM maxErr = %.15f (this should be close to zero)\n", maxErr1);
+  assert(fabs(maxErr1) < 1e-5);
+  
+  // and then solveBlock1:
+  // number of groups
+  const int k = data.CSM_M[0].rows()/data.dim;
+  MatrixXS SolveBlock1 = data.M_FullSolve.block(0, 0, data.M_FullSolve.rows(), data.dim * data.dim * k);
+  SSCALAR maxErr2 = condense_Solve1(SolveBlock1, data.m, k, data.dim, data.CSolveBlock1);  
+  verbose("condense_Solve1 maxErr = %.15f (this should be close to zero)\n", maxErr2);
+  assert(fabs(maxErr2) < 1e-5);
+
+  return true;
+}
+
+template <typename LbsMatrixType, typename SSCALAR>
+IGL_INLINE bool igl::arap_dof_update(
+  const ArapDOFData<LbsMatrixType, SSCALAR> & data,
+  const Eigen::Matrix<double,Eigen::Dynamic,1> & B_eq,
+  const Eigen::MatrixXd & L0,
+  const int max_iters,
+  const double 
+#ifdef IGL_ARAP_DOF_FIXED_ITERATIONS_COUNT
+  tol,
+#else
+  /*tol*/,
+#endif
+  Eigen::MatrixXd & L
+  )
+{
+  using namespace Eigen;
+  typedef Matrix<SSCALAR, Dynamic, Dynamic> MatrixXS;
+#ifdef ARAP_GLOBAL_TIMING
+  double timer_start = get_seconds_hires();
+#endif
+
+  // number of dimensions
+  assert((int)data.CSM_M.size() == data.dim);
+  assert((int)L0.size() == (data.m)*data.dim*(data.dim+1));
+  assert(max_iters >= 0);
+  assert(tol >= 0);
+
+  // timing variables
+  double 
+    sec_start, 
+    sec_covGather, 
+    sec_fitRotations, 
+    //sec_rhs, 
+    sec_prepMult, 
+    sec_solve, sec_end;
+
+  assert(L0.cols() == 1);
+#ifdef EXTREME_VERBOSE
+  cout<<"dim="<<data.dim<<";"<<endl;
+  cout<<"m="<<data.m<<";"<<endl;
+#endif
+
+  // number of groups
+  const int k = data.CSM_M[0].rows()/data.dim;
+  for(int i = 0;i<data.dim;i++)
+  {
+    assert(data.CSM_M[i].rows()/data.dim == k);
+  }
+#ifdef EXTREME_VERBOSE
+  cout<<"k="<<k<<";"<<endl;
+#endif
+
+  // resize output and initialize with initial guess
+  L = L0;
+#ifndef IGL_ARAP_DOF_FIXED_ITERATIONS_COUNT
+  // Keep track of last solution
+  MatrixXS L_prev;
+#endif
+  // We will be iterating on L_SSCALAR, only at the end we convert back to double
+  MatrixXS L_SSCALAR = L.cast<SSCALAR>();
+
+  int iters = 0;
+#ifndef IGL_ARAP_DOF_FIXED_ITERATIONS_COUNT
+  double max_diff = tol+1;  
+#endif
+
+  MatrixXS S(k*data.dim,data.dim);
+  MatrixXS R(data.dim,data.dim*k);
+  Eigen::Matrix<SSCALAR,Eigen::Dynamic,1> Rcol(data.dim * data.dim * k);
+  Matrix<SSCALAR,Dynamic,1> B_eq_SSCALAR = B_eq.cast<SSCALAR>();
+  Matrix<SSCALAR,Dynamic,1> B_eq_fix_SSCALAR;
+  Matrix<SSCALAR,Dynamic,1> L0SSCALAR = L0.cast<SSCALAR>();
+  slice(L0SSCALAR, data.fixed_dim, B_eq_fix_SSCALAR);    
+  //MatrixXS rhsFull(Rcol.rows() + B_eq.rows() + B_eq_fix_SSCALAR.rows(), 1); 
+
+  MatrixXS Lsep(data.m*(data.dim + 1), 3);  
+  const MatrixXS L_part2 = 
+    data.M_FullSolve.block(0, Rcol.rows(), data.M_FullSolve.rows(), B_eq_SSCALAR.rows()) * B_eq_SSCALAR;
+  const MatrixXS L_part3 = 
+    data.M_FullSolve.block(0, Rcol.rows() + B_eq_SSCALAR.rows(), data.M_FullSolve.rows(), B_eq_fix_SSCALAR.rows()) * B_eq_fix_SSCALAR;
+  MatrixXS L_part2and3 = L_part2 + L_part3;
+
+  // preallocate workspace variables:
+  MatrixXS Rxyz(k*data.dim, data.dim);  
+  MatrixXS L_part1xyz((data.dim + 1) * data.m, data.dim);
+  MatrixXS L_part1(data.dim * (data.dim + 1) * data.m, 1);
+
+#ifdef ARAP_GLOBAL_TIMING
+    double timer_prepFinished = get_seconds_hires();
+#endif
+
+#ifdef IGL_ARAP_DOF_FIXED_ITERATIONS_COUNT
+  while(iters < max_iters)
+#else
+  while(iters < max_iters && max_diff > tol)
+#endif
+  {  
+    if(data.print_timings)
+    {
+      sec_start = get_seconds_hires();
+    }
+
+#ifndef IGL_ARAP_DOF_FIXED_ITERATIONS_COUNT
+    L_prev = L_SSCALAR;
+#endif
+    ///////////////////////////////////////////////////////////////////////////
+    // Local step: Fix positions, fit rotations
+    ///////////////////////////////////////////////////////////////////////////    
+  
+    // Gather covariance matrices    
+
+    splitColumns(L_SSCALAR, data.m, data.dim, data.dim + 1, Lsep);
+
+    S = data.CSM * Lsep; 
+    // interestingly, this doesn't seem to be so slow, but
+    //MKL is still 2x faster (probably due to AVX)
+    //#ifdef IGL_ARAP_DOF_DOUBLE_PRECISION_SOLVE
+    //    MKL_matMatMult_double(S, data.CSM, Lsep);
+    //#else
+    //    MKL_matMatMult_single(S, data.CSM, Lsep);
+    //#endif
+    
+    if(data.print_timings)
+    {
+      sec_covGather = get_seconds_hires();
+    }
+
+#ifdef EXTREME_VERBOSE
+    cout<<"S=["<<endl<<S<<endl<<"];"<<endl;
+#endif
+    // Fit rotations to covariance matrices
+    if(data.effective_dim == 2)
+    {
+      fit_rotations_planar(S,R);
+    }else
+    {
+#ifdef __SSE__
+      // fit_rotations_SSE will convert to float if necessary
+      fit_rotations_SSE(S,R);
+#else
+      fit_rotations(S,R);
+#endif
+    }
+
+#ifdef EXTREME_VERBOSE
+    cout<<"R=["<<endl<<R<<endl<<"];"<<endl;
+#endif  
+
+    if(data.print_timings)
+    {
+      sec_fitRotations = get_seconds_hires();
+    }
+  
+    ///////////////////////////////////////////////////////////////////////////
+    // "Global" step: fix rotations per mesh vertex, solve for
+    // linear transformations at handles
+    ///////////////////////////////////////////////////////////////////////////
+
+    // all this shuffling is retarded and not completely negligible time-wise;
+    // TODO: change fit_rotations_XXX so it returns R in the format ready for
+    // CSolveBlock1 multiplication
+    columnize(R, k, 2, Rcol);
+#ifdef EXTREME_VERBOSE
+    cout<<"Rcol=["<<endl<<Rcol<<endl<<"];"<<endl;
+#endif  
+    splitColumns(Rcol, k, data.dim, data.dim, Rxyz);
+    
+    if(data.print_timings)
+    {
+      sec_prepMult = get_seconds_hires();
+    }  
+    
+    L_part1xyz = data.CSolveBlock1 * Rxyz;
+    //#ifdef IGL_ARAP_DOF_DOUBLE_PRECISION_SOLVE
+    //    MKL_matMatMult_double(L_part1xyz, data.CSolveBlock1, Rxyz);    
+    //#else
+    //    MKL_matMatMult_single(L_part1xyz, data.CSolveBlock1, Rxyz);    
+    //#endif
+    mergeColumns(L_part1xyz, data.m, data.dim, data.dim + 1, L_part1);
+
+    if(data.with_dynamics)
+    {
+      // Consider reordering or precomputing matrix multiplications
+      MatrixXS L_part1_dyn(data.dim * (data.dim + 1) * data.m, 1);
+      // Eigen can't parse this:
+      //L_part1_dyn = 
+      //  -(2.0/(data.h*data.h)) * data.Pi_1 * data.Mass_tilde * data.L0 +
+      //   (1.0/(data.h*data.h)) * data.Pi_1 * data.Mass_tilde * data.Lm1;
+      // -1.0 because we've moved these linear terms to the right hand side
+      //MatrixXS temp = -1.0 * 
+      //    ((-2.0/(data.h*data.h)) * data.L0.array() + 
+      //      (1.0/(data.h*data.h)) * data.Lm1.array()).matrix();
+      //MatrixXS temp = -1.0 * 
+      //    ( (-1.0/(data.h*data.h)) * data.L0.array() + 
+      //      (1.0/(data.h*data.h)) * data.Lm1.array()
+      //      (-1.0/(data.h*data.h)) * data.L0.array() + 
+      //      ).matrix();
+      //Lvel0 = (1.0/(data.h)) * data.Lm1.array() - data.L0.array();
+      MatrixXS temp = -1.0 * 
+          ( (-1.0/(data.h*data.h)) * data.L0.array() + 
+            (1.0/(data.h)) * data.Lvel0.array()
+            ).matrix();
+      MatrixXd temp_d = temp.template cast<double>();
+
+      MatrixXd temp_g = data.fgrav*(data.grav_mag*data.grav_dir);
+
+      assert(data.fext.rows() == temp_g.rows());
+      assert(data.fext.cols() == temp_g.cols());
+      MatrixXd temp2 = data.Mass_tilde * temp_d + temp_g + data.fext;
+      MatrixXS temp2_f = temp2.cast<SSCALAR>();
+      L_part1_dyn = data.Pi_1 * temp2_f;
+      L_part1.array() = L_part1.array() + L_part1_dyn.array();
+    }
+
+    //L_SSCALAR = L_part1 + L_part2and3;
+    assert(L_SSCALAR.rows() == L_part1.rows() && L_SSCALAR.rows() == L_part2and3.rows());
+    for (int i=0; i<L_SSCALAR.rows(); i++)
+    {
+      L_SSCALAR(i, 0) = L_part1(i, 0) + L_part2and3(i, 0);
+    }
+
+#ifdef EXTREME_VERBOSE
+    cout<<"L=["<<endl<<L<<endl<<"];"<<endl;
+#endif  
+
+    if(data.print_timings)
+    {
+      sec_solve = get_seconds_hires();
+    }
+
+#ifndef IGL_ARAP_DOF_FIXED_ITERATIONS_COUNT
+    // Compute maximum absolute difference with last iteration's solution
+    max_diff = (L_SSCALAR-L_prev).eval().array().abs().matrix().maxCoeff();
+#endif
+    iters++;  
+
+    if(data.print_timings)
+    {
+      sec_end = get_seconds_hires();
+#ifndef WIN32
+      // trick to get sec_* variables to compile without warning on mac
+      if(false)
+#endif
+      printf(
+        "\ntotal iteration time = %f "
+        "[local: covGather = %f, "
+        "fitRotations = %f, "
+        "global: prep = %f, "
+        "solve = %f, "
+        "error = %f [ms]]\n", 
+        (sec_end - sec_start)*1000.0, 
+        (sec_covGather - sec_start)*1000.0, 
+        (sec_fitRotations - sec_covGather)*1000.0, 
+        (sec_prepMult - sec_fitRotations)*1000.0, 
+        (sec_solve - sec_prepMult)*1000.0, 
+        (sec_end - sec_solve)*1000.0 );
+    }
+  }
+
+
+  L = L_SSCALAR.template cast<double>();
+  assert(L.cols() == 1);
+
+#ifdef ARAP_GLOBAL_TIMING
+  double timer_finito = get_seconds_hires();
+  printf(
+    "ARAP preparation = %f, "
+    "all %i iterations = %f [ms]\n", 
+    (timer_prepFinished - timer_start)*1000.0, 
+    max_iters, 
+    (timer_finito - timer_prepFinished)*1000.0);  
+#endif
+
+  return true;
+}
+
+#ifndef IGL_HEADER_ONLY
+// Explicit instanciation
+template bool igl::arap_dof_update<Eigen::Matrix<double, -1, -1, 0, -1, -1>, double>(ArapDOFData<Eigen::Matrix<double, -1, -1, 0, -1, -1>, double> const&, Eigen::Matrix<double, -1, 1, 0, -1, 1> const&, Eigen::Matrix<double, -1, -1, 0, -1, -1> const&, int, double, Eigen::Matrix<double, -1, -1, 0, -1, -1>&);
+template bool igl::arap_dof_recomputation<Eigen::Matrix<double, -1, -1, 0, -1, -1>, double>(Eigen::Matrix<int, -1, 1, 0, -1, 1> const&, Eigen::SparseMatrix<double, 0, int> const&, ArapDOFData<Eigen::Matrix<double, -1, -1, 0, -1, -1>, double>&);
+template bool igl::arap_dof_precomputation<Eigen::Matrix<double, -1, -1, 0, -1, -1>, double>(Eigen::Matrix<double, -1, -1, 0, -1, -1> const&, Eigen::Matrix<int, -1, -1, 0, -1, -1> const&, Eigen::Matrix<double, -1, -1, 0, -1, -1> const&, Eigen::Matrix<int, -1, 1, 0, -1, 1> const&, ArapDOFData<Eigen::Matrix<double, -1, -1, 0, -1, -1>, double>&);
+#endif

+ 244 - 0
include/igl/svd3x3/arap_dof.h

@@ -0,0 +1,244 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_ARAP_ENERGY_TYPE_DOF_H
+#define IGL_ARAP_ENERGY_TYPE_DOF_H
+#include <igl/igl_inline.h>
+
+#include <Eigen/Dense>
+#include <Eigen/Sparse>
+#include <igl/ARAPEnergyType.h>
+#include <vector>
+
+namespace igl
+{
+  // Caller example:
+  //
+  // Once:
+  // arap_dof_precomputation(...)
+  //
+  // Each frame:
+  // while(not satisfied)
+  //   arap_dof_update(...)
+  // end
+  
+  template <typename LbsMatrixType, typename SSCALAR>
+  struct ArapDOFData;
+  
+  ///////////////////////////////////////////////////////////////////////////
+  //
+  // Arap DOF precomputation consists of two parts the computation. The first is
+  // that which depends solely on the mesh (V,F), the linear blend skinning
+  // weights (M) and the groups G. Then there's the part that depends on the
+  // previous precomputation and the list of free and fixed vertices. 
+  //
+  ///////////////////////////////////////////////////////////////////////////
+  
+  
+  // The code and variables differ from the description in Section 3 of "Fast
+  // Automatic Skinning Transformations" by [Jacobson et al. 2012]
+  // 
+  // Here is a useful conversion table:
+  //
+  // [article]                             [code]
+  // S = \tilde{K} T                       S = CSM * Lsep
+  // S --> R                               S --> R --shuffled--> Rxyz
+  // Gamma_solve RT = Pi_1 \tilde{K} RT    L_part1xyz = CSolveBlock1 * Rxyz 
+  // Pi_1 \tilde{K}                        CSolveBlock1
+  // Peq = [T_full; P_pos]                 
+  // T_full                                B_eq_fix <--- L0
+  // P_pos                                 B_eq
+  // Pi_2 * P_eq =                         Lpart2and3 = Lpart2 + Lpart3
+  //   Pi_2_left T_full +                  Lpart3 = M_fullsolve(right) * B_eq_fix
+  //   Pi_2_right P_pos                    Lpart2 = M_fullsolve(left) * B_eq
+  // T = [Pi_1 Pi_2] [\tilde{K}TRT P_eq]   L = Lpart1 + Lpart2and3
+  //
+  
+  // Precomputes the system we are going to optimize. This consists of building
+  // constructor matrices (to compute covariance matrices from transformations
+  // and to build the poisson solve right hand side from rotation matrix entries)
+  // and also prefactoring the poisson system.
+  //
+  // Inputs:
+  //   V  #V by dim list of vertex positions
+  //   F  #F by {3|4} list of face indices
+  //   M  #V * dim by #handles * dim * (dim+1) matrix such that
+  //     new_V(:) = LBS(V,W,A) = reshape(M * A,size(V)), where A is a column
+  //     vectors formed by the entries in each handle's dim by dim+1 
+  //     transformation matrix. Specifcally, A =
+  //       reshape(permute(Astack,[3 1 2]),n*dim*(dim+1),1)
+  //     or A = [Lxx;Lyx;Lxy;Lyy;tx;ty], and likewise for other dim
+  //     if Astack(:,:,i) is the dim by (dim+1) transformation at handle i
+  //     handles are ordered according to P then BE (point handles before bone
+  //     handles)
+  //   G  #V list of group indices (1 to k) for each vertex, such that vertex i 
+  //     is assigned to group G(i)
+  // Outputs:
+  //   data  structure containing all necessary precomputation for calling
+  //     arap_dof_update
+  // Returns true on success, false on error
+  //
+  // See also: lbs_matrix
+  template <typename LbsMatrixType, typename SSCALAR>
+  IGL_INLINE bool arap_dof_precomputation(
+    const Eigen::MatrixXd & V, 
+    const Eigen::MatrixXi & F,
+    const LbsMatrixType & M,
+    const Eigen::Matrix<int,Eigen::Dynamic,1> & G,
+    ArapDOFData<LbsMatrixType, SSCALAR> & data);
+  
+  // Should always be called after arap_dof_precomputation, but may be called in
+  // between successive calls to arap_dof_update, recomputes precomputation
+  // given that there are only changes in free and fixed
+  //
+  // Inputs:
+  //   fixed_dim  list of transformation element indices for fixed (or partailly
+  //   fixed) handles: not necessarily the complement of 'free'
+  //	  NOTE: the constraints for fixed transformations still need to be
+  //	  present in A_eq
+  //   A_eq  dim*#constraint_points by m*dim*(dim+1)  matrix of linear equality
+  //     constraint coefficients. Each row corresponds to a linear constraint,
+  //     so that A_eq * L = Beq says that the linear transformation entries in
+  //     the column L should produce the user supplied positional constraints
+  //     for each handle in Beq. The row A_eq(i*dim+d) corresponds to the
+  //     constrain on coordinate d of position i
+  // Outputs:
+  //   data  structure containing all necessary precomputation for calling
+  //     arap_dof_update
+  // Returns true on success, false on error
+  //
+  // See also: lbs_matrix
+  template <typename LbsMatrixType, typename SSCALAR>
+  IGL_INLINE bool arap_dof_recomputation(
+    const Eigen::Matrix<int,Eigen::Dynamic,1> & fixed_dim,
+    const Eigen::SparseMatrix<double> & A_eq,
+    ArapDOFData<LbsMatrixType, SSCALAR> & data);
+  
+  // Optimizes the transformations attached to each weight function based on
+  // precomputed system.
+  //
+  // Inputs:
+  //   data  precomputation data struct output from arap_dof_precomputation
+  //   Beq  dim*#constraint_points constraint values.
+  //   L0  #handles * dim * dim+1 list of initial guess transformation entries,
+  //     also holds fixed transformation entries for fixed handles
+  //   max_iters  maximum number of iterations
+  //   tol  stopping critera parameter. If variables (linear transformation
+  //     matrix entries) change by less than 'tol' the optimization terminates,
+  //       0.75 (weak tolerance)
+  //       0.0 (extreme tolerance)
+  // Outputs:
+  //   L  #handles * dim * dim+1 list of final optimized transformation entries,
+  //     allowed to be the same as L
+  template <typename LbsMatrixType, typename SSCALAR>
+  IGL_INLINE bool arap_dof_update(
+    const ArapDOFData<LbsMatrixType,SSCALAR> & data,
+    const Eigen::Matrix<double,Eigen::Dynamic,1> & B_eq,
+    const Eigen::MatrixXd & L0,
+    const int max_iters,
+    const double tol,
+    Eigen::MatrixXd & L
+    );
+  
+  // Structure that contains fields for all precomputed data or data that needs
+  // to be remembered at update
+  template <typename LbsMatrixType, typename SSCALAR>
+  struct ArapDOFData
+  {
+    typedef Eigen::Matrix<SSCALAR, Eigen::Dynamic, Eigen::Dynamic> MatrixXS;
+    // Type of arap energy we're solving
+    igl::ARAPEnergyType energy;
+    //// LU decomposition precomptation data; note: not used by araf_dop_update
+    //// any more, replaced by M_FullSolve
+    //igl::min_quad_with_fixed_data<double> lu_data;
+    // List of indices of fixed transformation entries
+    Eigen::Matrix<int,Eigen::Dynamic,1> fixed_dim;
+    // List of precomputed covariance scatter matrices multiplied by lbs
+    // matrices
+    //std::vector<Eigen::SparseMatrix<double> > CSM_M;
+    std::vector<Eigen::MatrixXd> CSM_M;
+    LbsMatrixType M_KG;
+    // Number of mesh vertices
+    int n;
+    // Number of weight functions
+    int m;
+    // Number of dimensions
+    int dim;
+    // Effective dimensions
+    int effective_dim;
+    // List of indices into C of positional constraints
+    Eigen::Matrix<int,Eigen::Dynamic,1> interpolated;
+    std::vector<bool> free_mask;
+    // Full quadratic coefficients matrix before lagrangian (should be dense)
+    LbsMatrixType Q;
+  
+  
+    //// Solve matrix for the global step
+    //Eigen::MatrixXd M_Solve; // TODO: remove from here
+  
+    // Full solve matrix that contains also conversion from rotations to the right hand side, 
+    // i.e., solves Poisson transformations just from rotations and positional constraints
+    MatrixXS M_FullSolve;
+  
+    // Precomputed condensed matrices (3x3 commutators folded to 1x1):
+    MatrixXS CSM;
+    MatrixXS CSolveBlock1;
+  
+    // Print timings at each update
+    bool print_timings;
+  
+    // Dynamics
+    bool with_dynamics;
+    // I'm hiding the extra dynamics stuff in this struct, which sort of defeats
+    // the purpose of this function-based coding style...
+  
+    // Time step
+    double h;
+  
+    // L0  #handles * dim * dim+1 list of transformation entries from
+    // previous solve
+    MatrixXS L0;
+    //// Lm1  #handles * dim * dim+1 list of transformation entries from
+    //// previous-previous solve
+    //MatrixXS Lm1;
+    // "Velocity"
+    MatrixXS Lvel0;
+  
+    // #V by dim matrix of external forces
+    // fext
+    MatrixXS fext;
+  
+    // Mass_tilde: MT * Mass * M
+    LbsMatrixType Mass_tilde;
+  
+    // Force due to gravity (premultiplier)
+    Eigen::MatrixXd fgrav;
+    // Direction of gravity
+    Eigen::Vector3d grav_dir;
+    // Magnitude of gravity
+    double grav_mag;
+    
+    // Π1 from the paper
+    MatrixXS Pi_1;
+  
+    // Default values
+    ArapDOFData(): 
+      energy(igl::ARAP_ENERGY_TYPE_SPOKES), 
+      with_dynamics(false),
+      h(1),
+      grav_dir(0,-1,0),
+      grav_mag(0)
+    {
+    }
+  };
+}
+
+#ifdef IGL_HEADER_ONLY
+#  include "arap_dof.cpp"
+#endif
+
+#endif

+ 224 - 0
include/igl/svd3x3/fit_rotations.cpp

@@ -0,0 +1,224 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "fit_rotations.h"
+#include "polar_svd3x3.h"
+#include <igl/repmat.h>
+#include <igl/verbose.h>
+#include <igl/polar_dec.h>
+#include <igl/polar_svd.h>
+#include <iostream>
+
+template <typename DerivedS, typename DerivedD>
+IGL_INLINE void igl::fit_rotations(
+  const Eigen::PlainObjectBase<DerivedS> & S,
+        Eigen::PlainObjectBase<DerivedD> & R)
+{
+  using namespace std;
+  const int dim = S.cols();
+  const int nr = S.rows()/dim;
+  assert(nr * dim == S.rows());
+
+  // resize output
+  R.resize(dim,dim*nr); // hopefully no op (should be already allocated)
+
+  //std::cout<<"S=["<<std::endl<<S<<std::endl<<"];"<<std::endl;
+  //MatrixXd si(dim,dim);
+  Eigen::Matrix<typename DerivedS::Scalar,3,3> si;// = Eigen::Matrix3d::Identity();
+  // loop over number of rotations we're computing
+  for(int r = 0;r<nr;r++)
+  {
+    // build this covariance matrix
+    for(int i = 0;i<dim;i++)
+    {
+      for(int j = 0;j<dim;j++)
+      {
+        si(i,j) = S(i*nr+r,j);
+      }
+    }
+    Eigen::Matrix<typename DerivedD::Scalar,3,3> ri;
+    Eigen::Matrix<typename DerivedD::Scalar,3,3> ti;
+    //polar_dec(si,ri,ti);
+    //polar_svd(si,ri,ti);
+    polar_svd3x3(si, ri);
+    assert(ri.determinant() >= 0);
+#ifndef FIT_ROTATIONS_ALLOW_FLIPS
+    // Check for reflection
+    if(ri.determinant() < 0)
+    {
+      cerr<<"Error: Warning: flipping is wrong..."<<endl;
+      assert(false && "This is wrong. Need to flip column in U and recompute R = U*V'");
+      // flip sign of last row
+      ri.row(2) *= -1;
+    }
+    assert(ri.determinant() >= 0);
+#endif  
+    // Not sure why polar_dec computes transpose...
+    R.block(0,r*dim,dim,dim) = ri.block(0,0,dim,dim).transpose();
+  }
+}
+
+template <typename DerivedS, typename DerivedD>
+IGL_INLINE void igl::fit_rotations_planar(
+  const Eigen::PlainObjectBase<DerivedS> & S,
+        Eigen::PlainObjectBase<DerivedD> & R)
+{ 
+  using namespace std;
+  const int dim = S.cols();
+  const int nr = S.rows()/dim;
+  assert(nr * dim == S.rows());
+
+  // resize output
+  R.resize(dim,dim*nr); // hopefully no op (should be already allocated)
+
+  Eigen::Matrix<typename DerivedS::Scalar,2,2> si;
+  // loop over number of rotations we're computing
+  for(int r = 0;r<nr;r++)
+  {
+    // build this covariance matrix
+    for(int i = 0;i<2;i++)
+    {
+      for(int j = 0;j<2;j++)
+      {
+        si(i,j) = S(i*nr+r,j);
+      }
+    }
+    Eigen::Matrix<typename DerivedD::Scalar,2,2> ri;
+    Eigen::Matrix<typename DerivedD::Scalar,2,2> ti;
+    igl::polar_svd(si,ri,ti);
+#ifndef FIT_ROTATIONS_ALLOW_FLIPS
+    // Check for reflection
+    if(ri.determinant() < 0)
+    {
+      cerr<<"Error: Warning: flipping is wrong..."<<endl;
+      assert(false && "This is wrong. Need to flip column in U and recompute R = U*V'");
+      // flip sign of last row
+      ri.row(1) *= -1;
+    }
+    assert(ri.determinant() >= 0);
+#endif  
+    // Not sure why polar_dec computes transpose...
+    R.block(0,r*dim,2,2) = ri.block(0,0,2,2).transpose();
+    // Set remaining part to identity
+    R(0,r*3+2) = 0;
+    R(1,r*3+2) = 0;
+    R(2,r*3+0) = 0;
+    R(2,r*3+1) = 0;
+    R(2,r*3+2) = 1;
+  }
+}
+
+
+#ifdef __SSE__
+IGL_INLINE void igl::fit_rotations_SSE(
+  const Eigen::MatrixXf & S, 
+  Eigen::MatrixXf & R)
+{
+  const int cStep = 4;
+
+  assert(S.cols() == 3);
+  const int dim = 3; //S.cols();
+  const int nr = S.rows()/dim;  
+  assert(nr * dim == S.rows());
+
+  // resize output
+  R.resize(dim,dim*nr); // hopefully no op (should be already allocated)
+
+  Eigen::Matrix<float, 3*cStep, 3> siBig;
+  // using SSE decompose cStep matrices at a time:
+  int r = 0;
+  for( ; r<nr; r+=cStep)
+  {
+    int numMats = cStep;
+    if (r + cStep >= nr) numMats = nr - r;
+    // build siBig:
+    for (int k=0; k<numMats; k++)
+    {
+      for(int i = 0;i<dim;i++)
+      {
+        for(int j = 0;j<dim;j++)
+        {
+          siBig(i + 3*k, j) = S(i*nr + r + k, j);
+        }
+      }
+    }
+    Eigen::Matrix<float, 3*cStep, 3> ri;
+    polar_svd3x3_sse(siBig, ri);    
+
+    for (int k=0; k<cStep; k++)
+      assert(ri.block(3*k, 0, 3, 3).determinant() >= 0);
+
+    // Not sure why polar_dec computes transpose...
+    for (int k=0; k<numMats; k++)
+    {
+      R.block(0, (r + k)*dim, dim, dim) = ri.block(3*k, 0, dim, dim).transpose();
+    }    
+  }
+}
+
+IGL_INLINE void igl::fit_rotations_SSE(
+  const Eigen::MatrixXd & S,
+  Eigen::MatrixXd & R)
+{
+  const Eigen::MatrixXf Sf = S.cast<float>();
+  Eigen::MatrixXf Rf;
+  fit_rotations_SSE(Sf,Rf);
+  R = Rf.cast<double>();
+}
+#endif
+
+#ifdef __AVX__
+IGL_INLINE void igl::fit_rotations_AVX(
+  const Eigen::MatrixXf & S,
+  Eigen::MatrixXf & R)
+{
+  const int cStep = 8;
+
+  assert(S.cols() == 3);
+  const int dim = 3; //S.cols();
+  const int nr = S.rows()/dim;  
+  assert(nr * dim == S.rows());
+
+  // resize output
+  R.resize(dim,dim*nr); // hopefully no op (should be already allocated)
+
+  Eigen::Matrix<float, 3*cStep, 3> siBig;
+  // using SSE decompose cStep matrices at a time:
+  int r = 0;
+  for( ; r<nr; r+=cStep)
+  {
+    int numMats = cStep;
+    if (r + cStep >= nr) numMats = nr - r;
+    // build siBig:
+    for (int k=0; k<numMats; k++)
+    {
+      for(int i = 0;i<dim;i++)
+      {
+        for(int j = 0;j<dim;j++)
+        {
+          siBig(i + 3*k, j) = S(i*nr + r + k, j);
+        }
+      }
+    }
+    Eigen::Matrix<float, 3*cStep, 3> ri;
+    polar_svd3x3_avx(siBig, ri);    
+
+    for (int k=0; k<cStep; k++)
+      assert(ri.block(3*k, 0, 3, 3).determinant() >= 0);
+
+    // Not sure why polar_dec computes transpose...
+    for (int k=0; k<numMats; k++)
+    {
+      R.block(0, (r + k)*dim, dim, dim) = ri.block(3*k, 0, dim, dim).transpose();
+    }    
+  }
+}
+#endif
+
+// Explicit template instanciation
+template void igl::fit_rotations<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<double, -1, -1, 0, -1, -1> >(Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> >&);
+template void igl::fit_rotations_planar<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<double, -1, -1, 0, -1, -1> >(Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> >&);

+ 55 - 0
include/igl/svd3x3/fit_rotations.h

@@ -0,0 +1,55 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_FIT_ROTATIONS_H
+#define IGL_FIT_ROTATIONS_H
+#include <igl/igl_inline.h>
+#include <Eigen/Core>
+
+namespace igl
+{
+  // FIT_ROTATIONS Given an input mesh and new positions find rotations for
+  // every covariance matrix in a stack of covariance matrices
+  // 
+  // Inputs:
+  //   S  nr*dim by dim stack of covariance matrices
+  // Outputs:
+  //   R  dim by dim * nr list of rotations
+  //
+  template <typename DerivedS, typename DerivedD>
+  IGL_INLINE void fit_rotations(
+    const Eigen::PlainObjectBase<DerivedS> & S,
+          Eigen::PlainObjectBase<DerivedD> & R);
+  
+  // FIT_ROTATIONS Given an input mesh and new positions find 2D rotations for
+  // every vertex that best maps its one ring to the new one ring
+  // 
+  // Inputs:
+  //   S  nr*dim by dim stack of covariance matrices, third column and every
+  //   third row will be ignored
+  // Outputs:
+  //   R  dim by dim * nr list of rotations, third row and third column of each
+  //   rotation will just be identity
+  //
+  template <typename DerivedS, typename DerivedD>
+  IGL_INLINE void fit_rotations_planar(
+    const Eigen::PlainObjectBase<DerivedS> & S,
+          Eigen::PlainObjectBase<DerivedD> & R);
+#ifdef __SSE__
+  IGL_INLINE void fit_rotations_SSE( const Eigen::MatrixXf & S, Eigen::MatrixXf & R);
+  IGL_INLINE void fit_rotations_SSE( const Eigen::MatrixXd & S, Eigen::MatrixXd & R);
+#endif
+#ifdef __AVX__
+  IGL_INLINE void fit_rotations_AVX( const Eigen::MatrixXf & S, Eigen::MatrixXf & R);
+#endif
+}
+
+#ifdef IGL_HEADER_ONLY
+#  include "fit_rotations.cpp"
+#endif
+
+#endif

+ 104 - 0
include/igl/svd3x3/polar_svd3x3.cpp

@@ -0,0 +1,104 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "polar_svd3x3.h"
+#include "svd3x3.h"
+#ifdef __SSE__
+#  include "svd3x3_sse.h"
+#endif
+#ifdef __AVX__
+#  include "svd3x3_avx.h"
+#endif
+
+template<typename Mat>
+IGL_INLINE void igl::polar_svd3x3(const Mat& A, Mat& R)
+{
+  // should be caught at compile time, but just to be 150% sure:
+  assert(A.rows() == 3 && A.cols() == 3);
+
+  Eigen::Matrix<typename Mat::Scalar, 3, 3> U, Vt;
+  Eigen::Matrix<typename Mat::Scalar, 3, 1> S;  
+  svd3x3(A, U, S, Vt);
+  R = U * Vt.transpose();
+}
+
+#ifdef __SSE__
+template<typename T>
+IGL_INLINE void igl::polar_svd3x3_sse(const Eigen::Matrix<T, 3*4, 3>& A, Eigen::Matrix<T, 3*4, 3> &R)
+{
+  // should be caught at compile time, but just to be 150% sure:
+  assert(A.rows() == 3*4 && A.cols() == 3);
+
+  Eigen::Matrix<T, 3*4, 3> U, Vt;
+  Eigen::Matrix<T, 3*4, 1> S;  
+  svd3x3_sse(A, U, S, Vt);
+
+  for (int k=0; k<4; k++)
+  {
+    R.block(3*k, 0, 3, 3) = U.block(3*k, 0, 3, 3) * Vt.block(3*k, 0, 3, 3).transpose();
+  }
+
+  //// test:
+  //for (int k=0; k<4; k++)
+  //{
+  //  Eigen::Matrix3f Apart = A.block(3*k, 0, 3, 3);
+  //  Eigen::Matrix3f Rpart;
+  //  polar_svd3x3(Apart, Rpart);
+
+  //  Eigen::Matrix3f Rpart_SSE = R.block(3*k, 0, 3, 3);
+  //  Eigen::Matrix3f diff = Rpart - Rpart_SSE;
+  //  float diffNorm = diff.norm();
+
+  //  int hu = 1;
+  //}
+  //// eof test
+}
+#endif
+
+#ifdef __AVX__
+template<typename T>
+IGL_INLINE void igl::polar_svd3x3_avx(const Eigen::Matrix<T, 3*8, 3>& A, Eigen::Matrix<T, 3*8, 3> &R)
+{
+  // should be caught at compile time, but just to be 150% sure:
+  assert(A.rows() == 3*8 && A.cols() == 3);
+
+  Eigen::Matrix<T, 3*8, 3> U, Vt;
+  Eigen::Matrix<T, 3*8, 1> S;  
+  svd3x3_avx(A, U, S, Vt);
+
+  for (int k=0; k<8; k++)
+  {
+    R.block(3*k, 0, 3, 3) = U.block(3*k, 0, 3, 3) * Vt.block(3*k, 0, 3, 3).transpose();
+  }
+
+  // test:
+  for (int k=0; k<8; k++)
+  {
+    Eigen::Matrix3f Apart = A.block(3*k, 0, 3, 3);
+    Eigen::Matrix3f Rpart;
+    polar_svd3x3(Apart, Rpart);
+
+    Eigen::Matrix3f Rpart_SSE = R.block(3*k, 0, 3, 3);
+    Eigen::Matrix3f diff = Rpart - Rpart_SSE;
+    float diffNorm = diff.norm();
+    if (abs(diffNorm) > 0.001) 
+    {
+      printf("Huh: diffNorm = %15f (k = %i)\n", diffNorm, k);
+    }
+
+    // Unused
+    //int hu = 1;
+  }
+  // eof test
+}
+#endif
+
+#ifndef IGL_HEADER_ONLY
+// Explicit template instanciation
+template void igl::polar_svd3x3<Eigen::Matrix<double, 3, 3, 0, 3, 3> >(Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, Eigen::Matrix<double, 3, 3, 0, 3, 3>&);
+template void igl::polar_svd3x3_sse<float>(Eigen::Matrix<float, 12, 3, 0, 12, 3> const&, Eigen::Matrix<float, 12, 3, 0, 12, 3>&);
+#endif

+ 38 - 0
include/igl/svd3x3/polar_svd3x3.h

@@ -0,0 +1,38 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_POLAR_SVD3X3_H
+#define IGL_POLAR_SVD3X3_H
+#include <Eigen/Core>
+#include <igl/igl_inline.h>
+namespace igl
+{
+  // Computes the closest rotation to input matrix A using specialized 3x3 SVD singular value decomposition (WunderSVD3x3)
+  // Inputs:
+  //   A  3 by 3 matrix to be decomposed
+  // Outputs:
+  //   R  3 by 3 closest element in SO(3) (closeness in terms of Frobenius metric)
+  //
+  //	This means that det(R) = 1. Technically it's not polar decomposition which guarantees positive semidefinite
+  //   stretch factor (at the cost of having det(R) = -1).
+  //
+  template<typename Mat>
+  IGL_INLINE void polar_svd3x3(const Mat& A, Mat& R);
+  #ifdef __SSE__
+  template<typename T>
+  IGL_INLINE void polar_svd3x3_sse(const Eigen::Matrix<T, 3*4, 3>& A, Eigen::Matrix<T, 3*4, 3> &R);
+  #endif
+  #ifdef __AVX__
+  template<typename T>
+  IGL_INLINE void polar_svd3x3_avx(const Eigen::Matrix<T, 3*8, 3>& A, Eigen::Matrix<T, 3*8, 3> &R);
+  #endif
+}
+#ifdef IGL_HEADER_ONLY
+#  include "polar_svd3x3.cpp"
+#endif
+#endif
+

+ 69 - 0
include/igl/svd3x3/svd3x3.cpp

@@ -0,0 +1,69 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "svd3x3.h"
+
+#include <cmath>
+#include <algorithm>
+
+#define USE_SCALAR_IMPLEMENTATION
+#undef USE_SSE_IMPLEMENTATION
+#undef USE_AVX_IMPLEMENTATION
+#define COMPUTE_U_AS_MATRIX
+#define COMPUTE_V_AS_MATRIX
+#include <Singular_Value_Decomposition_Preamble.hpp>
+
+#pragma runtime_checks( "u", off )  // disable runtime asserts on xor eax,eax type of stuff (doesn't always work, disable explicitly in compiler settings)
+template<typename T>
+IGL_INLINE void igl::svd3x3(const Eigen::Matrix<T, 3, 3>& A, Eigen::Matrix<T, 3, 3> &U, Eigen::Matrix<T, 3, 1> &S, Eigen::Matrix<T, 3, 3>&V)
+{
+	// this code only supports the scalar version (otherwise we'd need to pass arrays of matrices)	
+
+#include "Singular_Value_Decomposition_Kernel_Declarations.hpp"
+
+	ENABLE_SCALAR_IMPLEMENTATION(Sa11.f=A(0,0);)                                      ENABLE_SSE_IMPLEMENTATION(Va11=_mm_loadu_ps(a11);)                                  ENABLE_AVX_IMPLEMENTATION(Va11=_mm256_loadu_ps(a11);)
+		ENABLE_SCALAR_IMPLEMENTATION(Sa21.f=A(1,0);)                                      ENABLE_SSE_IMPLEMENTATION(Va21=_mm_loadu_ps(a21);)                                  ENABLE_AVX_IMPLEMENTATION(Va21=_mm256_loadu_ps(a21);)
+		ENABLE_SCALAR_IMPLEMENTATION(Sa31.f=A(2,0);)                                      ENABLE_SSE_IMPLEMENTATION(Va31=_mm_loadu_ps(a31);)                                  ENABLE_AVX_IMPLEMENTATION(Va31=_mm256_loadu_ps(a31);)
+		ENABLE_SCALAR_IMPLEMENTATION(Sa12.f=A(0,1);)                                      ENABLE_SSE_IMPLEMENTATION(Va12=_mm_loadu_ps(a12);)                                  ENABLE_AVX_IMPLEMENTATION(Va12=_mm256_loadu_ps(a12);)
+		ENABLE_SCALAR_IMPLEMENTATION(Sa22.f=A(1,1);)                                      ENABLE_SSE_IMPLEMENTATION(Va22=_mm_loadu_ps(a22);)                                  ENABLE_AVX_IMPLEMENTATION(Va22=_mm256_loadu_ps(a22);)
+		ENABLE_SCALAR_IMPLEMENTATION(Sa32.f=A(2,1);)                                      ENABLE_SSE_IMPLEMENTATION(Va32=_mm_loadu_ps(a32);)                                  ENABLE_AVX_IMPLEMENTATION(Va32=_mm256_loadu_ps(a32);)
+		ENABLE_SCALAR_IMPLEMENTATION(Sa13.f=A(0,2);)                                      ENABLE_SSE_IMPLEMENTATION(Va13=_mm_loadu_ps(a13);)                                  ENABLE_AVX_IMPLEMENTATION(Va13=_mm256_loadu_ps(a13);)
+		ENABLE_SCALAR_IMPLEMENTATION(Sa23.f=A(1,2);)                                      ENABLE_SSE_IMPLEMENTATION(Va23=_mm_loadu_ps(a23);)                                  ENABLE_AVX_IMPLEMENTATION(Va23=_mm256_loadu_ps(a23);)
+		ENABLE_SCALAR_IMPLEMENTATION(Sa33.f=A(2,2);)                                      ENABLE_SSE_IMPLEMENTATION(Va33=_mm_loadu_ps(a33);)                                  ENABLE_AVX_IMPLEMENTATION(Va33=_mm256_loadu_ps(a33);)
+
+#include "Singular_Value_Decomposition_Main_Kernel_Body.hpp"
+
+		ENABLE_SCALAR_IMPLEMENTATION(U(0,0)=Su11.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u11,Vu11);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u11,Vu11);)
+		ENABLE_SCALAR_IMPLEMENTATION(U(1,0)=Su21.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u21,Vu21);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u21,Vu21);)
+		ENABLE_SCALAR_IMPLEMENTATION(U(2,0)=Su31.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u31,Vu31);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u31,Vu31);)
+		ENABLE_SCALAR_IMPLEMENTATION(U(0,1)=Su12.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u12,Vu12);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u12,Vu12);)
+		ENABLE_SCALAR_IMPLEMENTATION(U(1,1)=Su22.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u22,Vu22);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u22,Vu22);)
+		ENABLE_SCALAR_IMPLEMENTATION(U(2,1)=Su32.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u32,Vu32);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u32,Vu32);)
+		ENABLE_SCALAR_IMPLEMENTATION(U(0,2)=Su13.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u13,Vu13);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u13,Vu13);)
+		ENABLE_SCALAR_IMPLEMENTATION(U(1,2)=Su23.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u23,Vu23);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u23,Vu23);)
+		ENABLE_SCALAR_IMPLEMENTATION(U(2,2)=Su33.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(u33,Vu33);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(u33,Vu33);)
+
+		ENABLE_SCALAR_IMPLEMENTATION(V(0,0)=Sv11.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v11,Vv11);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v11,Vv11);)
+		ENABLE_SCALAR_IMPLEMENTATION(V(1,0)=Sv21.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v21,Vv21);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v21,Vv21);)
+		ENABLE_SCALAR_IMPLEMENTATION(V(2,0)=Sv31.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v31,Vv31);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v31,Vv31);)
+		ENABLE_SCALAR_IMPLEMENTATION(V(0,1)=Sv12.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v12,Vv12);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v12,Vv12);)
+		ENABLE_SCALAR_IMPLEMENTATION(V(1,1)=Sv22.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v22,Vv22);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v22,Vv22);)
+		ENABLE_SCALAR_IMPLEMENTATION(V(2,1)=Sv32.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v32,Vv32);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v32,Vv32);)
+		ENABLE_SCALAR_IMPLEMENTATION(V(0,2)=Sv13.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v13,Vv13);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v13,Vv13);)
+		ENABLE_SCALAR_IMPLEMENTATION(V(1,2)=Sv23.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v23,Vv23);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v23,Vv23);)
+		ENABLE_SCALAR_IMPLEMENTATION(V(2,2)=Sv33.f;)                                      ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(v33,Vv33);)                                 ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(v33,Vv33);)
+
+		ENABLE_SCALAR_IMPLEMENTATION(S(0,0)=Sa11.f;)                                   ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(sigma1,Va11);)                              ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(sigma1,Va11);)
+		ENABLE_SCALAR_IMPLEMENTATION(S(1,0)=Sa22.f;)                                   ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(sigma2,Va22);)                              ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(sigma2,Va22);)
+		ENABLE_SCALAR_IMPLEMENTATION(S(2,0)=Sa33.f;)                                   ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(sigma3,Va33);)                              ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(sigma3,Va33);)
+}
+#pragma runtime_checks( "u", restore ) 
+
+// forced instantiation
+template void igl::svd3x3(const Eigen::Matrix<float, 3, 3>& A, Eigen::Matrix<float, 3, 3> &U, Eigen::Matrix<float, 3, 1> &S, Eigen::Matrix<float, 3, 3>&V);
+template void igl::svd3x3<double>(Eigen::Matrix<double, 3, 3, 0, 3, 3> const&, Eigen::Matrix<double, 3, 3, 0, 3, 3>&, Eigen::Matrix<double, 3, 1, 0, 3, 1>&, Eigen::Matrix<double, 3, 3, 0, 3, 3>&);
+// doesn't even make sense with double because this SVD code is only single precision anyway...

+ 40 - 0
include/igl/svd3x3/svd3x3.h

@@ -0,0 +1,40 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_SVD3X3_H
+#define IGL_SVD3X3_H
+#include <igl/igl_inline.h>
+#include <Eigen/Dense>
+
+namespace igl
+{
+  // Super fast 3x3 SVD according to http://pages.cs.wisc.edu/~sifakis/project_pages/svd.html
+  // The resulting decomposition is A = U * diag(S[0], S[1], S[2]) * V'
+  // BEWARE: this SVD algorithm guarantees that det(U) = det(V) = 1, but this 
+  // comes at the cost that 'sigma3' can be negative
+  // for computing polar decomposition it's great because all we need to do is U*V'
+  // and the result will automatically have positive determinant
+  //
+  // Inputs:
+  //   A  3x3 matrix
+  // Outputs:
+  //   U  3x3 left singular vectors
+  //   S  3x1 singular values
+  //   V  3x3 right singular vectors
+  //
+  // Known bugs: this will not work correctly for double precision.
+  template<typename T>
+  IGL_INLINE void svd3x3(
+    const Eigen::Matrix<T, 3, 3>& A, 
+    Eigen::Matrix<T, 3, 3> &U, 
+    Eigen::Matrix<T, 3, 1> &S, 
+    Eigen::Matrix<T, 3, 3>&V);
+}
+#ifdef IGL_HEADER_ONLY
+#  include "svd3x3.cpp"
+#endif
+#endif

+ 102 - 0
include/igl/svd3x3/svd3x3_avx.cpp

@@ -0,0 +1,102 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifdef __AVX__
+#include "svd3x3_avx.h"
+
+#include <cmath>
+#include <algorithm>
+
+#undef USE_SCALAR_IMPLEMENTATION
+#undef USE_SSE_IMPLEMENTATION
+#define USE_AVX_IMPLEMENTATION
+#define COMPUTE_U_AS_MATRIX
+#define COMPUTE_V_AS_MATRIX
+#include "Singular_Value_Decomposition_Preamble.hpp"
+
+#pragma runtime_checks( "u", off )  // disable runtime asserts on xor eax,eax type of stuff (doesn't always work, disable explicitly in compiler settings)
+template<typename T>
+IGL_INLINE void igl::svd3x3_avx(const Eigen::Matrix<T, 3*8, 3>& A, Eigen::Matrix<T, 3*8, 3> &U, Eigen::Matrix<T, 3*8, 1> &S, Eigen::Matrix<T, 3*8, 3>&V)
+{
+	// this code assumes USE_AVX_IMPLEMENTATION is defined 
+	float Ashuffle[9][8], Ushuffle[9][8], Vshuffle[9][8], Sshuffle[3][8];
+	for (int i=0; i<3; i++)
+	{
+		for (int j=0; j<3; j++)
+		{
+			for (int k=0; k<8; k++)
+			{
+				Ashuffle[i + j*3][k] = A(i + 3*k, j);
+			}
+		}
+	}
+
+#include "Singular_Value_Decomposition_Kernel_Declarations.hpp"
+
+		ENABLE_AVX_IMPLEMENTATION(Va11=_mm256_loadu_ps(Ashuffle[0]);)
+		ENABLE_AVX_IMPLEMENTATION(Va21=_mm256_loadu_ps(Ashuffle[1]);)
+		ENABLE_AVX_IMPLEMENTATION(Va31=_mm256_loadu_ps(Ashuffle[2]);)
+		ENABLE_AVX_IMPLEMENTATION(Va12=_mm256_loadu_ps(Ashuffle[3]);)
+		ENABLE_AVX_IMPLEMENTATION(Va22=_mm256_loadu_ps(Ashuffle[4]);)
+		ENABLE_AVX_IMPLEMENTATION(Va32=_mm256_loadu_ps(Ashuffle[5]);)
+		ENABLE_AVX_IMPLEMENTATION(Va13=_mm256_loadu_ps(Ashuffle[6]);)
+		ENABLE_AVX_IMPLEMENTATION(Va23=_mm256_loadu_ps(Ashuffle[7]);)
+		ENABLE_AVX_IMPLEMENTATION(Va33=_mm256_loadu_ps(Ashuffle[8]);)
+
+#include "Singular_Value_Decomposition_Main_Kernel_Body.hpp"
+
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[0],Vu11);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[1],Vu21);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[2],Vu31);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[3],Vu12);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[4],Vu22);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[5],Vu32);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[6],Vu13);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[7],Vu23);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Ushuffle[8],Vu33);)
+
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[0],Vv11);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[1],Vv21);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[2],Vv31);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[3],Vv12);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[4],Vv22);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[5],Vv32);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[6],Vv13);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[7],Vv23);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Vshuffle[8],Vv33);)
+
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Sshuffle[0],Va11);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Sshuffle[1],Va22);)
+		ENABLE_AVX_IMPLEMENTATION(_mm256_storeu_ps(Sshuffle[2],Va33);)
+
+		for (int i=0; i<3; i++)
+		{
+			for (int j=0; j<3; j++)
+			{
+				for (int k=0; k<8; k++)
+				{
+					U(i + 3*k, j) = Ushuffle[i + j*3][k];
+					V(i + 3*k, j) = Vshuffle[i + j*3][k];
+				}
+			}
+		}
+
+		for (int i=0; i<3; i++)
+		{
+			for (int k=0; k<8; k++)
+			{
+				S(i + 3*k, 0) = Sshuffle[i][k];
+			}
+		}
+}
+#pragma runtime_checks( "u", restore )
+
+// forced instantiation
+template void igl::svd3x3_avx(const Eigen::Matrix<float, 3*8, 3>& A, Eigen::Matrix<float, 3*8, 3> &U, Eigen::Matrix<float, 3*8, 1> &S, Eigen::Matrix<float, 3*8, 3>&V);
+// doesn't even make sense with double because the wunder-SVD code is only single precision anyway...
+template void igl::svd3x3_avx<float>(Eigen::Matrix<float, 24, 3, 0, 24, 3> const&, Eigen::Matrix<float, 24, 3, 0, 24, 3>&, Eigen::Matrix<float, 24, 1, 0, 24, 1>&, Eigen::Matrix<float, 24, 3, 0, 24, 3>&);
+#endif

+ 40 - 0
include/igl/svd3x3/svd3x3_avx.h

@@ -0,0 +1,40 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_SVD3X3_AVX_H
+#define IGL_SVD3X3_AVX_H
+#include <igl/igl_inline.h>
+#include <Eigen/Dense>
+
+namespace igl
+{
+  // Super fast 3x3 SVD according to
+  // http://pages.cs.wisc.edu/~sifakis/project_pages/svd.html This is AVX
+  // version of svd3x3 (see svd3x3.h) which works on 8 matrices at a time These
+  // eight matrices are simply stacked in columns, the rest is the same as for
+  // svd3x3
+  //
+  // Inputs:
+  //   A  12 by 3 stack of 3x3 matrices
+  // Outputs:
+  //   U  12x3 left singular vectors stacked
+  //   S  12x1 singular values stacked
+  //   V  12x3 right singular vectors stacked
+  //
+  // Known bugs: this will not work correctly for double precision.
+  template<typename T>
+  IGL_INLINE void svd3x3_avx(
+    const Eigen::Matrix<T, 3*8, 3>& A, 
+    Eigen::Matrix<T, 3*8, 3> &U, 
+    Eigen::Matrix<T, 3*8, 1> &S, 
+    Eigen::Matrix<T, 3*8, 3>&V);
+}
+#ifdef IGL_HEADER_ONLY
+#  include "svd3x3_avx.cpp"
+#endif
+#endif
+

+ 102 - 0
include/igl/svd3x3/svd3x3_sse.cpp

@@ -0,0 +1,102 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifdef __SSE__
+#include "svd3x3_sse.h"
+
+#include <cmath>
+#include <algorithm>
+
+#undef USE_SCALAR_IMPLEMENTATION
+#define USE_SSE_IMPLEMENTATION
+#undef USE_AVX_IMPLEMENTATION
+#define COMPUTE_U_AS_MATRIX
+#define COMPUTE_V_AS_MATRIX
+#include "Singular_Value_Decomposition_Preamble.hpp"
+
+#pragma runtime_checks( "u", off )  // disable runtime asserts on xor eax,eax type of stuff (doesn't always work, disable explicitly in compiler settings)
+template<typename T>
+IGL_INLINE void igl::svd3x3_sse(const Eigen::Matrix<T, 3*4, 3>& A, Eigen::Matrix<T, 3*4, 3> &U, Eigen::Matrix<T, 3*4, 1> &S, Eigen::Matrix<T, 3*4, 3>&V)
+{
+	// this code assumes USE_SSE_IMPLEMENTATION is defined 
+	float Ashuffle[9][4], Ushuffle[9][4], Vshuffle[9][4], Sshuffle[3][4];
+	for (int i=0; i<3; i++)
+	{
+		for (int j=0; j<3; j++)
+		{
+			for (int k=0; k<4; k++)
+			{
+				Ashuffle[i + j*3][k] = A(i + 3*k, j);
+			}
+		}
+	}
+
+#include "Singular_Value_Decomposition_Kernel_Declarations.hpp"
+
+		ENABLE_SSE_IMPLEMENTATION(Va11=_mm_loadu_ps(Ashuffle[0]);)
+		ENABLE_SSE_IMPLEMENTATION(Va21=_mm_loadu_ps(Ashuffle[1]);)
+		ENABLE_SSE_IMPLEMENTATION(Va31=_mm_loadu_ps(Ashuffle[2]);)
+		ENABLE_SSE_IMPLEMENTATION(Va12=_mm_loadu_ps(Ashuffle[3]);)
+		ENABLE_SSE_IMPLEMENTATION(Va22=_mm_loadu_ps(Ashuffle[4]);)
+		ENABLE_SSE_IMPLEMENTATION(Va32=_mm_loadu_ps(Ashuffle[5]);)
+		ENABLE_SSE_IMPLEMENTATION(Va13=_mm_loadu_ps(Ashuffle[6]);)
+		ENABLE_SSE_IMPLEMENTATION(Va23=_mm_loadu_ps(Ashuffle[7]);)
+		ENABLE_SSE_IMPLEMENTATION(Va33=_mm_loadu_ps(Ashuffle[8]);)
+
+#include "Singular_Value_Decomposition_Main_Kernel_Body.hpp"
+
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[0],Vu11);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[1],Vu21);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[2],Vu31);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[3],Vu12);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[4],Vu22);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[5],Vu32);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[6],Vu13);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[7],Vu23);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Ushuffle[8],Vu33);)
+
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[0],Vv11);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[1],Vv21);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[2],Vv31);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[3],Vv12);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[4],Vv22);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[5],Vv32);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[6],Vv13);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[7],Vv23);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Vshuffle[8],Vv33);)
+
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Sshuffle[0],Va11);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Sshuffle[1],Va22);)
+		ENABLE_SSE_IMPLEMENTATION(_mm_storeu_ps(Sshuffle[2],Va33);)
+
+		for (int i=0; i<3; i++)
+		{
+			for (int j=0; j<3; j++)
+			{
+				for (int k=0; k<4; k++)
+				{
+					U(i + 3*k, j) = Ushuffle[i + j*3][k];
+					V(i + 3*k, j) = Vshuffle[i + j*3][k];
+				}
+			}
+		}
+
+		for (int i=0; i<3; i++)
+		{
+			for (int k=0; k<4; k++)
+			{
+				S(i + 3*k, 0) = Sshuffle[i][k];
+			}
+		}
+}
+#pragma runtime_checks( "u", restore )
+
+// forced instantiation
+template void igl::svd3x3_sse(const Eigen::Matrix<float, 3*4, 3>& A, Eigen::Matrix<float, 3*4, 3> &U, Eigen::Matrix<float, 3*4, 1> &S, Eigen::Matrix<float, 3*4, 3>&V);
+//// doesn't even make sense with double because the wunder-SVD code is only single precision anyway...
+//template void wunderSVD3x3_SSE<float>(Eigen::Matrix<float, 12, 3, 0, 12, 3> const&, Eigen::Matrix<float, 12, 3, 0, 12, 3>&, Eigen::Matrix<float, 12, 1, 0, 12, 1>&, Eigen::Matrix<float, 12, 3, 0, 12, 3>&);
+#endif

+ 37 - 0
include/igl/svd3x3/svd3x3_sse.h

@@ -0,0 +1,37 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_SVD3X3_SSE_H
+#define IGL_SVD3X3_SSE_H
+#include <igl/igl_inline.h>
+#include <Eigen/Dense>
+
+namespace igl
+{
+  // Super fast 3x3 SVD according to http://pages.cs.wisc.edu/~sifakis/project_pages/svd.html
+  // This is SSE version of svd3x3 (see svd3x3.h) which works on 4 matrices at a time
+  // These four matrices are simply stacked in columns, the rest is the same as for svd3x3
+  //
+  // Inputs:
+  //   A  12 by 3 stack of 3x3 matrices
+  // Outputs:
+  //   U  12x3 left singular vectors stacked
+  //   S  12x1 singular values stacked
+  //   V  12x3 right singular vectors stacked
+  //
+  // Known bugs: this will not work correctly for double precision.
+  template<typename T>
+  IGL_INLINE void svd3x3_sse(
+    const Eigen::Matrix<T, 3*4, 3>& A, 
+    Eigen::Matrix<T, 3*4, 3> &U, 
+    Eigen::Matrix<T, 3*4, 1> &S, 
+    Eigen::Matrix<T, 3*4, 3>&V);
+}
+#ifdef IGL_HEADER_ONLY
+#  include "svd3x3_sse.cpp"
+#endif
+#endif

+ 429 - 0
include/igl/uniformly_sample_two_manifold.cpp

@@ -0,0 +1,429 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#include "uniformly_sample_two_manifold.h"
+#include "verbose.h"
+#include "slice.h"
+#include "colon.h"
+#include "all_pairs_distances.h"
+#include "mat_max.h"
+#include "vf.h"
+#include "get_seconds.h"
+#include "cat.h"
+//#include "MT19937.h"
+#include "partition.h"
+
+//////////////////////////////////////////////////////////////////////////////
+// Helper functions
+//////////////////////////////////////////////////////////////////////////////
+
+IGL_INLINE void igl::uniformly_sample_two_manifold(
+  const Eigen::MatrixXd & W,
+  const Eigen::MatrixXi & F, 
+  const int k, 
+  const double push,
+  Eigen::MatrixXd & WS)
+{
+  using namespace Eigen;
+  using namespace igl;
+  using namespace std;
+
+  // Euclidean distance between two points on a mesh given as barycentric
+  // coordinates
+  // Inputs:
+  //   W  #W by dim positions of mesh in weight space
+  //   F  #F by 3 indices of triangles
+  //   face_A  face index where 1st point lives
+  //   bary_A  barycentric coordinates of 1st point on face_A
+  //   face_B  face index where 2nd point lives
+  //   bary_B  barycentric coordinates of 2nd point on face_B
+  // Returns distance in euclidean space
+  const auto & bary_dist = [] (
+    const Eigen::MatrixXd & W,
+    const Eigen::MatrixXi & F,
+    const int face_A,
+    const Eigen::Vector3d & bary_A,
+    const int face_B,
+    const Eigen::Vector3d & bary_B) -> double
+  {
+    return
+      ((bary_A(0)*W.row(F(face_A,0)) +
+        bary_A(1)*W.row(F(face_A,1)) +
+        bary_A(2)*W.row(F(face_A,2)))
+        -
+        (bary_B(0)*W.row(F(face_B,0)) +
+        bary_B(1)*W.row(F(face_B,1)) +
+        bary_B(2)*W.row(F(face_B,2)))).norm();
+  };
+
+  // Base case if F is a tet list, find all faces and pass as non-manifold
+  // triangle mesh
+  if(F.cols() == 4)
+  {
+    verbose("uniform_sample.h: sampling tet mesh\n");
+    MatrixXi T0 = F.col(0);
+    MatrixXi T1 = F.col(1);
+    MatrixXi T2 = F.col(2);
+    MatrixXi T3 = F.col(3);
+    // Faces from tets
+    MatrixXi TF =
+      cat(1,
+        cat(1,
+          cat(2,T0, cat(2,T1,T2)),
+          cat(2,T0, cat(2,T2,T3))),
+        cat(1,
+          cat(2,T0, cat(2,T3,T1)),
+          cat(2,T1, cat(2,T3,T2)))
+      );
+    assert(TF.rows() == 4*F.rows());
+    assert(TF.cols() == 3);
+    uniformly_sample_two_manifold(W,TF,k,push,WS);
+    return;
+  }
+
+  double start = get_seconds();
+
+  VectorXi S;
+  // First get sampling as best as possible on mesh
+  uniformly_sample_two_manifold_at_vertices(W,k,push,S);
+  verbose("Lap: %g\n",get_seconds()-start);
+  slice(W,S,colon<int>(0,W.cols()-1),WS);
+  //cout<<"WSmesh=["<<endl<<WS<<endl<<"];"<<endl;
+
+//#ifdef EXTREME_VERBOSE
+  //cout<<"S=["<<endl<<S<<endl<<"];"<<endl;
+//#endif
+
+  // Build map from vertices to list of incident faces
+  vector<vector<int> > VF,VFi;
+  vf(W,F,VF,VFi);
+
+  // List of list of face indices, for each sample gives index to face it is on
+  vector<vector<int> > sample_faces; sample_faces.resize(k);
+  // List of list of barycentric coordinates, for each sample gives b-coords in
+  // face its on
+  vector<vector<Eigen::Vector3d> > sample_barys; sample_barys.resize(k);
+  // List of current maxmins amongst samples
+  vector<int> cur_maxmin; cur_maxmin.resize(k);
+  // List of distance matrices, D(i)(s,j) reveals distance from i's sth sample
+  // to jth seed if j<k or (j-k)th "pushed" corner
+  vector<MatrixXd> D; D.resize(k);
+
+  // Precompute an W.cols() by W.cols() identity matrix
+  MatrixXd I(MatrixXd::Identity(W.cols(),W.cols()));
+
+  // Describe each seed as a face index and barycentric coordinates
+  for(int i = 0;i < k;i++)
+  {
+    // Unreferenced vertex?
+    assert(VF[S(i)].size() > 0);
+    sample_faces[i].push_back(VF[S(i)][0]);
+    // We're right on a face vertex so barycentric coordinates are 0, but 1 at
+    // that vertex
+    Eigen::Vector3d bary(0,0,0);
+    bary( VFi[S(i)][0] ) = 1;
+    sample_barys[i].push_back(bary);
+    // initialize this to current maxmin
+    cur_maxmin[i] = 0;
+  }
+
+  // initialize radius
+  double radius = 1.0;
+  // minimum radius (bound on precision)
+  //double min_radius = 1e-5;
+  double min_radius = 1e-5;
+  int max_num_rand_samples_per_triangle = 100;
+  int max_sample_attempts_per_triangle = 1000;
+  // Max number of outer iterations for a given radius
+  int max_iters = 1000;
+
+  // continue iterating until radius is smaller than some threshold
+  while(radius > min_radius)
+  {
+    // initialize each seed
+    for(int i = 0;i < k;i++)
+    {
+      // Keep track of cur_maxmin data
+      int face_i = sample_faces[i][cur_maxmin[i]];
+      Eigen::Vector3d bary(sample_barys[i][cur_maxmin[i]]);
+      // Find index in face of closest mesh vertex (on this face)
+      int index_in_face = 
+        (bary(0) > bary(1) ? (bary(0) > bary(2) ? 0 : 2)
+                           : (bary(1) > bary(2) ? 1 : 2));
+      // find closest mesh vertex
+      int vertex_i = F(face_i,index_in_face);
+      // incident triangles
+      vector<int> incident_F = VF[vertex_i];
+      // We're going to try to place num_rand_samples_per_triangle samples on
+      // each sample *after* this location
+      sample_barys[i].clear();
+      sample_faces[i].clear();
+      cur_maxmin[i] = 0;
+      sample_barys[i].push_back(bary);
+      sample_faces[i].push_back(face_i);
+      // Current seed location in weight space
+      VectorXd seed = 
+        bary(0)*W.row(F(face_i,0)) +
+        bary(1)*W.row(F(face_i,1)) +
+        bary(2)*W.row(F(face_i,2));
+#ifdef EXTREME_VERBOSE
+      verbose("i: %d\n",i);
+      verbose("face_i: %d\n",face_i);
+      //cout<<"bary: "<<bary<<endl;
+      verbose("index_in_face: %d\n",index_in_face);
+      verbose("vertex_i: %d\n",vertex_i);
+      verbose("incident_F.size(): %d\n",incident_F.size());
+      //cout<<"seed: "<<seed<<endl;
+#endif
+      // loop over indcident triangles
+      for(int f=0;f<(int)incident_F.size();f++)
+      {
+#ifdef EXTREME_VERBOSE
+        verbose("incident_F[%d]: %d\n",f,incident_F[f]);
+#endif
+        int face_f = incident_F[f];
+        int num_samples_f = 0;
+        for(int s=0;s<max_sample_attempts_per_triangle;s++)
+        {
+          // Randomly sample unit square
+          double u,v;
+//		  double ru = fgenrand();
+//		  double rv = fgenrand();
+          double ru = (double)rand() / RAND_MAX;
+          double rv = (double)rand() / RAND_MAX;
+          // Reflect to lower triangle if above
+          if((ru+rv)>1)
+          {
+            u = 1-rv;
+            v = 1-ru;
+          }else
+          {
+            u = ru;
+            v = rv;
+          }
+          Eigen::Vector3d sample_bary(u,v,1-u-v);
+          double d = bary_dist(W,F,face_i,bary,face_f,sample_bary);
+          // check that sample is close enough
+          if(d<radius)
+          {
+            // add sample to list
+            sample_faces[i].push_back(face_f);
+            sample_barys[i].push_back(sample_bary);
+            num_samples_f++;
+          }
+          // Keep track of which random samples came from which face
+          if(num_samples_f >= max_num_rand_samples_per_triangle)
+          {
+#ifdef EXTREME_VERBOSE
+            verbose("Reached maximum number of samples per face\n");
+#endif
+            break;
+          }
+          if(s==(max_sample_attempts_per_triangle-1))
+          {
+#ifdef EXTREME_VERBOSE
+            verbose("Reached maximum sample attempts per triangle\n");
+#endif
+          }
+        }
+#ifdef EXTREME_VERBOSE
+        verbose("sample_faces[%d].size(): %d\n",i,sample_faces[i].size());
+        verbose("sample_barys[%d].size(): %d\n",i,sample_barys[i].size());
+#endif
+      }
+    }
+
+    // Precompute distances from each seed's random samples to each "pushed"
+    // corner
+    // Put -1 in entries corresponding distance of a seed's random samples to
+    // self
+    // Loop over seeds
+    for(int i = 0;i < k;i++)
+    {
+      // resize distance matrix for new samples
+      D[i].resize(sample_faces[i].size(),k+W.cols());
+      // Loop over i's samples
+      for(int s = 0;s<(int)sample_faces[i].size();s++)
+      {
+        int sample_face = sample_faces[i][s];
+        Eigen::Vector3d sample_bary = sample_barys[i][s];
+        // Loop over other seeds
+        for(int j = 0;j < k;j++)
+        {
+          // distance from sample(i,s) to seed j
+          double d;
+          if(i==j)
+          {
+            // phony self distance: Ilya's idea of infinite
+            d = 10;
+          }else
+          {
+            int seed_j_face = sample_faces[j][cur_maxmin[j]];
+            Eigen::Vector3d seed_j_bary(sample_barys[j][cur_maxmin[j]]);
+            d = bary_dist(W,F,sample_face,sample_bary,seed_j_face,seed_j_bary);
+          }
+          D[i](s,j) = d;
+        }
+        // Loop over corners
+        for(int j = 0;j < W.cols();j++)
+        {
+          // distance from sample(i,s) to corner j
+          double d = 
+            ((sample_bary(0)*W.row(F(sample_face,0)) +
+              sample_bary(1)*W.row(F(sample_face,1)) +
+              sample_bary(2)*W.row(F(sample_face,2))) 
+              - I.row(j)).norm()/push;
+          // append after distances to seeds
+          D[i](s,k+j) = d;
+        }
+      }
+    }
+
+    int iters = 0;
+    while(true)
+    {
+      bool has_changed = false;
+      // try to move each seed
+      for(int i = 0;i < k;i++)
+      {
+        // for each sample look at distance to closest seed/corner
+        VectorXd minD = D[i].rowwise().minCoeff();
+        assert(minD.size() == (int)sample_faces[i].size());
+        // find random sample with maximum minimum distance to other seeds
+        int old_cur_maxmin = cur_maxmin[i];
+        double max_min = -2;
+        for(int s = 0;s<(int)sample_faces[i].size();s++)
+        {
+          if(max_min < minD(s))
+          {
+            max_min = minD(s);
+            // Set this as the new seed location
+            cur_maxmin[i] = s;
+          }
+        }
+#ifdef EXTREME_VERBOSE
+        verbose("max_min: %g\n",max_min);
+        verbose("cur_maxmin[%d]: %d->%d\n",i,old_cur_maxmin,cur_maxmin[i]);
+#endif
+        // did location change?
+        has_changed |= (old_cur_maxmin!=cur_maxmin[i]);
+        // update distances of random samples of other seeds
+      }
+      // if no seed moved, exit
+      if(!has_changed)
+      {
+        break;
+      }
+      iters++;
+      if(iters>=max_iters)
+      {
+        verbose("Hit max iters (%d) before converging\n",iters);
+      }
+    }
+    // shrink radius
+    //radius *= 0.9;
+    //radius *= 0.99;
+    radius *= 0.9;
+  }
+  // Collect weight space locations
+  WS.resize(k,W.cols());
+  for(int i = 0;i<k;i++)
+  {
+    int face_i = sample_faces[i][cur_maxmin[i]];
+    Eigen::Vector3d bary(sample_barys[i][cur_maxmin[i]]);
+    WS.row(i) =
+        bary(0)*W.row(F(face_i,0)) +
+        bary(1)*W.row(F(face_i,1)) +
+        bary(2)*W.row(F(face_i,2));
+  }
+  verbose("Lap: %g\n",get_seconds()-start);
+  //cout<<"WSafter=["<<endl<<WS<<endl<<"];"<<endl;
+}
+
+IGL_INLINE void igl::uniformly_sample_two_manifold_at_vertices(
+  const Eigen::MatrixXd & OW,
+  const int k, 
+  const double push,
+  Eigen::VectorXi & S)
+{
+  using namespace Eigen;
+  using namespace igl;
+  using namespace std;
+
+  // Copy weights and faces
+  const MatrixXd & W = OW;
+  /*const MatrixXi & F = OF;*/
+
+  // Initialize seeds
+  VectorXi G;
+  Matrix<double,Dynamic,1>  ignore;
+  partition(W,k+W.cols(),G,S,ignore);
+  // Remove corners, which better be at top
+  S = S.segment(W.cols(),k).eval();
+
+  MatrixXd WS;
+  slice(W,S,colon<int>(0,W.cols()-1),WS);
+  //cout<<"WSpartition=["<<endl<<WS<<endl<<"];"<<endl;
+
+  // number of vertices
+  int n = W.rows();
+  // number of dimensions in weight space
+  int m = W.cols();
+  // Corners of weight space
+  MatrixXd I = MatrixXd::Identity(m,m);
+  // append corners to bottom of weights
+  MatrixXd WI(n+m,m);
+  WI << W,I;
+  // Weights at seeds and corners
+  MatrixXd WSC(k+m,m);
+  for(int i = 0;i<k;i++)
+  {
+    WSC.row(i) = W.row(S(i));
+  }
+  for(int i = 0;i<m;i++)
+  {
+    WSC.row(i+k) = WI.row(n+i);
+  }
+  // initialize all pairs sqaured distances
+  MatrixXd sqrD;
+  all_pairs_distances(WI,WSC,true,sqrD);
+  // bring in corners by push factor (squared because distances are squared)
+  sqrD.block(0,k,sqrD.rows(),m) /= push*push;
+
+  int max_iters = 30;
+  int j = 0;
+  for(;j<max_iters;j++)
+  {
+    bool has_changed = false;
+    // loop over seeds
+    for(int i =0;i<k;i++)
+    {
+      int old_si = S(i);
+      // set distance to ilya's idea of infinity
+      sqrD.col(i).setZero();
+      sqrD.col(i).array() += 10;
+      // find vertex farthers from all other seeds
+      MatrixXd minsqrD = sqrD.rowwise().minCoeff();
+      MatrixXd::Index si,PHONY;
+      minsqrD.maxCoeff(&si,&PHONY);
+      MatrixXd Wsi = W.row(si);
+      MatrixXd sqrDi;
+      all_pairs_distances(WI,Wsi,true,sqrDi);
+      sqrD.col(i) = sqrDi;
+      S(i) = si;
+      has_changed |= si!=old_si;
+    }
+    if(j == max_iters)
+    {
+      verbose("uniform_sample.h: Warning: hit max iters\n");
+    }
+    if(!has_changed)
+    {
+      break;
+    }
+  }
+}

+ 42 - 0
include/igl/uniformly_sample_two_manifold.h

@@ -0,0 +1,42 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_UNIFORMLY_SAMPLE_TWO_MANIFOLD_H
+#define IGL_UNIFORMLY_SAMPLE_TWO_MANIFOLD_H
+#include "igl_inline.h"
+#include <Eigen/Dense>
+namespace igl
+{
+  // UNIFORMLY_SAMPLE_TWO_MANIFOLD Attempt to sample a mesh uniformly by furthest
+  // point relaxation as described in "Fast Automatic Skinning Transformations"
+  // [Jacobson et al. 12] Section 3.3.
+  //
+  // Inputs:
+  //   W  #W by dim positions of mesh in weight space
+  //   F  #F by 3 indices of triangles
+  //   k  number of samplse
+  //   push  factor by which corners should be pushed away
+  // Outputs
+  //   WS  k by dim locations in weights space
+  //
+  IGL_INLINE void uniformly_sample_two_manifold(
+    const Eigen::MatrixXd & W,
+    const Eigen::MatrixXi & F, 
+    const int k, 
+    const double push,
+    Eigen::MatrixXd & WS);
+  // Find uniform sampling up to placing samples on mesh vertices
+  IGL_INLINE void uniformly_sample_two_manifold_at_vertices(
+    const Eigen::MatrixXd & OW,
+    const int k, 
+    const double push,
+    Eigen::VectorXi & S);
+}
+#ifdef IGL_HEADER_ONLY
+#  include "uniformly_sample_two_manifold.h"
+#endif
+#endif