Browse Source

some performance optimization

Former-commit-id: 1fcdb64dd45ae4ea3b03935233c15fcdd7568e1a
Alec Jacobson 10 years ago
parent
commit
4a382e039a

+ 1 - 1
include/igl/all_pairs_distances.cpp

@@ -23,7 +23,7 @@ IGL_INLINE void igl::all_pairs_distances(
   {
     for(int j=0;j<U.rows();j++)
     {
-      D(i,j) = (V.row(i)-U.row(j)).array().pow(2).sum();
+      D(i,j) = (V.row(i)-U.row(j)).squaredNorm();
       if(!squared)
       {
         D(i,j) = sqrt(D(i,j));

+ 1 - 1
include/igl/boundary_conditions.cpp

@@ -53,7 +53,7 @@ IGL_INLINE bool igl::boundary_conditions(
       // double sqrd = (V.row(i)-pos).array().pow(2).sum();
       // Must first store in temporary
       VectorXd vi = V.row(i);
-      double sqrd = (vi-pos).array().pow(2).sum();
+      double sqrd = (vi-pos).squaredNorm();
       if(sqrd <= FLOAT_EPS)
       {
         //cout<<"sum((["<<

+ 6 - 0
include/igl/cgal/point_mesh_squared_distance.cpp

@@ -45,6 +45,7 @@ IGL_INLINE void igl::point_mesh_squared_distance_precompute(
   using namespace std;
 
   typedef CGAL::Triangle_3<Kernel> Triangle_3; 
+  typedef CGAL::Point_3<Kernel> Point_3; 
   typedef typename std::vector<Triangle_3>::iterator Iterator;
   typedef CGAL::AABB_triangle_primitive<Kernel, Iterator> Primitive;
   typedef CGAL::AABB_traits<Kernel, Primitive> AABB_triangle_traits;
@@ -59,6 +60,11 @@ IGL_INLINE void igl::point_mesh_squared_distance_precompute(
   tree.clear();
   tree.insert(T.begin(),T.end());
   tree.accelerate_distance_queries();
+  // accelerate_distance_queries doesn't seem actually to do _all_ of the
+  // precomputation. the tree (despite being const) will still do more
+  // precomputation and reorganizing on the first call of `closest_point` or
+  // `closest_point_and_primitive`. Therefor, call it once here.
+  tree.closest_point_and_primitive(Point_3(0,0,0));
 }
 
 template <typename Kernel>

+ 49 - 48
include/igl/cgal/signed_distance.cpp

@@ -9,6 +9,7 @@
 #include "../per_vertex_normals.h"
 #include "../per_edge_normals.h"
 #include "../per_face_normals.h"
+#include "../get_seconds.h"
 #include "point_mesh_squared_distance.h"
 
 
@@ -62,7 +63,7 @@ IGL_INLINE void igl::signed_distance(
       per_face_normals(V,F,FN);
       per_vertex_normals(V,F,PER_VERTEX_NORMALS_WEIGHTING_TYPE_ANGLE,FN,VN);
       per_edge_normals(
-        V,F,PER_EDGE_NORMALS_WEIGHTING_TYPE_UNIFORM,EN,E,EMAP);
+        V,F,PER_EDGE_NORMALS_WEIGHTING_TYPE_UNIFORM,FN,EN,E,EMAP);
       N.resize(P.rows(),3);
       break;
   }
@@ -167,53 +168,53 @@ IGL_INLINE void igl::signed_distance_pseudonormal(
   typedef typename Tree::Point_and_primitive_id Point_and_primitive_id;
 
   pp = tree.closest_point_and_primitive(q);
-  Point_3 & p = pp.first;
-  const auto & qp = q-p;
-  sqrd = qp.squared_length();
-  Vector3d v(qp.x(),qp.y(),qp.z());
-  const int f = pp.second - T.begin();
-  const Triangle_3 & t = *pp.second;
-  // barycentric coordinates
-  const auto & area = [&p,&t](const int i, const int j)->FT
-  {
-    return sqrt(Triangle_3(p,t.vertex(i),t.vertex(j)).squared_area());
-  };
-  Vector3d b(area(1,2),area(2,0),area(0,1));
-  b /= b.sum();
-  // Determine which normal to use
-  const double epsilon = 1e-12;
-  const int type = (b.array()<=epsilon).cast<int>().sum();
-  switch(type)
-  {
-    case 2:
-      // Find vertex
-      for(int c = 0;c<3;c++)
-      {
-        if(b(c)>epsilon)
-        {
-          n = VN.row(F(f,c));
-          break;
-        }
-      }
-      break;
-    case 1:
-      // Find edge
-      for(int c = 0;c<3;c++)
-      {
-        if(b(c)<=epsilon)
-        {
-          n = EN.row(EMAP(F.rows()*c+f));
-          break;
-        }
-      }
-      break;
-    default:
-      assert(false && "all barycentric coords zero.");
-    case 0:
-      n = FN.row(f);
-      break;
-  }
-  s = (v.dot(n) >= 0 ? 1. : -1.);
+  //Point_3 & p = pp.first;
+  //const auto & qp = q-p;
+  //sqrd = qp.squared_length();
+  //Vector3d v(qp.x(),qp.y(),qp.z());
+  //const int f = pp.second - T.begin();
+  //const Triangle_3 & t = *pp.second;
+  //// barycentric coordinates
+  //const auto & area = [&p,&t](const int i, const int j)->FT
+  //{
+  //  return sqrt(Triangle_3(p,t.vertex(i),t.vertex(j)).squared_area());
+  //};
+  //Vector3d b(area(1,2),area(2,0),area(0,1));
+  //b /= b.sum();
+  //// Determine which normal to use
+  //const double epsilon = 1e-12;
+  //const int type = (b.array()<=epsilon).cast<int>().sum();
+  //switch(type)
+  //{
+  //  case 2:
+  //    // Find vertex
+  //    for(int c = 0;c<3;c++)
+  //    {
+  //      if(b(c)>epsilon)
+  //      {
+  //        n = VN.row(F(f,c));
+  //        break;
+  //      }
+  //    }
+  //    break;
+  //  case 1:
+  //    // Find edge
+  //    for(int c = 0;c<3;c++)
+  //    {
+  //      if(b(c)<=epsilon)
+  //      {
+  //        n = EN.row(EMAP(F.rows()*c+f));
+  //        break;
+  //      }
+  //    }
+  //    break;
+  //  default:
+  //    assert(false && "all barycentric coords zero.");
+  //  case 0:
+  //    n = FN.row(f);
+  //    break;
+  //}
+  //s = (v.dot(n) >= 0 ? 1. : -1.);
 }
 
 template <typename Kernel>

+ 5 - 0
include/igl/doublearea.cpp

@@ -124,6 +124,11 @@ IGL_INLINE void igl::doublearea(
   assert(s.rows() == m);
   // resize output
   dblA.resize(l.rows(),1);
+  // Minimum number of iterms per openmp thread
+  #ifndef IGL_OMP_MIN_VALUE
+  #  define IGL_OMP_MIN_VALUE 1000
+  #endif
+  #pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
   for(int i = 0;i<m;i++)
   {
     //// Heron's formula for area

+ 18 - 12
include/igl/edge_lengths.cpp

@@ -15,6 +15,11 @@ IGL_INLINE void igl::edge_lengths(
   Eigen::PlainObjectBase<DerivedL>& L)
 {
   using namespace std;
+  const int m = F.rows();
+  // Minimum number of iterms per openmp thread
+#ifndef IGL_OMP_MIN_VALUE
+#  define IGL_OMP_MIN_VALUE 1000
+#endif
   switch(F.cols())
   {
     case 2:
@@ -28,29 +33,30 @@ IGL_INLINE void igl::edge_lengths(
     }
     case 3:
     {
-      L.resize(F.rows(),3);
+      L.resize(m,3);
       // loop over faces
-      for(int i = 0;i<F.rows();i++)
+      #pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
+      for(int i = 0;i<m;i++)
       {
-        L(i,0) = sqrt((V.row(F(i,1))-V.row(F(i,2))).array().pow(2).sum());
-        L(i,1) = sqrt((V.row(F(i,2))-V.row(F(i,0))).array().pow(2).sum());
-        L(i,2) = sqrt((V.row(F(i,0))-V.row(F(i,1))).array().pow(2).sum());
+        L(i,0) = (V.row(F(i,1))-V.row(F(i,2))).norm();
+        L(i,1) = (V.row(F(i,2))-V.row(F(i,0))).norm();
+        L(i,2) = (V.row(F(i,0))-V.row(F(i,1))).norm();
       }
       break;
     }
     case 4:
     {
-      const int m = F.rows();
       L.resize(m,6);
       // loop over faces
+      #pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
       for(int i = 0;i<m;i++)
       {
-        L(i,0) = sqrt((V.row(F(i,3))-V.row(F(i,0))).array().pow(2).sum());
-        L(i,1) = sqrt((V.row(F(i,3))-V.row(F(i,1))).array().pow(2).sum());
-        L(i,2) = sqrt((V.row(F(i,3))-V.row(F(i,2))).array().pow(2).sum());
-        L(i,3) = sqrt((V.row(F(i,1))-V.row(F(i,2))).array().pow(2).sum());
-        L(i,4) = sqrt((V.row(F(i,2))-V.row(F(i,0))).array().pow(2).sum());
-        L(i,5) = sqrt((V.row(F(i,0))-V.row(F(i,1))).array().pow(2).sum());
+        L(i,0) = (V.row(F(i,3))-V.row(F(i,0))).norm();
+        L(i,1) = (V.row(F(i,3))-V.row(F(i,1))).norm();
+        L(i,2) = (V.row(F(i,3))-V.row(F(i,2))).norm();
+        L(i,3) = (V.row(F(i,1))-V.row(F(i,2))).norm();
+        L(i,4) = (V.row(F(i,2))-V.row(F(i,0))).norm();
+        L(i,5) = (V.row(F(i,0))-V.row(F(i,1))).norm();
       }
       break;
     }

+ 25 - 7
include/igl/internal_angles.cpp

@@ -7,6 +7,7 @@
 // obtain one at http://mozilla.org/MPL/2.0/.
 #include "internal_angles.h"
 #include "edge_lengths.h"
+#include "get_seconds.h"
 
 template <typename DerivedV, typename DerivedF, typename DerivedK>
 IGL_INLINE void igl::internal_angles(
@@ -15,6 +16,7 @@ IGL_INLINE void igl::internal_angles(
   Eigen::PlainObjectBase<DerivedK> & K)
 {
   using namespace Eigen;
+  using namespace std;
   // Edge lengths
   Matrix<
     typename DerivedV::Scalar,
@@ -23,7 +25,7 @@ IGL_INLINE void igl::internal_angles(
   edge_lengths(V,F,L);
 
   assert(F.cols() == 3 && "F should contain triangles");
-  return internal_angles(L,K);
+  internal_angles(L,K);
 }
 
 template <typename DerivedL, typename DerivedK>
@@ -32,13 +34,29 @@ IGL_INLINE void igl::internal_angles(
   Eigen::PlainObjectBase<DerivedK> & K)
 {
   assert(L.cols() == 3 && "Edge-lengths should come from triangles");
-  K.resize(L.rows(),L.cols());
-  for(int d = 0;d<3;d++)
+  const size_t m = L.rows();
+  K.resize(m,3);
+  //for(int d = 0;d<3;d++)
+  //{
+  //  const auto & s1 = L.col(d).array();
+  //  const auto & s2 = L.col((d+1)%3).array();
+  //  const auto & s3 = L.col((d+2)%3).array();
+  //  K.col(d) = ((s3.square() + s2.square() - s1.square())/(2.*s3*s2)).acos();
+  //}
+  // Minimum number of iterms per openmp thread
+  #ifndef IGL_OMP_MIN_VALUE
+  #  define IGL_OMP_MIN_VALUE 1000
+  #endif
+  #pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
+  for(int f = 0;f<m;f++)
   {
-    const auto & s1 = L.col(d).array();
-    const auto & s2 = L.col((d+1)%3).array();
-    const auto & s3 = L.col((d+2)%3).array();
-    K.col(d) = ((s3.square() + s2.square() - s1.square())/(2.*s3*s2)).acos();
+    for(int d = 0;d<3;d++)
+    {
+      const auto & s1 = L(f,d);
+      const auto & s2 = L(f,(d+1)%3);
+      const auto & s3 = L(f,(d+2)%3);
+      K(f,d) = acos((s3*s3 + s2*s2 - s1*s1)/(2.*s3*s2));
+    }
   }
 }
 

+ 3 - 3
include/igl/massmatrix.cpp

@@ -47,9 +47,9 @@ IGL_INLINE void igl::massmatrix(
     // loop over faces
     for(int i = 0;i<m;i++)
     {
-      l(i,0) = sqrt((V.row(F(i,1))-V.row(F(i,2))).array().pow(2).sum());
-      l(i,1) = sqrt((V.row(F(i,2))-V.row(F(i,0))).array().pow(2).sum());
-      l(i,2) = sqrt((V.row(F(i,0))-V.row(F(i,1))).array().pow(2).sum());
+      l(i,0) = (V.row(F(i,1))-V.row(F(i,2))).norm();
+      l(i,1) = (V.row(F(i,2))-V.row(F(i,0))).norm();
+      l(i,2) = (V.row(F(i,0))-V.row(F(i,1))).norm();
     }
     // semiperimeters
     Matrix<Scalar,Dynamic,1> s = l.rowwise().sum()*0.5;

+ 11 - 5
include/igl/per_edge_normals.cpp

@@ -1,6 +1,7 @@
 #include "all_edges.h"
 #include "doublearea.h"
 #include "per_edge_normals.h"
+#include "get_seconds.h"
 #include "per_face_normals.h"
 #include "unique_simplices.h"
 #include <vector>
@@ -36,11 +37,11 @@ IGL_INLINE void igl::per_edge_normals(
   // now sort(allE,2) == E(EMAP,:), that is, if EMAP(i) = j, then E.row(j) is
   // the undirected edge corresponding to the directed edge allE.row(i).
 
-  Eigen::VectorXd W(F.rows());
+  Eigen::VectorXd W;
   switch(weighting)
   {
     case PER_EDGE_NORMALS_WEIGHTING_TYPE_UNIFORM:
-      W.setConstant(1.);
+      // Do nothing
       break;
     default:
       assert(false && "Unknown weighting type");
@@ -52,15 +53,20 @@ IGL_INLINE void igl::per_edge_normals(
     }
   }
 
-  N.setConstant(E.rows(),3,0);
+  N.setZero(E.rows(),3);
   for(int f = 0;f<m;f++)
   {
     for(int c = 0;c<3;c++)
     {
-      N.row(EMAP(f+c*m)) += W(f) * FN.row(f);
+      if(weighting == PER_EDGE_NORMALS_WEIGHTING_TYPE_UNIFORM)
+      {
+        N.row(EMAP(f+c*m)) += FN.row(f);
+      }else
+      {
+        N.row(EMAP(f+c*m)) += W(f) * FN.row(f);
+      }
     }
   }
-  N.rowwise().normalize();
 }
 
 template <

+ 4 - 0
include/igl/per_vertex_normals.cpp

@@ -7,6 +7,7 @@
 // obtain one at http://mozilla.org/MPL/2.0/.
 #include "per_vertex_normals.h"
 
+#include "get_seconds.h"
 #include "per_face_normals.h"
 #include "doublearea.h"
 #include "internal_angles.h"
@@ -40,6 +41,9 @@ IGL_INLINE void igl::per_vertex_normals(
   const Eigen::PlainObjectBase<DerivedFN>& FN,
   Eigen::PlainObjectBase<DerivedN> & N)
 {
+  using namespace std;
+  double t_start;
+  t_start = get_seconds();
   // Resize for output
   N.setZero(V.rows(),3);
 

+ 2 - 2
include/igl/project_to_line.cpp

@@ -33,7 +33,7 @@ IGL_INLINE void igl::project_to_line(
   int np  = P.rows();
   // vector from source to destination
   MatL DmS = D-S;
-  double v_sqrlen = (double)(DmS.array().pow(2).sum());
+  double v_sqrlen = (double)(DmS.squaredNorm());
   assert(v_sqrlen != 0);
   // resize output
   t.resize(np,1);
@@ -48,7 +48,7 @@ IGL_INLINE void igl::project_to_line(
     t(i) = -(DmS.array()*SmPi.array()).sum() / v_sqrlen;
     // P projected onto line
     MatL projP = (1-t(i))*S + t(i)*D;
-    sqrD(i) = (Pi-projP).array().pow(2).sum();
+    sqrD(i) = (Pi-projP).squaredNorm();
   }
 }
 

+ 4 - 0
include/igl/sortrows.cpp

@@ -6,6 +6,7 @@
 // v. 2.0. If a copy of the MPL was not distributed with this file, You can 
 // obtain one at http://mozilla.org/MPL/2.0/.
 #include "sortrows.h"
+#include "get_seconds.h"
 
 #include "SortableRow.h"
 #include "sort.h"
@@ -55,6 +56,9 @@ IGL_INLINE void igl::sortrows(
   Eigen::PlainObjectBase<DerivedX>& Y,
   Eigen::PlainObjectBase<DerivedIX>& IX)
 {
+  // This is already 2x faster than matlab's builtin `sortrows`. I have tried
+  // implementing a "multiple-pass" sort on each column, but see no performance
+  // improvement.
   using namespace std;
   using namespace Eigen;
   using namespace igl;

+ 1 - 0
include/igl/unique.cpp

@@ -12,6 +12,7 @@
 #include "sortrows.h"
 #include "list_to_matrix.h"
 #include "matrix_to_list.h"
+#include "get_seconds.h"
 
 #include <algorithm>
 #include <iostream>

+ 9 - 1
include/igl/unique_simplices.cpp

@@ -8,6 +8,7 @@
 #include "unique_simplices.h"
 #include "sort.h"
 #include "unique.h"
+#include "get_seconds.h"
 
 template <
   typename DerivedF,
@@ -22,6 +23,7 @@ IGL_INLINE void igl::unique_simplices(
 {
   using namespace Eigen;
   using namespace igl;
+  using namespace std;
   // Sort each face
   MatrixXi sortF, unusedI;
   igl::sort(F,2,true,sortF,unusedI);
@@ -29,8 +31,14 @@ IGL_INLINE void igl::unique_simplices(
   MatrixXi C;
   igl::unique_rows(sortF,C,IA,IC);
   FF.resize(IA.size(),F.cols());
+  const size_t mff = FF.rows();
+  // Minimum number of iterms per openmp thread
+  #ifndef IGL_OMP_MIN_VALUE
+  #  define IGL_OMP_MIN_VALUE 1000
+  #endif
+  #pragma omp parallel for if (mff>IGL_OMP_MIN_VALUE)
   // Copy into output
-  for(int i = 0;i<IA.rows();i++)
+  for(int i = 0;i<mff;i++)
   {
     FF.row(i) = F.row(IA(i));
   }