Browse Source

templated parallel for

Former-commit-id: 85e646ba631294231963fcfc5903b6bdfcec1d79
Alec Jacobson 9 years ago
parent
commit
328682a5eb

+ 19 - 32
include/igl/ambient_occlusion.cpp

@@ -10,7 +10,7 @@
 #include "ray_mesh_intersect.h"
 #include "EPS.h"
 #include "Hit.h"
-#include <thread>
+#include "parallel_for.h"
 #include <functional>
 #include <vector>
 #include <algorithm>
@@ -38,40 +38,27 @@ IGL_INLINE void igl::ambient_occlusion(
   // Embree seems to be parallel when constructing but not when tracing rays
   const MatrixXf D = random_dir_stratified(num_samples).cast<float>();
 
-  const size_t nthreads = n<1000?1:std::thread::hardware_concurrency();
+  const auto & inner = [&P,&N,&num_samples,&D,&S,&shoot_ray](const int p)
   {
-    std::vector<std::thread> threads(nthreads);
-    for(int t = 0;t<nthreads;t++)
+    const Vector3f origin = P.row(p).template cast<float>();
+    const Vector3f normal = N.row(p).template cast<float>();
+    int num_hits = 0;
+    for(int s = 0;s<num_samples;s++)
     {
-      threads[t] = std::thread(std::bind(
-        [&P,&N,&shoot_ray,&S,&num_samples,&D](const int bi, const int ei, const int t)
-        {
-          // loop over mesh vertices in this chunk
-          for(int p = bi;p<ei;p++)
-          {
-            const Vector3f origin = P.row(p).template cast<float>();
-            const Vector3f normal = N.row(p).template cast<float>();
-            int num_hits = 0;
-            for(int s = 0;s<num_samples;s++)
-            {
-              Vector3f d = D.row(s);
-              if(d.dot(normal) < 0)
-              {
-                // reverse ray
-                d *= -1;
-              }
-              if(shoot_ray(origin,d))
-              {
-                num_hits++;
-              }
-            }
-            S(p) = (double)num_hits/(double)num_samples;
-          }
-        },t*n/nthreads,(t+1)==nthreads?n:(t+1)*n/nthreads,t));
+      Vector3f d = D.row(s);
+      if(d.dot(normal) < 0)
+      {
+        // reverse ray
+        d *= -1;
+      }
+      if(shoot_ray(origin,d))
+      {
+        num_hits++;
+      }
     }
-    std::for_each(threads.begin(),threads.end(),[](std::thread& x){x.join();});
-  }
-
+    S(p) = (double)num_hits/(double)num_samples;
+  };
+  parallel_for(n,inner,1000);
 }
 
 template <

+ 16 - 22
include/igl/doublearea.cpp

@@ -7,6 +7,7 @@
 // obtain one at http://mozilla.org/MPL/2.0/.
 #include "doublearea.h"
 #include "edge_lengths.h"
+#include "parallel_for.h"
 #include "sort.h"
 #include <cassert>
 #include <iostream>
@@ -148,28 +149,21 @@ IGL_INLINE void igl::doublearea(
   assert((Index)s.rows() == m);
   // resize output
   dblA.resize(l.rows(),1);
-  // Minimum number of iterms per openmp thread
-  #ifndef IGL_OMP_MIN_VALUE
-  #  define IGL_OMP_MIN_VALUE 1000
-  #endif
-  #pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
-  for(Index i = 0;i<m;i++)
-  {
-    //// Heron's formula for area
-    //const typename Derivedl::Scalar arg =
-    //  s(i)*(s(i)-l(i,0))*(s(i)-l(i,1))*(s(i)-l(i,2));
-    //assert(arg>=0);
-    //dblA(i) = 2.0*sqrt(arg);
-    // Kahan's Heron's formula
-    const typename Derivedl::Scalar arg =
-      (l(i,0)+(l(i,1)+l(i,2)))*
-      (l(i,2)-(l(i,0)-l(i,1)))*
-      (l(i,2)+(l(i,0)-l(i,1)))*
-      (l(i,0)+(l(i,1)-l(i,2)));
-    dblA(i) = 2.0*0.25*sqrt(arg);
-    assert( l(i,2) - (l(i,0)-l(i,1)) && "FAILED KAHAN'S ASSERTION");
-    assert(dblA(i) == dblA(i) && "DOUBLEAREA() PRODUCED NaN");
-  }
+  parallel_for(
+    m,
+    [&l,&dblA](const int i)
+    {
+      // Kahan's Heron's formula
+      const typename Derivedl::Scalar arg =
+        (l(i,0)+(l(i,1)+l(i,2)))*
+        (l(i,2)-(l(i,0)-l(i,1)))*
+        (l(i,2)+(l(i,0)-l(i,1)))*
+        (l(i,0)+(l(i,1)-l(i,2)));
+      dblA(i) = 2.0*0.25*sqrt(arg);
+      assert( l(i,2) - (l(i,0)-l(i,1)) && "FAILED KAHAN'S ASSERTION");
+      assert(dblA(i) == dblA(i) && "DOUBLEAREA() PRODUCED NaN");
+    },
+    1000l);
 }
 
 template <typename DerivedV, typename DerivedF, typename DeriveddblA>

+ 22 - 21
include/igl/edge_lengths.cpp

@@ -6,6 +6,7 @@
 // v. 2.0. If a copy of the MPL was not distributed with this file, You can
 // obtain one at http://mozilla.org/MPL/2.0/.
 #include "edge_lengths.h"
+#include "parallel_for.h"
 #include <iostream>
 
 template <typename DerivedV, typename DerivedF, typename DerivedL>
@@ -16,10 +17,6 @@ IGL_INLINE void igl::edge_lengths(
 {
   using namespace std;
   const int m = F.rows();
-  // Minimum number of iterms per openmp thread
-#ifndef IGL_OMP_MIN_VALUE
-#  define IGL_OMP_MIN_VALUE 1000
-#endif
   switch(F.cols())
   {
     case 2:
@@ -35,29 +32,33 @@ IGL_INLINE void igl::edge_lengths(
     {
       L.resize(m,3);
       // loop over faces
-      #pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
-      for(int i = 0;i<m;i++)
-      {
-        L(i,0) = (V.row(F(i,1))-V.row(F(i,2))).norm();
-        L(i,1) = (V.row(F(i,2))-V.row(F(i,0))).norm();
-        L(i,2) = (V.row(F(i,0))-V.row(F(i,1))).norm();
-      }
+      parallel_for(
+        m,
+        [&V,&F,&L](const int i)
+        {
+          L(i,0) = (V.row(F(i,1))-V.row(F(i,2))).norm();
+          L(i,1) = (V.row(F(i,2))-V.row(F(i,0))).norm();
+          L(i,2) = (V.row(F(i,0))-V.row(F(i,1))).norm();
+        },
+        1000);
       break;
     }
     case 4:
     {
       L.resize(m,6);
       // loop over faces
-      #pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
-      for(int i = 0;i<m;i++)
-      {
-        L(i,0) = (V.row(F(i,3))-V.row(F(i,0))).norm();
-        L(i,1) = (V.row(F(i,3))-V.row(F(i,1))).norm();
-        L(i,2) = (V.row(F(i,3))-V.row(F(i,2))).norm();
-        L(i,3) = (V.row(F(i,1))-V.row(F(i,2))).norm();
-        L(i,4) = (V.row(F(i,2))-V.row(F(i,0))).norm();
-        L(i,5) = (V.row(F(i,0))-V.row(F(i,1))).norm();
-      }
+      parallel_for(
+        m,
+        [&V,&F,&L](const int i)
+        {
+          L(i,0) = (V.row(F(i,3))-V.row(F(i,0))).norm();
+          L(i,1) = (V.row(F(i,3))-V.row(F(i,1))).norm();
+          L(i,2) = (V.row(F(i,3))-V.row(F(i,2))).norm();
+          L(i,3) = (V.row(F(i,1))-V.row(F(i,2))).norm();
+          L(i,4) = (V.row(F(i,2))-V.row(F(i,0))).norm();
+          L(i,5) = (V.row(F(i,0))-V.row(F(i,1))).norm();
+        },
+        1000);
       break;
     }
     default:

+ 13 - 21
include/igl/internal_angles.cpp

@@ -8,6 +8,7 @@
 // obtain one at http://mozilla.org/MPL/2.0/.
 #include "internal_angles.h"
 #include "edge_lengths.h"
+#include "parallel_for.h"
 #include "get_seconds.h"
 
 template <typename DerivedV, typename DerivedF, typename DerivedK>
@@ -70,28 +71,19 @@ IGL_INLINE void igl::internal_angles(
   assert(L.cols() == 3 && "Edge-lengths should come from triangles");
   const Index m = L.rows();
   K.resize(m,3);
-  //for(int d = 0;d<3;d++)
-  //{
-  //  const auto & s1 = L.col(d).array();
-  //  const auto & s2 = L.col((d+1)%3).array();
-  //  const auto & s3 = L.col((d+2)%3).array();
-  //  K.col(d) = ((s3.square() + s2.square() - s1.square())/(2.*s3*s2)).acos();
-  //}
-  // Minimum number of iterms per openmp thread
-  #ifndef IGL_OMP_MIN_VALUE
-  #  define IGL_OMP_MIN_VALUE 1000
-  #endif
-  #pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
-  for(Index f = 0;f<m;f++)
-  {
-    for(size_t d = 0;d<3;d++)
+  parallel_for(
+    m,
+    [&L,&K](const Index f)
     {
-      const auto & s1 = L(f,d);
-      const auto & s2 = L(f,(d+1)%3);
-      const auto & s3 = L(f,(d+2)%3);
-      K(f,d) = acos((s3*s3 + s2*s2 - s1*s1)/(2.*s3*s2));
-    }
-  }
+      for(size_t d = 0;d<3;d++)
+      {
+        const auto & s1 = L(f,d);
+        const auto & s2 = L(f,(d+1)%3);
+        const auto & s3 = L(f,(d+2)%3);
+        K(f,d) = acos((s3*s3 + s2*s2 - s1*s1)/(2.*s3*s2));
+      }
+    },
+    1000l);
 }
 
 #ifdef IGL_STATIC_LIBRARY

+ 180 - 0
include/igl/parallel_for.h

@@ -0,0 +1,180 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+// 
+// Copyright (C) 2016 Alec Jacobson <alecjacobson@gmail.com>
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public License 
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can 
+// obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef IGL_PARALLEL_FOR_H
+#define IGL_PARALLEL_FOR_H
+#include "igl_inline.h"
+#include <functional>
+
+namespace igl
+{
+  // PARALLEL_FOR Functional implementation of a basic, open-mp style, parallel
+  // for loop. If the inner block of a for-loop can be rewritten/encapsulated in
+  // a single (anonymous/lambda) function call `func` so that the serial code
+  // looks like:
+  // 
+  //     for(int i = 0;i<loop_size;i++)
+  //     {
+  //       func(i);
+  //     }
+  //
+  // then `parallel_for(loop_size,func,min_parallel)` will use as many threads as
+  // available on the current hardware to parallelize this for loop so long as
+  // loop_size<min_parallel, otherwise it will just use a serial for loop.
+  //
+  // Inputs:
+  //   loop_size  number of iterations. I.e. for(int i = 0;i<loop_size;i++) ...
+  //   func  function handle taking iteration index as only arguement to compute
+  //     inner block of for loop I.e. for(int i ...){ func(i); }
+  //   min_parallel  min size of loop_size such that parallel (non-serial)
+  //     thread pooling should be attempted {0}
+  // Returns true iff thread pool was invoked
+  template<typename Index, typename FunctionType >
+  inline bool parallel_for(
+    const Index loop_size, 
+    const FunctionType & func,
+    const Index min_parallel=0);
+  // PARALLEL_FOR Functional implementation of an open-mp style, parallel for
+  // loop with accumulation. For example, serial code separated into n chunks
+  // (each to be parallelized with a thread) might look like:
+  //     
+  //     Eigen::VectorXd S;
+  //     const auto & prep_func = [&S](int n){ S = Eigen:VectorXd::Zero(n); };
+  //     const auto & func = [&X,&S](int i, int t){ S(t) += X(i); };
+  //     const auto & accum_func = [&S,&sum](int t){ sum += S(t); };
+  //     prep_func(n);
+  //     for(int i = 0;i<loop_size;i++)
+  //     {
+  //       func(i,i%n);
+  //     }
+  //     double sum = 0;
+  //     for(int t = 0;t<n;t++)
+  //     {
+  //       accum_func(t);
+  //     }
+  // 
+  // Inputs:
+  //   loop_size  number of iterations. I.e. for(int i = 0;i<loop_size;i++) ...
+  //   prep_func function handle taking n >= number of threads as only
+  //     argument 
+  //   func  function handle taking iteration index i and thread id t as only
+  //     arguements to compute inner block of for loop I.e. 
+  //     for(int i ...){ func(i,t); }
+  //   accum_func  function handle taking thread index as only argument, to be
+  //     called after all calls of func, e.g., for serial accumulation across
+  //     all n (potential) threads, see n in description of prep_func.
+  //   min_parallel  min size of loop_size such that parallel (non-serial)
+  //     thread pooling should be attempted {0}
+  // Returns true iff thread pool was invoked
+  template<
+    typename Index, 
+    typename PrepFunctionType, 
+    typename FunctionType, 
+    typename AccumFunctionType 
+    >
+  inline bool parallel_for(
+    const Index loop_size, 
+    const PrepFunctionType & prep_func,
+    const FunctionType & func,
+    const AccumFunctionType & accum_func,
+    const Index min_parallel=0);
+}
+
+// Implementation
+
+#include <cmath>
+#include <cassert>
+#include <thread>
+#include <vector>
+#include <algorithm>
+
+template<typename Index, typename FunctionType >
+inline bool igl::parallel_for(
+  const Index loop_size, 
+  const FunctionType & func,
+  const Index min_parallel)
+{
+  using namespace std;
+  // no op preparation/accumulation
+  const auto & no_op = [](const size_t /*n/t*/){};
+  // two-parameter wrapper ignoring thread id
+  const auto & wrapper = [&func](Index i,size_t /*t*/){ func(i); };
+  return parallel_for(loop_size,no_op,wrapper,no_op,min_parallel);
+}
+
+template<
+  typename Index, 
+  typename PreFunctionType,
+  typename FunctionType, 
+  typename AccumFunctionType>
+inline bool igl::parallel_for(
+  const Index loop_size, 
+  const PreFunctionType & prep_func,
+  const FunctionType & func,
+  const AccumFunctionType & accum_func,
+  const Index min_parallel)
+{
+  assert(loop_size>=0);
+  if(loop_size==0) return false;
+  // Estimate number of threads in the pool
+  // http://ideone.com/Z7zldb
+  const static size_t sthc = std::thread::hardware_concurrency();
+  const size_t nthreads = loop_size<min_parallel?0:(sthc==0?8:sthc);
+  if(nthreads==0)
+  {
+    // serial
+    prep_func(1);
+    for(Index i = 0;i<loop_size;i++) func(i,0);
+    accum_func(0);
+    return false;
+  }else
+  {
+    // Size of a slice for the range functions
+    Index slice = 
+      std::max(
+        (Index)std::round((loop_size+1)/static_cast<double>(nthreads)),(Index)1);
+ 
+    // [Helper] Inner loop
+    const auto & range = [&func](const Index k1, const Index k2, const size_t t)
+    {
+      for(Index k = k1; k < k2; k++) func(k,t);
+    };
+    prep_func(nthreads);
+    // Create pool and launch jobs
+    std::vector<std::thread> pool;
+    pool.reserve(nthreads);
+    // Inner range extents
+    Index i1 = 0;
+    Index i2 = std::min(0 + slice, loop_size);
+    {
+      size_t t = 0;
+      for (; t+1 < nthreads && i1 < loop_size; ++t)
+      {
+        pool.emplace_back(range, i1, i2, t);
+        i1 = i2;
+        i2 = std::min(i2 + slice, loop_size);
+      }
+      if (i1 < loop_size)
+      {
+        pool.emplace_back(range, i1, loop_size, t);
+      }
+    }
+    // Wait for jobs to finish
+    for (std::thread &t : pool) if (t.joinable()) t.join();
+    // Accumulate across threads
+    for(size_t t = 0;t<nthreads;t++)
+    {
+      accum_func(t);
+    }
+    return true;
+  }
+}
+ 
+//#ifndef IGL_STATIC_LIBRARY
+//#include "parallel_for.cpp"
+//#endif
+#endif

+ 23 - 11
include/igl/per_vertex_normals.cpp

@@ -10,6 +10,7 @@
 #include "get_seconds.h"
 #include "per_face_normals.h"
 #include "doublearea.h"
+#include "parallel_for.h"
 #include "internal_angles.h"
 
 template <typename DerivedV, typename DerivedF>
@@ -68,23 +69,34 @@ IGL_INLINE void igl::per_vertex_normals(
   }
 
   // loop over faces
-  const int Frows = F.rows();
-//// Minimum number of iterms per openmp thread
-//#ifndef IGL_OMP_MIN_VALUE
-//#  define IGL_OMP_MIN_VALUE 1000
-//#endif
-//#pragma omp parallel for if (Frows>IGL_OMP_MIN_VALUE)
-  for(int i = 0; i < Frows;i++)
+  for(int i = 0;i<F.rows();i++)
   {
     // throw normal at each corner
     for(int j = 0; j < 3;j++)
     {
-      // Q: Does this need to be critical?
-      // A: Yes. Different (i,j)'s could produce the same F(i,j)
-//#pragma omp critical
-      N.row(F(i,j)) += W(i,j)*FN.row(i);
+      N.row(F(i,j)) += W(i,j) * FN.row(i);
     }
   }
+
+  //// loop over faces
+  //std::mutex critical;
+  //std::vector<DerivedN> NN;
+  //parallel_for(
+  //  F.rows(),
+  //  [&NN,&N](const size_t n){ NN.resize(n,DerivedN::Zero(N.rows(),3));},
+  //  [&F,&W,&FN,&NN,&critical](const int i, const size_t t)
+  //  {
+  //    // throw normal at each corner
+  //    for(int j = 0; j < 3;j++)
+  //    {
+  //      // Q: Does this need to be critical?
+  //      // A: Yes. Different (i,j)'s could produce the same F(i,j)
+  //      NN[t].row(F(i,j)) += W(i,j) * FN.row(i);
+  //    }
+  //  },
+  //  [&N,&NN](const size_t t){ N += NN[t]; },
+  //  1000l);
+
   // take average via normalization
   N.rowwise().normalize();
 }

+ 66 - 87
include/igl/sort.cpp

@@ -11,12 +11,11 @@
 #include "reorder.h"
 #include "IndexComparison.h"
 #include "colon.h"
+#include "parallel_for.h"
 
 #include <cassert>
 #include <algorithm>
 #include <iostream>
-#include <thread>
-#include <functional>
 
 template <typename DerivedX, typename DerivedY, typename DerivedIX>
 IGL_INLINE void igl::sort(
@@ -223,106 +222,86 @@ IGL_INLINE void igl::sort3(
     IX.col(2).setConstant(2);// = Eigen::PlainObjectBase<DerivedIX>::Ones (IX.rows(),1);
   }
 
-  const int n = num_outer;
-  const size_t nthreads = n<8000?1:std::thread::hardware_concurrency();
+
+  const auto & inner = [&IX,&Y,&dim,&ascending](const Index & i)
   {
-    std::vector<std::thread> threads(nthreads<=1?0:nthreads);
-    for(int t = 0;t<nthreads;t++)
+    YScalar & a = (dim==1 ? Y(0,i) : Y(i,0));
+    YScalar & b = (dim==1 ? Y(1,i) : Y(i,1));
+    YScalar & c = (dim==1 ? Y(2,i) : Y(i,2));
+    Index & ai = (dim==1 ? IX(0,i) : IX(i,0));
+    Index & bi = (dim==1 ? IX(1,i) : IX(i,1));
+    Index & ci = (dim==1 ? IX(2,i) : IX(i,2));
+    if(ascending)
     {
-      const auto & inner =  
-        [&X,&Y,&IX,&dim,&ascending](const int bi, const int ei, const int t)
+      // 123 132 213 231 312 321
+      if(a > b)
+      {
+        std::swap(a,b);
+        std::swap(ai,bi);
+      }
+      // 123 132 123 231 132 231
+      if(b > c)
       {
-        // loop over columns (or rows)
-        for(int i = bi;i<ei;i++)
+        std::swap(b,c);
+        std::swap(bi,ci);
+        // 123 123 123 213 123 213
+        if(a > b)
         {
-          YScalar & a = (dim==1 ? Y(0,i) : Y(i,0));
-          YScalar & b = (dim==1 ? Y(1,i) : Y(i,1));
-          YScalar & c = (dim==1 ? Y(2,i) : Y(i,2));
-          Index & ai = (dim==1 ? IX(0,i) : IX(i,0));
-          Index & bi = (dim==1 ? IX(1,i) : IX(i,1));
-          Index & ci = (dim==1 ? IX(2,i) : IX(i,2));
-          if(ascending)
-          {
-            // 123 132 213 231 312 321
-            if(a > b)
-            {
-              std::swap(a,b);
-              std::swap(ai,bi);
-            }
-            // 123 132 123 231 132 231
-            if(b > c)
-            {
-              std::swap(b,c);
-              std::swap(bi,ci);
-              // 123 123 123 213 123 213
-              if(a > b)
-              {
-                std::swap(a,b);
-                std::swap(ai,bi);
-              }
-              // 123 123 123 123 123 123
-            }
-          }else
-          {
-            // 123 132 213 231 312 321
-            if(a < b)
-            {
-              std::swap(a,b);
-              std::swap(ai,bi);
-            }
-            // 213 312 213 321 312 321
-            if(b < c)
-            {
-              std::swap(b,c);
-              std::swap(bi,ci);
-              // 231 321 231 321 321 321
-              if(a < b)
-              {
-                std::swap(a,b);
-                std::swap(ai,bi);
-              }
-              // 321 321 321 321 321 321
-            }
-          }
+          std::swap(a,b);
+          std::swap(ai,bi);
         }
-      };
-      if(nthreads == 1)
+        // 123 123 123 123 123 123
+      }
+    }else
+    {
+      // 123 132 213 231 312 321
+      if(a < b)
       {
-        inner(0,n,0);
-      }else
+        std::swap(a,b);
+        std::swap(ai,bi);
+      }
+      // 213 312 213 321 312 321
+      if(b < c)
       {
-        threads[t] = std::thread(
-          std::bind(inner,t*n/nthreads,(t+1)==nthreads?n:(t+1)*n/nthreads,t));
+        std::swap(b,c);
+        std::swap(bi,ci);
+        // 231 321 231 321 321 321
+        if(a < b)
+        {
+          std::swap(a,b);
+          std::swap(ai,bi);
+        }
+        // 321 321 321 321 321 321
       }
     }
-    std::for_each(threads.begin(),threads.end(),[](std::thread& x){x.join();});
-  }
+  };
+  parallel_for(num_outer,inner,16000);
 }
 
 template <class T>
 IGL_INLINE void igl::sort(
-  const std::vector<T> & unsorted,
-  const bool ascending,
-  std::vector<T> & sorted,
-  std::vector<size_t> & index_map)
+const std::vector<T> & unsorted,
+const bool ascending,
+std::vector<T> & sorted,
+std::vector<size_t> & index_map)
 {
-  // Original unsorted index map
-  index_map.resize(unsorted.size());
-  for(size_t i=0;i<unsorted.size();i++)
-  {
-    index_map[i] = i;
-  }
-  // Sort the index map, using unsorted for comparison
-  std::sort(
-    index_map.begin(),
-    index_map.end(),
-    igl::IndexLessThan<const std::vector<T>& >(unsorted));
+// Original unsorted index map
+index_map.resize(unsorted.size());
+for(size_t i=0;i<unsorted.size();i++)
+{
+  index_map[i] = i;
+}
+// Sort the index map, using unsorted for comparison
+std::sort(
+  index_map.begin(),
+  index_map.end(),
+  igl::IndexLessThan<const std::vector<T>& >(unsorted));
 
-  // if not ascending then reverse
-  if(!ascending)
-  {
-    std::reverse(index_map.begin(),index_map.end());
-  }
+// if not ascending then reverse
+if(!ascending)
+{
+  std::reverse(index_map.begin(),index_map.end());
+}
   // make space for output without clobbering
   sorted.resize(unsorted.size());
   // reorder unsorted into sorted using index map

+ 23 - 21
include/igl/triangle_triangle_adjacency.cpp

@@ -9,6 +9,7 @@
 #include "is_edge_manifold.h"
 #include "all_edges.h"
 #include "unique_simplices.h"
+#include "parallel_for.h"
 #include "unique_edge_map.h"
 #include <algorithm>
 #include <iostream>
@@ -172,36 +173,37 @@ template <
   }
 
   // No race conditions because TT*[f][c]'s are in bijection with e's
-  // Minimum number of iterms per openmp thread
+  // Minimum number of items per thread
   //const size_t num_e = E.rows();
-# ifndef IGL_OMP_MIN_VALUE
-#   define IGL_OMP_MIN_VALUE 1000
-# endif
-# pragma omp parallel for if (m>IGL_OMP_MIN_VALUE)
   // Slightly better memory access than loop over E
-  for(Index f = 0;f<(Index)m;f++)
-  {
-    for(Index c = 0;c<3;c++)
+  igl::parallel_for(
+    m,
+    [&](const Index & f)
     {
-      const Index e = f + m*c;
-      //const Index c = e/m;
-      const vector<uE2EType> & N = uE2E[EMAP(e)];
-      for(const auto & ne : N)
+      for(Index c = 0;c<3;c++)
       {
-        const Index nf = ne%m;
-        // don't add self
-        if(nf != f)
+        const Index e = f + m*c;
+        //const Index c = e/m;
+        const vector<uE2EType> & N = uE2E[EMAP(e)];
+        for(const auto & ne : N)
         {
-          TT[f][c].push_back(nf);
-          if(construct_TTi)
+          const Index nf = ne%m;
+          // don't add self
+          if(nf != f)
           {
-            const Index nc = ne/m;
-            TTi[f][c].push_back(nc);
+            TT[f][c].push_back(nf);
+            if(construct_TTi)
+            {
+              const Index nc = ne/m;
+              TTi[f][c].push_back(nc);
+            }
           }
         }
       }
-    }
-  }
+    },
+    1000ul);
+
+
 }
 
 #ifdef IGL_STATIC_LIBRARY

+ 3 - 12
include/igl/unique_simplices.cpp

@@ -1,6 +1,6 @@
 // This file is part of libigl, a simple c++ geometry processing library.
 // 
-// Copyright (C) 2013 Alec Jacobson <alecjacobson@gmail.com>
+// Copyright (C) 2016 Alec Jacobson <alecjacobson@gmail.com>
 // 
 // This Source Code Form is subject to the terms of the Mozilla Public License 
 // v. 2.0. If a copy of the MPL was not distributed with this file, You can 
@@ -8,7 +8,7 @@
 #include "unique_simplices.h"
 #include "sort.h"
 #include "unique.h"
-#include "get_seconds.h"
+#include "parallel_for.h"
 
 template <
   typename DerivedF,
@@ -31,16 +31,7 @@ IGL_INLINE void igl::unique_simplices(
   igl::unique_rows(sortF,C,IA,IC);
   FF.resize(IA.size(),F.cols());
   const size_t mff = FF.rows();
-  // Minimum number of iterms per openmp thread
-  #ifndef IGL_OMP_MIN_VALUE
-  #  define IGL_OMP_MIN_VALUE 1000
-  #endif
-  #pragma omp parallel for if (mff>IGL_OMP_MIN_VALUE)
-  // Copy into output
-  for(size_t i = 0;i<mff;i++)
-  {
-    FF.row(i) = F.row(IA(i));
-  }
+  parallel_for(mff,[&F,&IA,&FF](size_t & i){FF.row(i) = F.row(IA(i));},1000ul);
 }
 
 template <