소스 검색

optimized the precomputation of sparse_fast, now it is comparable to sparse (2-3x slower)

Former-commit-id: 5daa31ebbe2d434c2e54e804a31d4aafc314a8d5
Daniele Panozzo 7 년 전
부모
커밋
35cd7d4dc2
3개의 변경된 파일60개의 추가작업 그리고 77개의 파일을 삭제
  1. 34 52
      include/igl/slim.cpp
  2. 1 1
      include/igl/sparse_AtA_fast.cpp
  3. 25 24
      include/igl/sparse_fast.cpp

+ 34 - 52
include/igl/slim.cpp

@@ -507,21 +507,8 @@ namespace igl
       t.stop();
       std::cerr << "buildA: " << t.getElapsedTime() << std::endl;
 
-      // // // TEST CODE
-      // std::vector<Eigen::Triplet<double> > test;
-      // test.push_back(Eigen::Triplet<double>(4,3,10));
-      // test.push_back(Eigen::Triplet<double>(1,0,20));
-      // test.push_back(Eigen::Triplet<double>(2,2,30));
-      // test.push_back(Eigen::Triplet<double>(4,4,2.5));
-
-      // A = Eigen::SparseMatrix<double>(5,5);
-      // A.setFromTriplets(test.begin(),test.end());
       A.makeCompressed();
 
-      //std::cin.get();
-
-      
-
       t.start();
       Eigen::SparseMatrix<double> At = A.transpose();
       At.makeCompressed();
@@ -534,35 +521,28 @@ namespace igl
       t.stop();
       std::cerr << "idm: " << t.getElapsedTime() << std::endl;
 
-      // std::cerr << "\n TEST CODE" << std::endl;
-      //   s.WGL_M.resize(5);
-      //   s.WGL_M[0] = 1;
-      //   s.WGL_M[1] = 2;
-      //   s.WGL_M[2] = 3;
-      //   s.WGL_M[3] = 3;
-      //   s.WGL_M[4] = 10;
-      t.start();
-      Eigen::SparseMatrix<double> AtA_slow = At * s.WGL_M.asDiagonal() * A;
-      t.stop();
-      std::cerr << "AtA slow: " << t.getElapsedTime() << std::endl;
-      // std::cerr << "AtA slow: " << AtA_slow << std::endl;
+      // t.start();
+      // Eigen::SparseMatrix<double> AtA_slow = At * s.WGL_M.asDiagonal() * A;
+      // t.stop();
+      // std::cerr << "AtA slow: " << t.getElapsedTime() << std::endl;
+      // // std::cerr << "AtA slow: " << AtA_slow << std::endl;
 
-      t.start();
-      Eigen::SparseMatrix<double> AtA_fast;
-      igl::sparse_AtA_fast_data data;
-      data.W = s.WGL_M;
-      igl::sparse_AtA_fast_precompute(A,AtA_fast,data);
-      t.stop();
-      std::cerr << "AtA fast pre: " << t.getElapsedTime() << std::endl;
-      t.start();
-      igl::sparse_AtA_fast(A,AtA_fast,data);
-      t.stop();
-      std::cerr << "AtA fast: " << t.getElapsedTime() << std::endl;
-      // std::cerr << "AtA fast: " << AtA_fast  << std::endl;
+      // t.start();
+      // Eigen::SparseMatrix<double> AtA_fast;
+      // igl::sparse_AtA_fast_data data;
+      // data.W = s.WGL_M;
+      // igl::sparse_AtA_fast_precompute(A,AtA_fast,data);
+      // t.stop();
+      // std::cerr << "AtA fast pre: " << t.getElapsedTime() << std::endl;
+      // t.start();
+      // igl::sparse_AtA_fast(A,AtA_fast,data);
+      // t.stop();
+      // std::cerr << "AtA fast: " << t.getElapsedTime() << std::endl;
+      // // std::cerr << "AtA fast: " << AtA_fast  << std::endl;
 
-      std::cerr << "Difference: " << (AtA_fast - AtA_slow).norm() << std::endl;
+      // std::cerr << "Difference: " << (AtA_fast - AtA_slow).norm() << std::endl;
 
-        exit(0);
+      //   exit(0);
 
       // add proximal penalty
       t.start();
@@ -868,24 +848,26 @@ namespace igl
         }
       }
 
-      // igl::Timer t;
-      // t.start();
+      igl::Timer t;
+      t.start();
       A.setFromTriplets(IJV.begin(), IJV.end());
-      // t.stop();
-      // std::cerr << "setFromTriplets: " << t.getElapsedTime() << std::endl;
+      t.stop();
+      std::cerr << "setFromTriplets: " << t.getElapsedTime() << std::endl;
+      Eigen::SparseMatrix<double> A_slow = A;
 
       // // Debug Code
-      // Eigen::VectorXi data;
-      // t.start();
-      // igl::fast_sparse_precompute(IJV,A,data);
-      // t.stop();
-      // std::cerr << "fast_precompute: " << t.getElapsedTime() << std::endl;
+      Eigen::VectorXi data;
+      t.start();
+      igl::sparse_fast_precompute(IJV,A,data);
+      t.stop();
+      std::cerr << "fast_precompute: " << t.getElapsedTime() << std::endl;
 
-      // t.start();
-      // igl::fast_sparse(IJV,A,data);
-      // t.stop();
-      // std::cerr << "fast_precompute: " << t.getElapsedTime() << std::endl;
+      t.start();
+      igl::sparse_fast(IJV,A,data);
+      t.stop();
+      std::cerr << "fast_eval: " << t.getElapsedTime() << std::endl;
 
+      std::cerr << "Norm: " << (A - A_slow).norm() << std::endl;
 
     }
 

+ 1 - 1
include/igl/sparse_AtA_fast.cpp

@@ -18,7 +18,7 @@ IGL_INLINE void igl::sparse_AtA_fast_precompute(
     Eigen::SparseMatrix<double>& AtA,
     igl::sparse_AtA_fast_data& data)
 {
-  // 1 Compute At
+  // 1 Compute At (this could be avoided, but performance-wise it will not make a difference)
   std::vector<std::vector<int> > Col_RowPtr;
   std::vector<std::vector<int> > Col_IndexPtr;
 

+ 25 - 24
include/igl/sparse_fast.cpp

@@ -9,25 +9,11 @@
 
 #include <iostream>
 #include <vector>
+#include <array>
 #include <unordered_map>
 #include <map>
 #include <utility>
 
-// Hashing function
-// namespace std {
-//     template<> 
-//     struct hash<pair<int, int> > {
-//         size_t operator()(const pair<int, int>& p) const {
-//             size_t seed = 0;
-//             hash<int> h;
-//             seed ^= h(p.first) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 
-//             seed ^= h(p.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
-//             return seed;
-//         }
-//     };
-// }
-
-
 IGL_INLINE void igl::sparse_fast_precompute(
   const Eigen::VectorXi & I,
   const Eigen::VectorXi & J,
@@ -52,9 +38,19 @@ IGL_INLINE void igl::sparse_fast_precompute(
     X.setFromTriplets(triplets.begin(),triplets.end());
     X.makeCompressed();
 
-    // Build hash table for all nnz entries
-    // TODO: this is slow and could be done in nlogn
-    std::map<std::pair<int,int>,int> id;
+    std::vector<std::array<int,3> > T(triplets.size());
+    for (unsigned i=0; i<triplets.size(); ++i)
+    {
+      T[i][0] = triplets[i].col();
+      T[i][1] = triplets[i].row();
+      T[i][2] = i;
+    }
+
+    std::sort(T.begin(), T.end());
+
+    data.resize(triplets.size());
+
+    int t = 0;
 
     for (unsigned k=0; k<X.outerSize(); ++k)
     {
@@ -66,16 +62,21 @@ IGL_INLINE void igl::sparse_fast_precompute(
         int col = k;
         int row = *(X.innerIndexPtr()+l);
         int value_index = l;
+        assert(col < X.cols());
+        assert(col >= 0);
+        assert(row < X.rows());
+        assert(row >= 0);
+        assert(value_index >= 0);
+        assert(value_index < X.nonZeros());
 
-        std::pair<int,int> rc = std::make_pair(row,col);
-        id[rc] = value_index;
+        std::pair<int,int> p_m = std::make_pair(row,col);
+
+        while (t<T.size() && (p_m == std::make_pair(T[t][1],T[t][0])))
+          data[T[t++][2]] = value_index;
       }
     }
+    assert(t==T.size());
 
-    // Compute the indices
-    data.resize(triplets.size());
-    for (unsigned i=0; i<triplets.size(); ++i)
-      data[i] = id[std::make_pair(triplets[i].row(),triplets[i].col())];
 }
   
 IGL_INLINE void igl::sparse_fast(