Эх сурвалжийг харах

Update .travis.yml

Former-commit-id: 941b40334633a419e095aa62cf69ee4adab63800
Jérémie Dumas 6 жил өмнө
parent
commit
ed363da1f6

+ 0 - 8
.travis.yml

@@ -66,11 +66,3 @@ script:
 - ccache --show-stats
 - cd ../
 - rm -rf build
-
-# Python bindings (do these last; they are the least important)
-- cd python
-- ${PYTHON} setup.py develop --user -- -DCMAKE_BUILD_TYPE=${CONFIG} -DLIBIGL_WITH_EMBREE=OFF -DLIBIGL_USE_STATIC_LIBRARY=ON -DCHECK_UNDEFINED=${CHECK_UNDEFINED}
-- cd tutorial
-- ${PYTHON} 101_FileIO.py
-- cd ../../
-- rm -rf python/build

+ 1277 - 0
include/igl/Singular_Value_Decomposition_Main_Kernel_Body.hpp

@@ -0,0 +1,1277 @@
+//#####################################################################
+// Copyright (c) 2010-2011, Eftychios Sifakis.
+//
+// Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+//   * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+//   * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or
+//     other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
+// BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+// SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//#####################################################################
+
+#ifdef __INTEL_COMPILER
+#pragma warning( disable : 592 )
+#endif
+
+// #define USE_ACCURATE_RSQRT_IN_JACOBI_CONJUGATION
+// #define PERFORM_STRICT_QUATERNION_RENORMALIZATION
+
+{ // Begin block : Scope of qV (if not maintained)
+/*
+ * Storage for the four components of the quaternion qV:
+ * scalar part qvs and vector part (qvvx, qvvy, qvvz).
+ *
+ * These declarations are emitted only when COMPUTE_V_AS_QUATERNION is NOT
+ * defined. When it is defined, nothing is declared here and the same names
+ * must already be in scope (presumably provided by the including code, which
+ * is not visible in this span).
+ *
+ * Each line declares the same quantity once per backend: a float/unsigned
+ * union for the scalar build, an __m128 for SSE and an __m256 for AVX.
+ */
+#ifndef COMPUTE_V_AS_QUATERNION
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sqvs;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vqvs;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vqvs;) 
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sqvvx;)                 ENABLE_SSE_IMPLEMENTATION(__m128 Vqvvx;)                                                  ENABLE_AVX_IMPLEMENTATION(__m256 Vqvvx;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sqvvy;)                 ENABLE_SSE_IMPLEMENTATION(__m128 Vqvvy;)                                                  ENABLE_AVX_IMPLEMENTATION(__m256 Vqvvy;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sqvvz;)                 ENABLE_SSE_IMPLEMENTATION(__m128 Vqvvz;)                                                  ENABLE_AVX_IMPLEMENTATION(__m256 Vqvvz;)
+#endif
+
+{ // Begin block : Symmetric eigenanalysis
+
+    // Working storage for the six distinct entries (s11, s21, s31, s22, s32, s33) of the
+    // symmetric 3x3 matrix S used by the eigenanalysis. One variable per enabled backend.
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Ss11;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vs11;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vs11;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Ss21;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vs21;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vs21;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Ss31;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vs31;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vs31;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Ss22;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vs22;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vs22;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Ss32;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vs32;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vs32;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Ss33;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vs33;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vs33;)
+
+    // Initialise qV: scalar part from the constant one (Vone is defined outside this span;
+    // presumably all lanes 1.0f), vector part to zero.
+    // The vector lanes are cleared with the setzero intrinsics instead of XOR-ing a register with
+    // itself: when the qV variables are declared locally in this file they hold no value yet, so the
+    // XOR form reads them before anything has been stored. The resulting value (all bits zero) is the same.
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=1.;)                                              ENABLE_SSE_IMPLEMENTATION(Vqvs=Vone;)                                                     ENABLE_AVX_IMPLEMENTATION(Vqvs=Vone;)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvx.f=0.;)                                             ENABLE_SSE_IMPLEMENTATION(Vqvvx=_mm_setzero_ps();)                                        ENABLE_AVX_IMPLEMENTATION(Vqvvx=_mm256_setzero_ps();)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvy.f=0.;)                                             ENABLE_SSE_IMPLEMENTATION(Vqvvy=_mm_setzero_ps();)                                        ENABLE_AVX_IMPLEMENTATION(Vqvvy=_mm256_setzero_ps();)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvz.f=0.;)                                             ENABLE_SSE_IMPLEMENTATION(Vqvvz=_mm_setzero_ps();)                                        ENABLE_AVX_IMPLEMENTATION(Vqvvz=_mm256_setzero_ps();)
+
+    //###########################################################
+    // Compute normal equations matrix
+    //###########################################################
+/*
+ * Forms the six distinct entries of S from the input entries a11..a33
+ * (Sa** / Va**, defined outside this span). With a_ij read as row i,
+ * column j, the sums below are the entries of transpose(A) * A; only
+ * s11, s21, s31, s22, s32 and s33 are computed.
+ *
+ * Every entry is a three-term sum built up through the shared temporary
+ * Stmp1 / Vtmp1 (declared outside this span), so the order of the
+ * statements inside each group must be preserved.
+ *
+ * s11 = a11*a11 + a21*a21 + a31*a31
+ */
+    ENABLE_SCALAR_IMPLEMENTATION(Ss11.f=Sa11.f*Sa11.f;)                                   ENABLE_SSE_IMPLEMENTATION(Vs11=_mm_mul_ps(Va11,Va11);)                                    ENABLE_AVX_IMPLEMENTATION(Vs11=_mm256_mul_ps(Va11,Va11);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa21.f*Sa21.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va21,Va21);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va21,Va21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss11.f=Stmp1.f+Ss11.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs11=_mm_add_ps(Vtmp1,Vs11);)                                   ENABLE_AVX_IMPLEMENTATION(Vs11=_mm256_add_ps(Vtmp1,Vs11);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa31.f*Sa31.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va31,Va31);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va31,Va31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss11.f=Stmp1.f+Ss11.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs11=_mm_add_ps(Vtmp1,Vs11);)                                   ENABLE_AVX_IMPLEMENTATION(Vs11=_mm256_add_ps(Vtmp1,Vs11);)
+    // s21 = a12*a11 + a22*a21 + a32*a31
+    ENABLE_SCALAR_IMPLEMENTATION(Ss21.f=Sa12.f*Sa11.f;)                                   ENABLE_SSE_IMPLEMENTATION(Vs21=_mm_mul_ps(Va12,Va11);)                                    ENABLE_AVX_IMPLEMENTATION(Vs21=_mm256_mul_ps(Va12,Va11);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa22.f*Sa21.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va22,Va21);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va22,Va21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss21.f=Stmp1.f+Ss21.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs21=_mm_add_ps(Vtmp1,Vs21);)                                   ENABLE_AVX_IMPLEMENTATION(Vs21=_mm256_add_ps(Vtmp1,Vs21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa32.f*Sa31.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va32,Va31);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va32,Va31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss21.f=Stmp1.f+Ss21.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs21=_mm_add_ps(Vtmp1,Vs21);)                                   ENABLE_AVX_IMPLEMENTATION(Vs21=_mm256_add_ps(Vtmp1,Vs21);)
+    // s31 = a13*a11 + a23*a21 + a33*a31
+    ENABLE_SCALAR_IMPLEMENTATION(Ss31.f=Sa13.f*Sa11.f;)                                   ENABLE_SSE_IMPLEMENTATION(Vs31=_mm_mul_ps(Va13,Va11);)                                    ENABLE_AVX_IMPLEMENTATION(Vs31=_mm256_mul_ps(Va13,Va11);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa23.f*Sa21.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va23,Va21);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va23,Va21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss31.f=Stmp1.f+Ss31.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs31=_mm_add_ps(Vtmp1,Vs31);)                                   ENABLE_AVX_IMPLEMENTATION(Vs31=_mm256_add_ps(Vtmp1,Vs31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa33.f*Sa31.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va33,Va31);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va33,Va31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss31.f=Stmp1.f+Ss31.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs31=_mm_add_ps(Vtmp1,Vs31);)                                   ENABLE_AVX_IMPLEMENTATION(Vs31=_mm256_add_ps(Vtmp1,Vs31);)
+    // s22 = a12*a12 + a22*a22 + a32*a32
+    ENABLE_SCALAR_IMPLEMENTATION(Ss22.f=Sa12.f*Sa12.f;)                                   ENABLE_SSE_IMPLEMENTATION(Vs22=_mm_mul_ps(Va12,Va12);)                                    ENABLE_AVX_IMPLEMENTATION(Vs22=_mm256_mul_ps(Va12,Va12);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa22.f*Sa22.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va22,Va22);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va22,Va22);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss22.f=Stmp1.f+Ss22.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs22=_mm_add_ps(Vtmp1,Vs22);)                                   ENABLE_AVX_IMPLEMENTATION(Vs22=_mm256_add_ps(Vtmp1,Vs22);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa32.f*Sa32.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va32,Va32);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va32,Va32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss22.f=Stmp1.f+Ss22.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs22=_mm_add_ps(Vtmp1,Vs22);)                                   ENABLE_AVX_IMPLEMENTATION(Vs22=_mm256_add_ps(Vtmp1,Vs22);)
+    // s32 = a13*a12 + a23*a22 + a33*a32
+    ENABLE_SCALAR_IMPLEMENTATION(Ss32.f=Sa13.f*Sa12.f;)                                   ENABLE_SSE_IMPLEMENTATION(Vs32=_mm_mul_ps(Va13,Va12);)                                    ENABLE_AVX_IMPLEMENTATION(Vs32=_mm256_mul_ps(Va13,Va12);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa23.f*Sa22.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va23,Va22);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va23,Va22);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss32.f=Stmp1.f+Ss32.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs32=_mm_add_ps(Vtmp1,Vs32);)                                   ENABLE_AVX_IMPLEMENTATION(Vs32=_mm256_add_ps(Vtmp1,Vs32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa33.f*Sa32.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va33,Va32);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va33,Va32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss32.f=Stmp1.f+Ss32.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs32=_mm_add_ps(Vtmp1,Vs32);)                                   ENABLE_AVX_IMPLEMENTATION(Vs32=_mm256_add_ps(Vtmp1,Vs32);)
+    // s33 = a13*a13 + a23*a23 + a33*a33
+    ENABLE_SCALAR_IMPLEMENTATION(Ss33.f=Sa13.f*Sa13.f;)                                   ENABLE_SSE_IMPLEMENTATION(Vs33=_mm_mul_ps(Va13,Va13);)                                    ENABLE_AVX_IMPLEMENTATION(Vs33=_mm256_mul_ps(Va13,Va13);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa23.f*Sa23.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va23,Va23);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va23,Va23);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss33.f=Stmp1.f+Ss33.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs33=_mm_add_ps(Vtmp1,Vs33);)                                   ENABLE_AVX_IMPLEMENTATION(Vs33=_mm256_add_ps(Vtmp1,Vs33);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa33.f*Sa33.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va33,Va33);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va33,Va33);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ss33.f=Stmp1.f+Ss33.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vs33=_mm_add_ps(Vtmp1,Vs33);)                                   ENABLE_AVX_IMPLEMENTATION(Vs33=_mm256_add_ps(Vtmp1,Vs33);)
+    
+    //###########################################################
+    // Solve symmetric eigenproblem using Jacobi iteration
+    //###########################################################
+/*
+ * Runs a fixed number of sweeps (sweep = 1..4). In every sweep the file
+ * Singular_Value_Decomposition_Jacobi_Conjugation_Kernel.hpp is textually
+ * included three times.
+ *
+ * The kernel is written against generic names: SS11..SS33, SQVVX/Y/Z and
+ * STMP1..3 for the scalar build, VS11..VS33, VQVVX/Y/Z and VTMP1..3 for the
+ * vector builds. Before each inclusion those names are #define'd to concrete
+ * variables, and they are #undef'ed afterwards so that the next inclusion can
+ * bind them differently. The three bindings are cyclic shifts of the index
+ * set (1,2,3) / (x,y,z), so the same kernel text is applied to a different
+ * arrangement of the entries of S and of the qV vector part each time.
+ *
+ * What the kernel does with these names is not visible in this span.
+ */
+    for(int sweep=1;sweep<=4;sweep++){
+
+        // First Jacobi conjugation: generic names bound to the variables with the same indices
+
+#define SS11 Ss11
+#define SS21 Ss21
+#define SS31 Ss31
+#define SS22 Ss22
+#define SS32 Ss32
+#define SS33 Ss33
+#define SQVVX Sqvvx
+#define SQVVY Sqvvy
+#define SQVVZ Sqvvz
+#define STMP1 Stmp1
+#define STMP2 Stmp2
+#define STMP3 Stmp3
+
+#define VS11 Vs11
+#define VS21 Vs21
+#define VS31 Vs31
+#define VS22 Vs22
+#define VS32 Vs32
+#define VS33 Vs33
+#define VQVVX Vqvvx
+#define VQVVY Vqvvy
+#define VQVVZ Vqvvz
+#define VTMP1 Vtmp1
+#define VTMP2 Vtmp2
+#define VTMP3 Vtmp3
+
+#include "Singular_Value_Decomposition_Jacobi_Conjugation_Kernel.hpp"
+
+#undef SS11
+#undef SS21
+#undef SS31
+#undef SS22
+#undef SS32
+#undef SS33
+#undef SQVVX
+#undef SQVVY
+#undef SQVVZ
+#undef STMP1
+#undef STMP2
+#undef STMP3
+
+#undef VS11
+#undef VS21
+#undef VS31
+#undef VS22
+#undef VS32
+#undef VS33
+#undef VQVVX
+#undef VQVVY
+#undef VQVVZ
+#undef VTMP1
+#undef VTMP2
+#undef VTMP3
+
+        // Second Jacobi conjugation: indices shifted cyclically (1->2, 2->3, 3->1; x->y, y->z, z->x)
+
+#define SS11 Ss22
+#define SS21 Ss32
+#define SS31 Ss21
+#define SS22 Ss33
+#define SS32 Ss31
+#define SS33 Ss11
+#define SQVVX Sqvvy
+#define SQVVY Sqvvz
+#define SQVVZ Sqvvx
+#define STMP1 Stmp2
+#define STMP2 Stmp3
+#define STMP3 Stmp1
+
+#define VS11 Vs22
+#define VS21 Vs32
+#define VS31 Vs21
+#define VS22 Vs33
+#define VS32 Vs31
+#define VS33 Vs11
+#define VQVVX Vqvvy
+#define VQVVY Vqvvz
+#define VQVVZ Vqvvx
+#define VTMP1 Vtmp2
+#define VTMP2 Vtmp3
+#define VTMP3 Vtmp1
+
+#include "Singular_Value_Decomposition_Jacobi_Conjugation_Kernel.hpp"
+
+#undef SS11
+#undef SS21
+#undef SS31
+#undef SS22
+#undef SS32
+#undef SS33
+#undef SQVVX
+#undef SQVVY
+#undef SQVVZ
+#undef STMP1
+#undef STMP2
+#undef STMP3
+
+#undef VS11
+#undef VS21
+#undef VS31
+#undef VS22
+#undef VS32
+#undef VS33
+#undef VQVVX
+#undef VQVVY
+#undef VQVVZ
+#undef VTMP1
+#undef VTMP2
+#undef VTMP3
+
+        // Third Jacobi conjugation: indices shifted cyclically the other way (1->3, 2->1, 3->2; x->z, y->x, z->y)
+
+#define SS11 Ss33
+#define SS21 Ss31
+#define SS31 Ss32
+#define SS22 Ss11
+#define SS32 Ss21
+#define SS33 Ss22
+#define SQVVX Sqvvz
+#define SQVVY Sqvvx
+#define SQVVZ Sqvvy
+#define STMP1 Stmp3
+#define STMP2 Stmp1
+#define STMP3 Stmp2
+
+#define VS11 Vs33
+#define VS21 Vs31
+#define VS31 Vs32
+#define VS22 Vs11
+#define VS32 Vs21
+#define VS33 Vs22
+#define VQVVX Vqvvz
+#define VQVVY Vqvvx
+#define VQVVZ Vqvvy
+#define VTMP1 Vtmp3
+#define VTMP2 Vtmp1
+#define VTMP3 Vtmp2
+
+#include "Singular_Value_Decomposition_Jacobi_Conjugation_Kernel.hpp"
+
+#undef SS11
+#undef SS21
+#undef SS31
+#undef SS22
+#undef SS32
+#undef SS33
+#undef SQVVX
+#undef SQVVY
+#undef SQVVZ
+#undef STMP1
+#undef STMP2
+#undef STMP3
+
+#undef VS11
+#undef VS21
+#undef VS31
+#undef VS22
+#undef VS32
+#undef VS33
+#undef VQVVX
+#undef VQVVY
+#undef VQVVZ
+#undef VTMP1
+#undef VTMP2
+#undef VTMP3
+    }
+/*
+ * Optional diagnostic dump of the lower triangle of S after the sweeps,
+ * compiled only when PRINT_DEBUGGING_OUTPUT is defined.
+ *
+ * Scalar build: prints the .f members directly.
+ * SSE / AVX builds: each register is written to buf with an unaligned store
+ * and only element 0 of buf is copied out and printed, so a single lane is
+ * shown. buf and S11..S33 are declared outside this span.
+ */
+#ifdef PRINT_DEBUGGING_OUTPUT
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar S ="<<std::endl;
+    std::cout<<std::setw(12)<<Ss11.f<<std::endl;
+    std::cout<<std::setw(12)<<Ss21.f<<"  "<<std::setw(12)<<Ss22.f<<std::endl;
+    std::cout<<std::setw(12)<<Ss31.f<<"  "<<std::setw(12)<<Ss32.f<<"  "<<std::setw(12)<<Ss33.f<<std::endl;
+#endif
+#ifdef USE_SSE_IMPLEMENTATION
+    _mm_storeu_ps(buf,Vs11);S11=buf[0];
+    _mm_storeu_ps(buf,Vs21);S21=buf[0];
+    _mm_storeu_ps(buf,Vs31);S31=buf[0];
+    _mm_storeu_ps(buf,Vs22);S22=buf[0];
+    _mm_storeu_ps(buf,Vs32);S32=buf[0];
+    _mm_storeu_ps(buf,Vs33);S33=buf[0];
+    std::cout<<"Vector S ="<<std::endl;
+    std::cout<<std::setw(12)<<S11<<std::endl;
+    std::cout<<std::setw(12)<<S21<<"  "<<std::setw(12)<<S22<<std::endl;
+    std::cout<<std::setw(12)<<S31<<"  "<<std::setw(12)<<S32<<"  "<<std::setw(12)<<S33<<std::endl;
+#endif
+#ifdef USE_AVX_IMPLEMENTATION
+    _mm256_storeu_ps(buf,Vs11);S11=buf[0];
+    _mm256_storeu_ps(buf,Vs21);S21=buf[0];
+    _mm256_storeu_ps(buf,Vs31);S31=buf[0];
+    _mm256_storeu_ps(buf,Vs22);S22=buf[0];
+    _mm256_storeu_ps(buf,Vs32);S32=buf[0];
+    _mm256_storeu_ps(buf,Vs33);S33=buf[0];
+    std::cout<<"Vector S ="<<std::endl;
+    std::cout<<std::setw(12)<<S11<<std::endl;
+    std::cout<<std::setw(12)<<S21<<"  "<<std::setw(12)<<S22<<std::endl;
+    std::cout<<std::setw(12)<<S31<<"  "<<std::setw(12)<<S32<<"  "<<std::setw(12)<<S33<<std::endl;
+#endif
+#endif
+
+} // End block : Symmetric eigenanalysis
+
+    //###########################################################
+    // Normalize quaternion for matrix V
+    //###########################################################
+/*
+ * Rescales the quaternion qV = (qvs; qvvx, qvvy, qvvz) by an estimate of
+ * 1 / sqrt(qvs^2 + qvvx^2 + qvvy^2 + qvvz^2).
+ *
+ * The whole section is compiled unless USE_ACCURATE_RSQRT_IN_JACOBI_CONJUGATION
+ * is defined while PERFORM_STRICT_QUATERNION_RENORMALIZATION is not.
+ *
+ * Stmp1..Stmp4 / Vtmp1..Vtmp4, Sone_half / Vone_half and the scalar rsqrt()
+ * helper are defined outside this span. The temporaries are reused from
+ * statement to statement, so the order below must be preserved.
+ */
+#if !defined(USE_ACCURATE_RSQRT_IN_JACOBI_CONJUGATION) || defined(PERFORM_STRICT_QUATERNION_RENORMALIZATION)
+    // Squared norm: tmp2 = qvs^2 + qvvx^2 + qvvy^2 + qvvz^2 (tmp1 holds each square in turn)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sqvs.f*Sqvs.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_mul_ps(Vqvs,Vqvs);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_mul_ps(Vqvs,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvx.f*Sqvvx.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vqvvx,Vqvvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vqvvx,Vqvvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Stmp1.f+Stmp2.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vtmp1,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vtmp1,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvy.f*Sqvvy.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vqvvy,Vqvvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vqvvy,Vqvvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Stmp1.f+Stmp2.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vtmp1,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vtmp1,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvz.f*Sqvvz.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vqvvz,Vqvvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vqvvz,Vqvvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Stmp1.f+Stmp2.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vtmp1,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vtmp1,Vtmp2);)
+    // r = rsqrt(tmp2), then tmp1 = r + r*h - tmp2*r*r*r*h with h = one_half; if one_half is 0.5 (TODO confirm) this is 1.5*r - 0.5*tmp2*r^3, one Newton-Raphson refinement of r
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=rsqrt(Stmp2.f);)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_rsqrt_ps(Vtmp2);)                                     ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_rsqrt_ps(Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp1.f*Sone_half.f;)                            ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Vtmp1,Vone_half);)                             ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Vtmp1,Vone_half);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Stmp1.f*Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vtmp1,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vtmp1,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Stmp1.f*Stmp3.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vtmp1,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vtmp1,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Stmp2.f*Stmp3.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vtmp2,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vtmp2,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Stmp1.f+Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_add_ps(Vtmp1,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_add_ps(Vtmp1,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Stmp1.f-Stmp3.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_sub_ps(Vtmp1,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_sub_ps(Vtmp1,Vtmp3);)
+    // Scale all four quaternion components by the factor now held in tmp1
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=Sqvs.f*Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vqvs=_mm_mul_ps(Vqvs,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Vqvs=_mm256_mul_ps(Vqvs,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvx.f=Sqvvx.f*Stmp1.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvx=_mm_mul_ps(Vqvvx,Vtmp1);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvx=_mm256_mul_ps(Vqvvx,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvy.f=Sqvvy.f*Stmp1.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvy=_mm_mul_ps(Vqvvy,Vtmp1);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvy=_mm256_mul_ps(Vqvvy,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvz.f=Sqvvz.f*Stmp1.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvz=_mm_mul_ps(Vqvvz,Vtmp1);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvz=_mm256_mul_ps(Vqvvz,Vtmp1);)
+    // Optional diagnostic dump of the rescaled quaternion; the vector builds print element 0 of buf only (buf, QVS, QVVX, QVVY, QVVZ are declared outside this span)
+#ifdef PRINT_DEBUGGING_OUTPUT
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar qV ="<<std::endl;
+    std::cout<<std::setw(12)<<Sqvs.f<<"  "<<std::setw(12)<<Sqvvx.f<<"  "<<std::setw(12)<<Sqvvy.f<<"  "<<std::setw(12)<<Sqvvz.f<<std::endl;
+#endif
+#ifdef USE_SSE_IMPLEMENTATION
+    _mm_storeu_ps(buf,Vqvs);QVS=buf[0];
+    _mm_storeu_ps(buf,Vqvvx);QVVX=buf[0];
+    _mm_storeu_ps(buf,Vqvvy);QVVY=buf[0];
+    _mm_storeu_ps(buf,Vqvvz);QVVZ=buf[0];
+    std::cout<<"Vector qV ="<<std::endl;
+    std::cout<<std::setw(12)<<QVS<<"  "<<std::setw(12)<<QVVX<<"  "<<std::setw(12)<<QVVY<<"  "<<std::setw(12)<<QVVZ<<std::endl;
+#endif
+#ifdef USE_AVX_IMPLEMENTATION
+    _mm256_storeu_ps(buf,Vqvs);QVS=buf[0];
+    _mm256_storeu_ps(buf,Vqvvx);QVVX=buf[0];
+    _mm256_storeu_ps(buf,Vqvvy);QVVY=buf[0];
+    _mm256_storeu_ps(buf,Vqvvz);QVVZ=buf[0];
+    std::cout<<"Vector qV ="<<std::endl;
+    std::cout<<std::setw(12)<<QVS<<"  "<<std::setw(12)<<QVVX<<"  "<<std::setw(12)<<QVVY<<"  "<<std::setw(12)<<QVVZ<<std::endl;
+#endif
+#endif
+
+#endif
+
+{ // Begin block : Conjugation with V
+
+    //###########################################################
+    // Transform quaternion to matrix V
+    //###########################################################
+
+// Storage for the nine entries v11..v33 of the matrix V, declared here only when
+// COMPUTE_V_AS_MATRIX is NOT defined; otherwise the same names must already be in scope
+// (presumably provided by the including code, which is not visible in this span).
+// Each backend gets its own declaration, as for every other variable in this file:
+// __m128 for SSE and __m256 for AVX. The AVX statements that follow assign _mm256_* results
+// to Vv11..Vv33, so the AVX build needs the 256-bit type.
+#ifndef COMPUTE_V_AS_MATRIX
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv11;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv11;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv11;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv21;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv21;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv21;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv31;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv31;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv31;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv12;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv12;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv12;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv22;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv22;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv22;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv32;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv32;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv32;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv13;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv13;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv13;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv23;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv23;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv23;)
+    ENABLE_SCALAR_IMPLEMENTATION(union {float f;unsigned int ui;} Sv33;)                  ENABLE_SSE_IMPLEMENTATION(__m128 Vv33;)                                                   ENABLE_AVX_IMPLEMENTATION(__m256 Vv33;)
+#endif
+/*
+ * Expands the quaternion qV = (s; x, y, z), i.e. (qvs; qvvx, qvvy, qvvz),
+ * into the nine entries of V. Following the statements below, the results are:
+ *
+ *   v11 = s*s + x*x - y*y - z*z    v12 = 2*x*y - 2*s*z    v13 = 2*x*z + 2*s*y
+ *   v21 = 2*x*y + 2*s*z    v22 = s*s - x*x + y*y - z*z    v23 = 2*y*z - 2*s*x
+ *   v31 = 2*x*z - 2*s*y    v32 = 2*y*z + 2*s*x    v33 = s*s - x*x - y*y + z*z
+ *
+ * Order matters throughout:
+ *  - the diagonal is built by chaining partial sums (v11 = s*s first, v22 and
+ *    v33 derived from it, v11 completed last);
+ *  - tmp1..tmp3 first hold the squares x*x, y*y, z*z, are then overwritten
+ *    with 2*x, 2*y, 2*z, and finally with 2*x*y, 2*y*z, 2*x*z;
+ *  - v32, v13 and v21 temporarily hold 2*s*x, 2*s*y and 2*s*z and are read by
+ *    the subtractions producing v12, v23 and v31 before being finalised.
+ *
+ * Stmp* / Vtmp* are declared outside this span.
+ */
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvx.f*Sqvvx.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vqvvx,Vqvvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vqvvx,Vqvvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sqvvy.f*Sqvvy.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_mul_ps(Vqvvy,Vqvvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_mul_ps(Vqvvy,Vqvvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Sqvvz.f*Sqvvz.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vqvvz,Vqvvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vqvvz,Vqvvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv11.f=Sqvs.f*Sqvs.f;)                                   ENABLE_SSE_IMPLEMENTATION(Vv11=_mm_mul_ps(Vqvs,Vqvs);)                                    ENABLE_AVX_IMPLEMENTATION(Vv11=_mm256_mul_ps(Vqvs,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv22.f=Sv11.f-Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv22=_mm_sub_ps(Vv11,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Vv22=_mm256_sub_ps(Vv11,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv33.f=Sv22.f-Stmp2.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv33=_mm_sub_ps(Vv22,Vtmp2);)                                   ENABLE_AVX_IMPLEMENTATION(Vv33=_mm256_sub_ps(Vv22,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv33.f=Sv33.f+Stmp3.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv33=_mm_add_ps(Vv33,Vtmp3);)                                   ENABLE_AVX_IMPLEMENTATION(Vv33=_mm256_add_ps(Vv33,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv22.f=Sv22.f+Stmp2.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv22=_mm_add_ps(Vv22,Vtmp2);)                                   ENABLE_AVX_IMPLEMENTATION(Vv22=_mm256_add_ps(Vv22,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv22.f=Sv22.f-Stmp3.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv22=_mm_sub_ps(Vv22,Vtmp3);)                                   ENABLE_AVX_IMPLEMENTATION(Vv22=_mm256_sub_ps(Vv22,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv11.f=Sv11.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv11=_mm_add_ps(Vv11,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Vv11=_mm256_add_ps(Vv11,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv11.f=Sv11.f-Stmp2.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv11=_mm_sub_ps(Vv11,Vtmp2);)                                   ENABLE_AVX_IMPLEMENTATION(Vv11=_mm256_sub_ps(Vv11,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv11.f=Sv11.f-Stmp3.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv11=_mm_sub_ps(Vv11,Vtmp3);)                                   ENABLE_AVX_IMPLEMENTATION(Vv11=_mm256_sub_ps(Vv11,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvx.f+Sqvvx.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_add_ps(Vqvvx,Vqvvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_add_ps(Vqvvx,Vqvvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sqvvy.f+Sqvvy.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vqvvy,Vqvvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vqvvy,Vqvvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Sqvvz.f+Sqvvz.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_add_ps(Vqvvz,Vqvvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_add_ps(Vqvvz,Vqvvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv32.f=Sqvs.f*Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv32=_mm_mul_ps(Vqvs,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Vv32=_mm256_mul_ps(Vqvs,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv13.f=Sqvs.f*Stmp2.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv13=_mm_mul_ps(Vqvs,Vtmp2);)                                   ENABLE_AVX_IMPLEMENTATION(Vv13=_mm256_mul_ps(Vqvs,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv21.f=Sqvs.f*Stmp3.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv21=_mm_mul_ps(Vqvs,Vtmp3);)                                   ENABLE_AVX_IMPLEMENTATION(Vv21=_mm256_mul_ps(Vqvs,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvy.f*Stmp1.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vqvvy,Vtmp1);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vqvvy,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sqvvz.f*Stmp2.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_mul_ps(Vqvvz,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_mul_ps(Vqvvz,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Sqvvx.f*Stmp3.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vqvvx,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vqvvx,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv12.f=Stmp1.f-Sv21.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv12=_mm_sub_ps(Vtmp1,Vv21);)                                   ENABLE_AVX_IMPLEMENTATION(Vv12=_mm256_sub_ps(Vtmp1,Vv21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv23.f=Stmp2.f-Sv32.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv23=_mm_sub_ps(Vtmp2,Vv32);)                                   ENABLE_AVX_IMPLEMENTATION(Vv23=_mm256_sub_ps(Vtmp2,Vv32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv31.f=Stmp3.f-Sv13.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv31=_mm_sub_ps(Vtmp3,Vv13);)                                   ENABLE_AVX_IMPLEMENTATION(Vv31=_mm256_sub_ps(Vtmp3,Vv13);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv21.f=Stmp1.f+Sv21.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv21=_mm_add_ps(Vtmp1,Vv21);)                                   ENABLE_AVX_IMPLEMENTATION(Vv21=_mm256_add_ps(Vtmp1,Vv21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv32.f=Stmp2.f+Sv32.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv32=_mm_add_ps(Vtmp2,Vv32);)                                   ENABLE_AVX_IMPLEMENTATION(Vv32=_mm256_add_ps(Vtmp2,Vv32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv13.f=Stmp3.f+Sv13.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv13=_mm_add_ps(Vtmp3,Vv13);)                                   ENABLE_AVX_IMPLEMENTATION(Vv13=_mm256_add_ps(Vtmp3,Vv13);)
+/*
+ * Optional diagnostic dump of the 3x3 matrix V, compiled only when both
+ * COMPUTE_V_AS_MATRIX and PRINT_DEBUGGING_OUTPUT are defined.
+ *
+ * Scalar build: prints the .f members row by row.
+ * SSE / AVX builds: each register is written to buf with an unaligned store
+ * and only element 0 of buf is copied out and printed, so a single lane is
+ * shown. buf and V11..V33 are declared outside this span.
+ */
+#ifdef COMPUTE_V_AS_MATRIX
+#ifdef PRINT_DEBUGGING_OUTPUT
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar V ="<<std::endl;
+    std::cout<<std::setw(12)<<Sv11.f<<"  "<<std::setw(12)<<Sv12.f<<"  "<<std::setw(12)<<Sv13.f<<std::endl;
+    std::cout<<std::setw(12)<<Sv21.f<<"  "<<std::setw(12)<<Sv22.f<<"  "<<std::setw(12)<<Sv23.f<<std::endl;
+    std::cout<<std::setw(12)<<Sv31.f<<"  "<<std::setw(12)<<Sv32.f<<"  "<<std::setw(12)<<Sv33.f<<std::endl;
+#endif
+#ifdef USE_SSE_IMPLEMENTATION
+    _mm_storeu_ps(buf,Vv11);V11=buf[0];
+    _mm_storeu_ps(buf,Vv21);V21=buf[0];
+    _mm_storeu_ps(buf,Vv31);V31=buf[0];
+    _mm_storeu_ps(buf,Vv12);V12=buf[0];
+    _mm_storeu_ps(buf,Vv22);V22=buf[0];
+    _mm_storeu_ps(buf,Vv32);V32=buf[0];
+    _mm_storeu_ps(buf,Vv13);V13=buf[0];
+    _mm_storeu_ps(buf,Vv23);V23=buf[0];
+    _mm_storeu_ps(buf,Vv33);V33=buf[0];
+    std::cout<<"Vector V ="<<std::endl;
+    std::cout<<std::setw(12)<<V11<<"  "<<std::setw(12)<<V12<<"  "<<std::setw(12)<<V13<<std::endl;
+    std::cout<<std::setw(12)<<V21<<"  "<<std::setw(12)<<V22<<"  "<<std::setw(12)<<V23<<std::endl;
+    std::cout<<std::setw(12)<<V31<<"  "<<std::setw(12)<<V32<<"  "<<std::setw(12)<<V33<<std::endl;
+#endif
+#ifdef USE_AVX_IMPLEMENTATION
+    _mm256_storeu_ps(buf,Vv11);V11=buf[0];
+    _mm256_storeu_ps(buf,Vv21);V21=buf[0];
+    _mm256_storeu_ps(buf,Vv31);V31=buf[0];
+    _mm256_storeu_ps(buf,Vv12);V12=buf[0];
+    _mm256_storeu_ps(buf,Vv22);V22=buf[0];
+    _mm256_storeu_ps(buf,Vv32);V32=buf[0];
+    _mm256_storeu_ps(buf,Vv13);V13=buf[0];
+    _mm256_storeu_ps(buf,Vv23);V23=buf[0];
+    _mm256_storeu_ps(buf,Vv33);V33=buf[0];
+    std::cout<<"Vector V ="<<std::endl;
+    std::cout<<std::setw(12)<<V11<<"  "<<std::setw(12)<<V12<<"  "<<std::setw(12)<<V13<<std::endl;
+    std::cout<<std::setw(12)<<V21<<"  "<<std::setw(12)<<V22<<"  "<<std::setw(12)<<V23<<std::endl;
+    std::cout<<std::setw(12)<<V31<<"  "<<std::setw(12)<<V32<<"  "<<std::setw(12)<<V33<<std::endl;
+#endif
+#endif
+#endif
+
+    //###########################################################
+    // Multiply A (from the right) with V, i.e. A <- A*V, processing one row of A at a time
+    //###########################################################
+    // Row 1: tmp2,tmp3 keep the old a12,a13; then a1j = v1j*a11 + v2j*a12 + v3j*a13 for j=1,2,3 (a11 is overwritten only after a12 and a13 have been seeded from its old value); tmp1 is scratch
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sa12.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vtmp2=Va12;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp2=Va12;)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Sa13.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vtmp3=Va13;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp3=Va13;)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa12.f=Sv12.f*Sa11.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va12=_mm_mul_ps(Vv12,Va11);)                                    ENABLE_AVX_IMPLEMENTATION(Va12=_mm256_mul_ps(Vv12,Va11);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa13.f=Sv13.f*Sa11.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va13=_mm_mul_ps(Vv13,Va11);)                                    ENABLE_AVX_IMPLEMENTATION(Va13=_mm256_mul_ps(Vv13,Va11);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa11.f=Sv11.f*Sa11.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va11=_mm_mul_ps(Vv11,Va11);)                                    ENABLE_AVX_IMPLEMENTATION(Va11=_mm256_mul_ps(Vv11,Va11);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv21.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv21,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv21,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa11.f=Sa11.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va11=_mm_add_ps(Va11,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va11=_mm256_add_ps(Va11,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv31.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv31,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv31,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa11.f=Sa11.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va11=_mm_add_ps(Va11,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va11=_mm256_add_ps(Va11,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv22.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv22,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv22,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa12.f=Sa12.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va12=_mm_add_ps(Va12,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va12=_mm256_add_ps(Va12,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv32.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv32,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv32,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa12.f=Sa12.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va12=_mm_add_ps(Va12,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va12=_mm256_add_ps(Va12,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv23.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv23,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv23,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa13.f=Sa13.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va13=_mm_add_ps(Va13,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va13=_mm256_add_ps(Va13,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv33.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv33,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv33,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa13.f=Sa13.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va13=_mm_add_ps(Va13,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va13=_mm256_add_ps(Va13,Vtmp1);)
+    // Row 2: same pattern applied to a21,a22,a23 (tmp2,tmp3 keep the old a22,a23)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sa22.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vtmp2=Va22;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp2=Va22;)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Sa23.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vtmp3=Va23;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp3=Va23;)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa22.f=Sv12.f*Sa21.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va22=_mm_mul_ps(Vv12,Va21);)                                    ENABLE_AVX_IMPLEMENTATION(Va22=_mm256_mul_ps(Vv12,Va21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa23.f=Sv13.f*Sa21.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va23=_mm_mul_ps(Vv13,Va21);)                                    ENABLE_AVX_IMPLEMENTATION(Va23=_mm256_mul_ps(Vv13,Va21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa21.f=Sv11.f*Sa21.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va21=_mm_mul_ps(Vv11,Va21);)                                    ENABLE_AVX_IMPLEMENTATION(Va21=_mm256_mul_ps(Vv11,Va21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv21.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv21,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv21,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa21.f=Sa21.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va21=_mm_add_ps(Va21,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va21=_mm256_add_ps(Va21,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv31.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv31,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv31,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa21.f=Sa21.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va21=_mm_add_ps(Va21,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va21=_mm256_add_ps(Va21,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv22.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv22,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv22,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa22.f=Sa22.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va22=_mm_add_ps(Va22,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va22=_mm256_add_ps(Va22,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv32.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv32,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv32,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa22.f=Sa22.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va22=_mm_add_ps(Va22,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va22=_mm256_add_ps(Va22,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv23.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv23,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv23,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa23.f=Sa23.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va23=_mm_add_ps(Va23,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va23=_mm256_add_ps(Va23,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv33.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv33,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv33,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa23.f=Sa23.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va23=_mm_add_ps(Va23,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va23=_mm256_add_ps(Va23,Vtmp1);)
+    // Row 3: same pattern applied to a31,a32,a33 (tmp2,tmp3 keep the old a32,a33)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sa32.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vtmp2=Va32;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp2=Va32;)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Sa33.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vtmp3=Va33;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp3=Va33;)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa32.f=Sv12.f*Sa31.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va32=_mm_mul_ps(Vv12,Va31);)                                    ENABLE_AVX_IMPLEMENTATION(Va32=_mm256_mul_ps(Vv12,Va31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa33.f=Sv13.f*Sa31.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va33=_mm_mul_ps(Vv13,Va31);)                                    ENABLE_AVX_IMPLEMENTATION(Va33=_mm256_mul_ps(Vv13,Va31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa31.f=Sv11.f*Sa31.f;)                                   ENABLE_SSE_IMPLEMENTATION(Va31=_mm_mul_ps(Vv11,Va31);)                                    ENABLE_AVX_IMPLEMENTATION(Va31=_mm256_mul_ps(Vv11,Va31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv21.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv21,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv21,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa31.f=Sa31.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va31=_mm_add_ps(Va31,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va31=_mm256_add_ps(Va31,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv31.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv31,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv31,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa31.f=Sa31.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va31=_mm_add_ps(Va31,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va31=_mm256_add_ps(Va31,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv22.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv22,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv22,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa32.f=Sa32.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va32=_mm_add_ps(Va32,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va32=_mm256_add_ps(Va32,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv32.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv32,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv32,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa32.f=Sa32.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va32=_mm_add_ps(Va32,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va32=_mm256_add_ps(Va32,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv23.f*Stmp2.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv23,Vtmp2);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv23,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa33.f=Sa33.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va33=_mm_add_ps(Va33,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va33=_mm256_add_ps(Va33,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sv33.f*Stmp3.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vv33,Vtmp3);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vv33,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa33.f=Sa33.f+Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va33=_mm_add_ps(Va33,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Va33=_mm256_add_ps(Va33,Vtmp1);)
+
+#ifdef PRINT_DEBUGGING_OUTPUT // Debug dump of the 3x3 matrix A after the multiplication with V; compiled only when PRINT_DEBUGGING_OUTPUT is defined
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar A (after multiplying with V) ="<<std::endl;
+    std::cout<<std::setw(12)<<Sa11.f<<"  "<<std::setw(12)<<Sa12.f<<"  "<<std::setw(12)<<Sa13.f<<std::endl;
+    std::cout<<std::setw(12)<<Sa21.f<<"  "<<std::setw(12)<<Sa22.f<<"  "<<std::setw(12)<<Sa23.f<<std::endl;
+    std::cout<<std::setw(12)<<Sa31.f<<"  "<<std::setw(12)<<Sa32.f<<"  "<<std::setw(12)<<Sa33.f<<std::endl;
+#endif // USE_SCALAR_IMPLEMENTATION
+#ifdef USE_SSE_IMPLEMENTATION // Each SSE register is stored (unaligned) to buf and only its first element, buf[0], is printed
+    _mm_storeu_ps(buf,Va11);A11=buf[0];
+    _mm_storeu_ps(buf,Va21);A21=buf[0];
+    _mm_storeu_ps(buf,Va31);A31=buf[0];
+    _mm_storeu_ps(buf,Va12);A12=buf[0];
+    _mm_storeu_ps(buf,Va22);A22=buf[0];
+    _mm_storeu_ps(buf,Va32);A32=buf[0];
+    _mm_storeu_ps(buf,Va13);A13=buf[0];
+    _mm_storeu_ps(buf,Va23);A23=buf[0];
+    _mm_storeu_ps(buf,Va33);A33=buf[0];
+    std::cout<<"Vector A (after multiplying with V) ="<<std::endl;
+    std::cout<<std::setw(12)<<A11<<"  "<<std::setw(12)<<A12<<"  "<<std::setw(12)<<A13<<std::endl;
+    std::cout<<std::setw(12)<<A21<<"  "<<std::setw(12)<<A22<<"  "<<std::setw(12)<<A23<<std::endl;
+    std::cout<<std::setw(12)<<A31<<"  "<<std::setw(12)<<A32<<"  "<<std::setw(12)<<A33<<std::endl;
+#endif // USE_SSE_IMPLEMENTATION
+#ifdef USE_AVX_IMPLEMENTATION // Same as the SSE branch, using the 256-bit store; again only buf[0] is printed
+    _mm256_storeu_ps(buf,Va11);A11=buf[0];
+    _mm256_storeu_ps(buf,Va21);A21=buf[0];
+    _mm256_storeu_ps(buf,Va31);A31=buf[0];
+    _mm256_storeu_ps(buf,Va12);A12=buf[0];
+    _mm256_storeu_ps(buf,Va22);A22=buf[0];
+    _mm256_storeu_ps(buf,Va32);A32=buf[0];
+    _mm256_storeu_ps(buf,Va13);A13=buf[0];
+    _mm256_storeu_ps(buf,Va23);A23=buf[0];
+    _mm256_storeu_ps(buf,Va33);A33=buf[0];
+    std::cout<<"Vector A (after multiplying with V) ="<<std::endl;
+    std::cout<<std::setw(12)<<A11<<"  "<<std::setw(12)<<A12<<"  "<<std::setw(12)<<A13<<std::endl;
+    std::cout<<std::setw(12)<<A21<<"  "<<std::setw(12)<<A22<<"  "<<std::setw(12)<<A23<<std::endl;
+    std::cout<<std::setw(12)<<A31<<"  "<<std::setw(12)<<A32<<"  "<<std::setw(12)<<A33<<std::endl;
+#endif // USE_AVX_IMPLEMENTATION
+#endif // PRINT_DEBUGGING_OUTPUT
+
+} // End block : Conjugation with V
+
+} // End block : Scope of qV (if not maintained)
+
+    //###########################################################
+    // Permute columns such that the singular values are sorted
+    //###########################################################
+    // First compute the squared Euclidean norm of each column of A: tmp1 <- column 1, tmp2 <- column 2, tmp3 <- column 3 (tmp4 is scratch for the individual squares)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sa11.f*Sa11.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Va11,Va11);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Va11,Va11);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Sa21.f*Sa21.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Va21,Va21);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Va21,Va21);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Stmp1.f+Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_add_ps(Vtmp1,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_add_ps(Vtmp1,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Sa31.f*Sa31.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Va31,Va31);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Va31,Va31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Stmp1.f+Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_add_ps(Vtmp1,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_add_ps(Vtmp1,Vtmp4);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sa12.f*Sa12.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_mul_ps(Va12,Va12);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_mul_ps(Va12,Va12);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Sa22.f*Sa22.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Va22,Va22);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Va22,Va22);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Stmp2.f+Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vtmp2,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vtmp2,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Sa32.f*Sa32.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Va32,Va32);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Va32,Va32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Stmp2.f+Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vtmp2,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vtmp2,Vtmp4);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Sa13.f*Sa13.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Va13,Va13);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Va13,Va13);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Sa23.f*Sa23.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Va23,Va23);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Va23,Va23);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Stmp3.f+Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_add_ps(Vtmp3,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_add_ps(Vtmp3,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Sa33.f*Sa33.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Va33,Va33);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Va33,Va33);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Stmp3.f+Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_add_ps(Vtmp3,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_add_ps(Vtmp3,Vtmp4);)
+
+    // Swap columns 1-2 if necessary, i.e. where the squared norm of column 1 (tmp1) is less than that of column 2 (tmp2)
+    // tmp4 is the comparison mask (all bits set where a swap is needed, zero otherwise); every pair (x,y) below is swapped branch-free via t=(x^y)&mask, x^=t, y^=t
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.ui=(Stmp1.f<Stmp2.f)?0xffffffff:0;)                ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_cmplt_ps(Vtmp1,Vtmp2);)                               ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_cmp_ps(Vtmp1,Vtmp2, _CMP_LT_OS);) //ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_cmplt_ps(Vtmp1,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa11.ui^Sa12.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va11,Va12);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va11,Va12);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa11.ui=Sa11.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va11=_mm_xor_ps(Va11,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va11=_mm256_xor_ps(Va11,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa12.ui=Sa12.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va12=_mm_xor_ps(Va12,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va12=_mm256_xor_ps(Va12,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa21.ui^Sa22.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va21,Va22);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va21,Va22);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa21.ui=Sa21.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va21=_mm_xor_ps(Va21,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va21=_mm256_xor_ps(Va21,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa22.ui=Sa22.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va22=_mm_xor_ps(Va22,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va22=_mm256_xor_ps(Va22,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa31.ui^Sa32.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va31,Va32);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va31,Va32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa31.ui=Sa31.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va31=_mm_xor_ps(Va31,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va31=_mm256_xor_ps(Va31,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa32.ui=Sa32.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va32=_mm_xor_ps(Va32,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va32=_mm256_xor_ps(Va32,Vtmp5);)
+
+#ifdef COMPUTE_V_AS_MATRIX // Apply the same masked swap to columns 1-2 of V
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv11.ui^Sv12.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv11,Vv12);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv11,Vv12);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv11.ui=Sv11.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv11=_mm_xor_ps(Vv11,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv11=_mm256_xor_ps(Vv11,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv12.ui=Sv12.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv12=_mm_xor_ps(Vv12,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv12=_mm256_xor_ps(Vv12,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv21.ui^Sv22.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv21,Vv22);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv21,Vv22);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv21.ui=Sv21.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv21=_mm_xor_ps(Vv21,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv21=_mm256_xor_ps(Vv21,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv22.ui=Sv22.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv22=_mm_xor_ps(Vv22,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv22=_mm256_xor_ps(Vv22,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv31.ui^Sv32.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv31,Vv32);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv31,Vv32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv31.ui=Sv31.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv31=_mm_xor_ps(Vv31,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv31=_mm256_xor_ps(Vv31,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv32.ui=Sv32.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv32=_mm_xor_ps(Vv32,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv32=_mm256_xor_ps(Vv32,Vtmp5);)
+#endif // COMPUTE_V_AS_MATRIX
+    // Swap the squared norms as well, so that tmp1 and tmp2 keep describing columns 1 and 2 after the swap
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp1.ui^Stmp2.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vtmp1,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vtmp1,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.ui=Stmp1.ui^Stmp5.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_xor_ps(Vtmp1,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_xor_ps(Vtmp1,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.ui=Stmp2.ui^Stmp5.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_xor_ps(Vtmp2,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_xor_ps(Vtmp2,Vtmp5);)
+
+    // If columns 1-2 have been swapped, negate 2nd column of A and V so that V is still a rotation
+    // tmp4 <- 1 + (-2 masked by the swap mask): -1 where the columns were swapped, +1 otherwise; column 2 is then multiplied by tmp4
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=-2.;)                                            ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_set1_ps(-2.);)                                        ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_set1_ps(-2.);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=1.;)                                             ENABLE_SSE_IMPLEMENTATION(Vtmp4=Vone;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp4=Vone;)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f+Stmp5.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_add_ps(Vtmp4,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_add_ps(Vtmp4,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Sa12.f=Sa12.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va12=_mm_mul_ps(Va12,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va12=_mm256_mul_ps(Va12,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa22.f=Sa22.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va22=_mm_mul_ps(Va22,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va22=_mm256_mul_ps(Va22,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa32.f=Sa32.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va32=_mm_mul_ps(Va32,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va32=_mm256_mul_ps(Va32,Vtmp4);)
+
+#ifdef COMPUTE_V_AS_MATRIX
+    ENABLE_SCALAR_IMPLEMENTATION(Sv12.f=Sv12.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv12=_mm_mul_ps(Vv12,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv12=_mm256_mul_ps(Vv12,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv22.f=Sv22.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv22=_mm_mul_ps(Vv22,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv22=_mm256_mul_ps(Vv22,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv32.f=Sv32.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv32=_mm_mul_ps(Vv32,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv32=_mm256_mul_ps(Vv32,Vtmp4);)
+#endif // COMPUTE_V_AS_MATRIX
+
+    // If columns 1-2 have been swapped, also update quaternion representation of V (the quaternion may become un-normalized after this)
+    // tmp4 <- 0.5*tmp4 - 0.5, i.e. -1 where swapped and 0 otherwise; then qvs' = qvs + tmp4*qvvz, qvvz' = qvvz - tmp4*qvs and qvvy' = qvvy + tmp4*qvvx, qvvx' = qvvx - tmp4*qvvy (tmp5 holds the new first component of each pair)
+#ifdef COMPUTE_V_AS_QUATERNION
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f*Sone_half.f;)                            ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Vtmp4,Vone_half);)                             ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Vtmp4,Vone_half);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f-Sone_half.f;)                            ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_sub_ps(Vtmp4,Vone_half);)                             ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_sub_ps(Vtmp4,Vone_half);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp4.f*Sqvvz.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_mul_ps(Vtmp4,Vqvvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_mul_ps(Vtmp4,Vqvvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp5.f+Sqvs.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_add_ps(Vtmp5,Vqvs);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_add_ps(Vtmp5,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=Sqvs.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vqvs=_mm_mul_ps(Vqvs,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vqvs=_mm256_mul_ps(Vqvs,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvz.f=Sqvvz.f-Sqvs.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vqvvz=_mm_sub_ps(Vqvvz,Vqvs);)                                  ENABLE_AVX_IMPLEMENTATION(Vqvvz=_mm256_sub_ps(Vqvvz,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=Stmp5.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vqvs=Vtmp5;)                                                    ENABLE_AVX_IMPLEMENTATION(Vqvs=Vtmp5;)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp4.f*Sqvvx.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_mul_ps(Vtmp4,Vqvvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_mul_ps(Vtmp4,Vqvvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp5.f+Sqvvy.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_add_ps(Vtmp5,Vqvvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_add_ps(Vtmp5,Vqvvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvy.f=Sqvvy.f*Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvy=_mm_mul_ps(Vqvvy,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvy=_mm256_mul_ps(Vqvvy,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvx.f=Sqvvx.f-Sqvvy.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvx=_mm_sub_ps(Vqvvx,Vqvvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvx=_mm256_sub_ps(Vqvvx,Vqvvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvy.f=Stmp5.f;)                                        ENABLE_SSE_IMPLEMENTATION(Vqvvy=Vtmp5;)                                                   ENABLE_AVX_IMPLEMENTATION(Vqvvy=Vtmp5;)
+#endif // COMPUTE_V_AS_QUATERNION
+
+    // Swap columns 1 and 3 of A (and of V, when COMPUTE_V_AS_MATRIX is defined) wherever Stmp1 < Stmp3
+    // Stmp4/Vtmp4 is an all-ones mask where the comparison holds and zero elsewhere; each pair (x,y) is exchanged with t=(x^y)&mask; x^=t; y^=t, which leaves both values untouched when the mask is zero
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.ui=(Stmp1.f<Stmp3.f)?0xffffffff:0;)                ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_cmplt_ps(Vtmp1,Vtmp3);)                               ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_cmp_ps(Vtmp1,Vtmp3, _CMP_LT_OS);) // AVX provides no _mm256_cmplt_ps; _mm256_cmp_ps with the ordered, signaling less-than predicate _CMP_LT_OS is used in its place
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa11.ui^Sa13.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va11,Va13);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va11,Va13);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa11.ui=Sa11.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va11=_mm_xor_ps(Va11,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va11=_mm256_xor_ps(Va11,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa13.ui=Sa13.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va13=_mm_xor_ps(Va13,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va13=_mm256_xor_ps(Va13,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa21.ui^Sa23.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va21,Va23);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va21,Va23);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa21.ui=Sa21.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va21=_mm_xor_ps(Va21,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va21=_mm256_xor_ps(Va21,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa23.ui=Sa23.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va23=_mm_xor_ps(Va23,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va23=_mm256_xor_ps(Va23,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa31.ui^Sa33.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va31,Va33);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va31,Va33);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa31.ui=Sa31.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va31=_mm_xor_ps(Va31,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va31=_mm256_xor_ps(Va31,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa33.ui=Sa33.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va33=_mm_xor_ps(Va33,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va33=_mm256_xor_ps(Va33,Vtmp5);)
+    // The same masked exchange is applied to columns 1 and 3 of V when V is kept as a matrix
+#ifdef COMPUTE_V_AS_MATRIX
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv11.ui^Sv13.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv11,Vv13);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv11,Vv13);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv11.ui=Sv11.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv11=_mm_xor_ps(Vv11,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv11=_mm256_xor_ps(Vv11,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv13.ui=Sv13.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv13=_mm_xor_ps(Vv13,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv13=_mm256_xor_ps(Vv13,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv21.ui^Sv23.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv21,Vv23);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv21,Vv23);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv21.ui=Sv21.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv21=_mm_xor_ps(Vv21,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv21=_mm256_xor_ps(Vv21,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv23.ui=Sv23.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv23=_mm_xor_ps(Vv23,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv23=_mm256_xor_ps(Vv23,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv31.ui^Sv33.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv31,Vv33);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv31,Vv33);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv31.ui=Sv31.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv31=_mm_xor_ps(Vv31,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv31=_mm256_xor_ps(Vv31,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv33.ui=Sv33.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv33=_mm_xor_ps(Vv33,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv33=_mm256_xor_ps(Vv33,Vtmp5);)
+#endif
+    // The compared values Stmp1 and Stmp3 are exchanged under the same mask; Stmp3 is compared again below (presumably these are per-column magnitudes computed earlier in the file -- confirm there)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp1.ui^Stmp3.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vtmp1,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vtmp1,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.ui=Stmp1.ui^Stmp5.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_xor_ps(Vtmp1,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_xor_ps(Vtmp1,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.ui=Stmp3.ui^Stmp5.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_xor_ps(Vtmp3,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_xor_ps(Vtmp3,Vtmp5);)
+    // If columns 1-3 have been swapped, negate 1st column of A and V so that V is still a rotation
+    // Stmp5 is -2 masked by Stmp4 (so -2 where the swap happened, 0 elsewhere); Stmp4 is then overwritten with 1+Stmp5, i.e. -1 where swapped and +1 elsewhere, and used as the multiplier below
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=-2.;)                                            ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_set1_ps(-2.);)                                        ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_set1_ps(-2.);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=1.;)                                             ENABLE_SSE_IMPLEMENTATION(Vtmp4=Vone;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp4=Vone;)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f+Stmp5.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_add_ps(Vtmp4,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_add_ps(Vtmp4,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Sa11.f=Sa11.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va11=_mm_mul_ps(Va11,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va11=_mm256_mul_ps(Va11,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa21.f=Sa21.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va21=_mm_mul_ps(Va21,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va21=_mm256_mul_ps(Va21,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa31.f=Sa31.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va31=_mm_mul_ps(Va31,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va31=_mm256_mul_ps(Va31,Vtmp4);)
+
+#ifdef COMPUTE_V_AS_MATRIX
+    ENABLE_SCALAR_IMPLEMENTATION(Sv11.f=Sv11.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv11=_mm_mul_ps(Vv11,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv11=_mm256_mul_ps(Vv11,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv21.f=Sv21.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv21=_mm_mul_ps(Vv21,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv21=_mm256_mul_ps(Vv21,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv31.f=Sv31.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv31=_mm_mul_ps(Vv31,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv31=_mm256_mul_ps(Vv31,Vtmp4);)
+#endif
+
+    // If columns 1-3 have been swapped, also update quaternion representation of V (the quaternion may become un-normalized after this)
+    // Stmp4 (+1 or -1 at this point) is remapped to 0 or -1, assuming Sone_half holds 0.5. With 0 the quaternion is unchanged; with -1, (qvs,qvvy) becomes (qvs-qvvy, qvvy+qvs) and (qvvx,qvvz) becomes (qvvx-qvvz, qvvz+qvvx)
+#ifdef COMPUTE_V_AS_QUATERNION
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f*Sone_half.f;)                            ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Vtmp4,Vone_half);)                             ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Vtmp4,Vone_half);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f-Sone_half.f;)                            ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_sub_ps(Vtmp4,Vone_half);)                             ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_sub_ps(Vtmp4,Vone_half);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp4.f*Sqvvy.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_mul_ps(Vtmp4,Vqvvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_mul_ps(Vtmp4,Vqvvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp5.f+Sqvs.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_add_ps(Vtmp5,Vqvs);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_add_ps(Vtmp5,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=Sqvs.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vqvs=_mm_mul_ps(Vqvs,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vqvs=_mm256_mul_ps(Vqvs,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvy.f=Sqvvy.f-Sqvs.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vqvvy=_mm_sub_ps(Vqvvy,Vqvs);)                                  ENABLE_AVX_IMPLEMENTATION(Vqvvy=_mm256_sub_ps(Vqvvy,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=Stmp5.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vqvs=Vtmp5;)                                                    ENABLE_AVX_IMPLEMENTATION(Vqvs=Vtmp5;)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp4.f*Sqvvz.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_mul_ps(Vtmp4,Vqvvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_mul_ps(Vtmp4,Vqvvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp5.f+Sqvvx.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_add_ps(Vtmp5,Vqvvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_add_ps(Vtmp5,Vqvvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvx.f=Sqvvx.f*Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvx=_mm_mul_ps(Vqvvx,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvx=_mm256_mul_ps(Vqvvx,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvz.f=Sqvvz.f-Sqvvx.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvz=_mm_sub_ps(Vqvvz,Vqvvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvz=_mm256_sub_ps(Vqvvz,Vqvvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvx.f=Stmp5.f;)                                        ENABLE_SSE_IMPLEMENTATION(Vqvvx=Vtmp5;)                                                   ENABLE_AVX_IMPLEMENTATION(Vqvvx=Vtmp5;)
+#endif
+
+    // Swap columns 2 and 3 of A (and of V, when COMPUTE_V_AS_MATRIX is defined) wherever Stmp2 < Stmp3
+    // Stmp4/Vtmp4 is an all-ones mask where the comparison holds and zero elsewhere; each pair (x,y) is exchanged with t=(x^y)&mask; x^=t; y^=t, which leaves both values untouched when the mask is zero
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.ui=(Stmp2.f<Stmp3.f)?0xffffffff:0;)                ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_cmplt_ps(Vtmp2,Vtmp3);)                               ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_cmp_ps(Vtmp2,Vtmp3, _CMP_LT_OS);) // AVX provides no _mm256_cmplt_ps; _mm256_cmp_ps with the ordered, signaling less-than predicate _CMP_LT_OS is used in its place
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa12.ui^Sa13.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va12,Va13);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va12,Va13);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa12.ui=Sa12.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va12=_mm_xor_ps(Va12,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va12=_mm256_xor_ps(Va12,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa13.ui=Sa13.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va13=_mm_xor_ps(Va13,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va13=_mm256_xor_ps(Va13,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa22.ui^Sa23.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va22,Va23);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va22,Va23);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa22.ui=Sa22.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va22=_mm_xor_ps(Va22,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va22=_mm256_xor_ps(Va22,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa23.ui=Sa23.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va23=_mm_xor_ps(Va23,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va23=_mm256_xor_ps(Va23,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sa32.ui^Sa33.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Va32,Va33);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Va32,Va33);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa32.ui=Sa32.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va32=_mm_xor_ps(Va32,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va32=_mm256_xor_ps(Va32,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa33.ui=Sa33.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Va33=_mm_xor_ps(Va33,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Va33=_mm256_xor_ps(Va33,Vtmp5);)
+    // The same masked exchange is applied to columns 2 and 3 of V when V is kept as a matrix
+#ifdef COMPUTE_V_AS_MATRIX
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv12.ui^Sv13.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv12,Vv13);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv12,Vv13);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv12.ui=Sv12.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv12=_mm_xor_ps(Vv12,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv12=_mm256_xor_ps(Vv12,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv13.ui=Sv13.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv13=_mm_xor_ps(Vv13,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv13=_mm256_xor_ps(Vv13,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv22.ui^Sv23.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv22,Vv23);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv22,Vv23);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv22.ui=Sv22.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv22=_mm_xor_ps(Vv22,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv22=_mm256_xor_ps(Vv22,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv23.ui=Sv23.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv23=_mm_xor_ps(Vv23,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv23=_mm256_xor_ps(Vv23,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Sv32.ui^Sv33.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vv32,Vv33);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vv32,Vv33);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv32.ui=Sv32.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv32=_mm_xor_ps(Vv32,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv32=_mm256_xor_ps(Vv32,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv33.ui=Sv33.ui^Stmp5.ui;)                               ENABLE_SSE_IMPLEMENTATION(Vv33=_mm_xor_ps(Vv33,Vtmp5);)                                   ENABLE_AVX_IMPLEMENTATION(Vv33=_mm256_xor_ps(Vv33,Vtmp5);)
+#endif
+    // The compared values Stmp2 and Stmp3 are exchanged under the same mask (presumably per-column magnitudes computed earlier in the file -- confirm there)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp2.ui^Stmp3.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_xor_ps(Vtmp2,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_xor_ps(Vtmp2,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.ui=Stmp2.ui^Stmp5.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_xor_ps(Vtmp2,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_xor_ps(Vtmp2,Vtmp5);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.ui=Stmp3.ui^Stmp5.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_xor_ps(Vtmp3,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_xor_ps(Vtmp3,Vtmp5);)
+
+    // If columns 2-3 have been swapped, negate 3rd column of A and V so that V is still a rotation
+    // Stmp5 is -2 masked by Stmp4 (so -2 where the swap happened, 0 elsewhere); Stmp4 is then overwritten with 1+Stmp5, i.e. -1 where swapped and +1 elsewhere, and used as the multiplier below
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=-2.;)                                            ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_set1_ps(-2.);)                                        ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_set1_ps(-2.);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.ui=Stmp5.ui&Stmp4.ui;)                             ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_and_ps(Vtmp5,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_and_ps(Vtmp5,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=1.;)                                             ENABLE_SSE_IMPLEMENTATION(Vtmp4=Vone;)                                                    ENABLE_AVX_IMPLEMENTATION(Vtmp4=Vone;)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f+Stmp5.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_add_ps(Vtmp4,Vtmp5);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_add_ps(Vtmp4,Vtmp5);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Sa13.f=Sa13.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va13=_mm_mul_ps(Va13,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va13=_mm256_mul_ps(Va13,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa23.f=Sa23.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va23=_mm_mul_ps(Va23,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va23=_mm256_mul_ps(Va23,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sa33.f=Sa33.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Va33=_mm_mul_ps(Va33,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Va33=_mm256_mul_ps(Va33,Vtmp4);)
+
+#ifdef COMPUTE_V_AS_MATRIX
+    ENABLE_SCALAR_IMPLEMENTATION(Sv13.f=Sv13.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv13=_mm_mul_ps(Vv13,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv13=_mm256_mul_ps(Vv13,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv23.f=Sv23.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv23=_mm_mul_ps(Vv23,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv23=_mm256_mul_ps(Vv23,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sv33.f=Sv33.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vv33=_mm_mul_ps(Vv33,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vv33=_mm256_mul_ps(Vv33,Vtmp4);)
+#endif
+
+    // If columns 2-3 have been swapped, also update quaternion representation of V (the quaternion may become un-normalized after this)
+    // Stmp4 (+1 or -1 at this point) is remapped to 0 or -1, assuming Sone_half holds 0.5. With 0 the quaternion is unchanged; with -1, (qvs,qvvx) becomes (qvs-qvvx, qvvx+qvs) and (qvvy,qvvz) becomes (qvvy+qvvz, qvvz-qvvy)
+#ifdef COMPUTE_V_AS_QUATERNION
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f*Sone_half.f;)                            ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Vtmp4,Vone_half);)                             ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Vtmp4,Vone_half);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp4.f-Sone_half.f;)                            ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_sub_ps(Vtmp4,Vone_half);)                             ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_sub_ps(Vtmp4,Vone_half);)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp4.f*Sqvvx.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_mul_ps(Vtmp4,Vqvvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_mul_ps(Vtmp4,Vqvvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp5.f+Sqvs.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_add_ps(Vtmp5,Vqvs);)                                  ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_add_ps(Vtmp5,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=Sqvs.f*Stmp4.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vqvs=_mm_mul_ps(Vqvs,Vtmp4);)                                   ENABLE_AVX_IMPLEMENTATION(Vqvs=_mm256_mul_ps(Vqvs,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvx.f=Sqvvx.f-Sqvs.f;)                                 ENABLE_SSE_IMPLEMENTATION(Vqvvx=_mm_sub_ps(Vqvvx,Vqvs);)                                  ENABLE_AVX_IMPLEMENTATION(Vqvvx=_mm256_sub_ps(Vqvvx,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=Stmp5.f;)                                         ENABLE_SSE_IMPLEMENTATION(Vqvs=Vtmp5;)                                                    ENABLE_AVX_IMPLEMENTATION(Vqvs=Vtmp5;)
+
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp4.f*Sqvvy.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_mul_ps(Vtmp4,Vqvvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_mul_ps(Vtmp4,Vqvvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp5.f=Stmp5.f+Sqvvz.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp5=_mm_add_ps(Vtmp5,Vqvvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp5=_mm256_add_ps(Vtmp5,Vqvvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvz.f=Sqvvz.f*Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvz=_mm_mul_ps(Vqvvz,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvz=_mm256_mul_ps(Vqvvz,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvy.f=Sqvvy.f-Sqvvz.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvy=_mm_sub_ps(Vqvvy,Vqvvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvy=_mm256_sub_ps(Vqvvy,Vqvvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvz.f=Stmp5.f;)                                        ENABLE_SSE_IMPLEMENTATION(Vqvvz=Vtmp5;)                                                   ENABLE_AVX_IMPLEMENTATION(Vqvvz=Vtmp5;)
+#endif
+
+#ifdef COMPUTE_V_AS_MATRIX // Debug dump of the nine entries of V at this point of the kernel
+#ifdef PRINT_DEBUGGING_OUTPUT
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar V ="<<std::endl;
+    std::cout<<std::setw(12)<<Sv11.f<<"  "<<std::setw(12)<<Sv12.f<<"  "<<std::setw(12)<<Sv13.f<<std::endl;
+    std::cout<<std::setw(12)<<Sv21.f<<"  "<<std::setw(12)<<Sv22.f<<"  "<<std::setw(12)<<Sv23.f<<std::endl;
+    std::cout<<std::setw(12)<<Sv31.f<<"  "<<std::setw(12)<<Sv32.f<<"  "<<std::setw(12)<<Sv33.f<<std::endl;
+#endif // USE_SCALAR_IMPLEMENTATION
+#ifdef USE_SSE_IMPLEMENTATION // each register is stored (unaligned) to buf and only element 0 is copied out and printed
+    _mm_storeu_ps(buf,Vv11);V11=buf[0];
+    _mm_storeu_ps(buf,Vv21);V21=buf[0];
+    _mm_storeu_ps(buf,Vv31);V31=buf[0];
+    _mm_storeu_ps(buf,Vv12);V12=buf[0];
+    _mm_storeu_ps(buf,Vv22);V22=buf[0];
+    _mm_storeu_ps(buf,Vv32);V32=buf[0];
+    _mm_storeu_ps(buf,Vv13);V13=buf[0];
+    _mm_storeu_ps(buf,Vv23);V23=buf[0];
+    _mm_storeu_ps(buf,Vv33);V33=buf[0];
+    std::cout<<"Vector V ="<<std::endl;
+    std::cout<<std::setw(12)<<V11<<"  "<<std::setw(12)<<V12<<"  "<<std::setw(12)<<V13<<std::endl;
+    std::cout<<std::setw(12)<<V21<<"  "<<std::setw(12)<<V22<<"  "<<std::setw(12)<<V23<<std::endl;
+    std::cout<<std::setw(12)<<V31<<"  "<<std::setw(12)<<V32<<"  "<<std::setw(12)<<V33<<std::endl;
+#endif // USE_SSE_IMPLEMENTATION
+#ifdef USE_AVX_IMPLEMENTATION // same pattern as the SSE path: only element 0 of each register is printed
+    _mm256_storeu_ps(buf,Vv11);V11=buf[0];
+    _mm256_storeu_ps(buf,Vv21);V21=buf[0];
+    _mm256_storeu_ps(buf,Vv31);V31=buf[0];
+    _mm256_storeu_ps(buf,Vv12);V12=buf[0];
+    _mm256_storeu_ps(buf,Vv22);V22=buf[0];
+    _mm256_storeu_ps(buf,Vv32);V32=buf[0];
+    _mm256_storeu_ps(buf,Vv13);V13=buf[0];
+    _mm256_storeu_ps(buf,Vv23);V23=buf[0];
+    _mm256_storeu_ps(buf,Vv33);V33=buf[0];
+    std::cout<<"Vector V ="<<std::endl;
+    std::cout<<std::setw(12)<<V11<<"  "<<std::setw(12)<<V12<<"  "<<std::setw(12)<<V13<<std::endl;
+    std::cout<<std::setw(12)<<V21<<"  "<<std::setw(12)<<V22<<"  "<<std::setw(12)<<V23<<std::endl;
+    std::cout<<std::setw(12)<<V31<<"  "<<std::setw(12)<<V32<<"  "<<std::setw(12)<<V33<<std::endl;
+#endif // USE_AVX_IMPLEMENTATION
+#endif // PRINT_DEBUGGING_OUTPUT
+#endif // COMPUTE_V_AS_MATRIX
+
+#ifdef PRINT_DEBUGGING_OUTPUT // Debug dump of the nine entries of A at this point of the kernel
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar A (after multiplying with V) ="<<std::endl;
+    std::cout<<std::setw(12)<<Sa11.f<<"  "<<std::setw(12)<<Sa12.f<<"  "<<std::setw(12)<<Sa13.f<<std::endl;
+    std::cout<<std::setw(12)<<Sa21.f<<"  "<<std::setw(12)<<Sa22.f<<"  "<<std::setw(12)<<Sa23.f<<std::endl;
+    std::cout<<std::setw(12)<<Sa31.f<<"  "<<std::setw(12)<<Sa32.f<<"  "<<std::setw(12)<<Sa33.f<<std::endl;
+#endif // USE_SCALAR_IMPLEMENTATION
+#ifdef USE_SSE_IMPLEMENTATION // each register is stored (unaligned) to buf and only element 0 is copied out and printed
+    _mm_storeu_ps(buf,Va11);A11=buf[0];
+    _mm_storeu_ps(buf,Va21);A21=buf[0];
+    _mm_storeu_ps(buf,Va31);A31=buf[0];
+    _mm_storeu_ps(buf,Va12);A12=buf[0];
+    _mm_storeu_ps(buf,Va22);A22=buf[0];
+    _mm_storeu_ps(buf,Va32);A32=buf[0];
+    _mm_storeu_ps(buf,Va13);A13=buf[0];
+    _mm_storeu_ps(buf,Va23);A23=buf[0];
+    _mm_storeu_ps(buf,Va33);A33=buf[0];
+    std::cout<<"Vector A (after multiplying with V) ="<<std::endl;
+    std::cout<<std::setw(12)<<A11<<"  "<<std::setw(12)<<A12<<"  "<<std::setw(12)<<A13<<std::endl;
+    std::cout<<std::setw(12)<<A21<<"  "<<std::setw(12)<<A22<<"  "<<std::setw(12)<<A23<<std::endl;
+    std::cout<<std::setw(12)<<A31<<"  "<<std::setw(12)<<A32<<"  "<<std::setw(12)<<A33<<std::endl;
+#endif // USE_SSE_IMPLEMENTATION
+#ifdef USE_AVX_IMPLEMENTATION // same pattern as the SSE path: only element 0 of each register is printed
+    _mm256_storeu_ps(buf,Va11);A11=buf[0];
+    _mm256_storeu_ps(buf,Va21);A21=buf[0];
+    _mm256_storeu_ps(buf,Va31);A31=buf[0];
+    _mm256_storeu_ps(buf,Va12);A12=buf[0];
+    _mm256_storeu_ps(buf,Va22);A22=buf[0];
+    _mm256_storeu_ps(buf,Va32);A32=buf[0];
+    _mm256_storeu_ps(buf,Va13);A13=buf[0];
+    _mm256_storeu_ps(buf,Va23);A23=buf[0];
+    _mm256_storeu_ps(buf,Va33);A33=buf[0];
+    std::cout<<"Vector A (after multiplying with V) ="<<std::endl;
+    std::cout<<std::setw(12)<<A11<<"  "<<std::setw(12)<<A12<<"  "<<std::setw(12)<<A13<<std::endl;
+    std::cout<<std::setw(12)<<A21<<"  "<<std::setw(12)<<A22<<"  "<<std::setw(12)<<A23<<std::endl;
+    std::cout<<std::setw(12)<<A31<<"  "<<std::setw(12)<<A32<<"  "<<std::setw(12)<<A33<<std::endl;
+#endif // USE_AVX_IMPLEMENTATION
+#endif // PRINT_DEBUGGING_OUTPUT
+
+    //###########################################################
+    // Re-normalize quaternion for matrix V
+    //###########################################################
+    // Stmp2 accumulates qvs^2+qvvx^2+qvvy^2+qvvz^2 and Stmp1 receives its reciprocal square root; the SSE/AVX rsqrt intrinsics return an approximation
+#ifdef COMPUTE_V_AS_QUATERNION
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Sqvs.f*Sqvs.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_mul_ps(Vqvs,Vqvs);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_mul_ps(Vqvs,Vqvs);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvx.f*Sqvvx.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vqvvx,Vqvvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vqvvx,Vqvvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Stmp1.f+Stmp2.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vtmp1,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vtmp1,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvy.f*Sqvvy.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vqvvy,Vqvvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vqvvy,Vqvvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Stmp1.f+Stmp2.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vtmp1,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vtmp1,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Sqvvz.f*Sqvvz.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vqvvz,Vqvvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vqvvz,Vqvvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Stmp1.f+Stmp2.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_add_ps(Vtmp1,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_add_ps(Vtmp1,Vtmp2);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=rsqrt(Stmp2.f);)                                 ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_rsqrt_ps(Vtmp2);)                                     ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_rsqrt_ps(Vtmp2);)
+    // Optional refinement: the six operations below replace Stmp1 with 1.5*Stmp1 - 0.5*Stmp2*Stmp1^3 (assuming Sone_half holds 0.5), i.e. one Newton-Raphson step for 1/sqrt(Stmp2)
+#ifdef PERFORM_STRICT_QUATERNION_RENORMALIZATION
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp4.f=Stmp1.f*Sone_half.f;)                            ENABLE_SSE_IMPLEMENTATION(Vtmp4=_mm_mul_ps(Vtmp1,Vone_half);)                             ENABLE_AVX_IMPLEMENTATION(Vtmp4=_mm256_mul_ps(Vtmp1,Vone_half);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Stmp1.f*Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vtmp1,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vtmp1,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Stmp1.f*Stmp3.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vtmp1,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vtmp1,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Stmp2.f*Stmp3.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vtmp2,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vtmp2,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Stmp1.f+Stmp4.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_add_ps(Vtmp1,Vtmp4);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_add_ps(Vtmp1,Vtmp4);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Stmp1.f-Stmp3.f;)                                ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_sub_ps(Vtmp1,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_sub_ps(Vtmp1,Vtmp3);)
+#endif
+    // Scale all four quaternion components by Stmp1
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvs.f=Sqvs.f*Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vqvs=_mm_mul_ps(Vqvs,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Vqvs=_mm256_mul_ps(Vqvs,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvx.f=Sqvvx.f*Stmp1.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvx=_mm_mul_ps(Vqvvx,Vtmp1);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvx=_mm256_mul_ps(Vqvvx,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvy.f=Sqvvy.f*Stmp1.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvy=_mm_mul_ps(Vqvvy,Vtmp1);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvy=_mm256_mul_ps(Vqvvy,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Sqvvz.f=Sqvvz.f*Stmp1.f;)                                ENABLE_SSE_IMPLEMENTATION(Vqvvz=_mm_mul_ps(Vqvvz,Vtmp1);)                                 ENABLE_AVX_IMPLEMENTATION(Vqvvz=_mm256_mul_ps(Vqvvz,Vtmp1);)
+
+#ifdef PRINT_DEBUGGING_OUTPUT
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar qV ="<<std::endl;
+    std::cout<<std::setw(12)<<Sqvs.f<<"  "<<std::setw(12)<<Sqvvx.f<<"  "<<std::setw(12)<<Sqvvy.f<<"  "<<std::setw(12)<<Sqvvz.f<<std::endl;
+#endif
+#ifdef USE_SSE_IMPLEMENTATION
+    _mm_storeu_ps(buf,Vqvs);QVS=buf[0];
+    _mm_storeu_ps(buf,Vqvvx);QVVX=buf[0];
+    _mm_storeu_ps(buf,Vqvvy);QVVY=buf[0];
+    _mm_storeu_ps(buf,Vqvvz);QVVZ=buf[0];
+    std::cout<<"Vector qV ="<<std::endl;
+    std::cout<<std::setw(12)<<QVS<<"  "<<std::setw(12)<<QVVX<<"  "<<std::setw(12)<<QVVY<<"  "<<std::setw(12)<<QVVZ<<std::endl;
+#endif
+#ifdef USE_AVX_IMPLEMENTATION
+    _mm256_storeu_ps(buf,Vqvs);QVS=buf[0];
+    _mm256_storeu_ps(buf,Vqvvx);QVVX=buf[0];
+    _mm256_storeu_ps(buf,Vqvvy);QVVY=buf[0];
+    _mm256_storeu_ps(buf,Vqvvz);QVVZ=buf[0];
+    std::cout<<"Vector qV ="<<std::endl;
+    std::cout<<std::setw(12)<<QVS<<"  "<<std::setw(12)<<QVVX<<"  "<<std::setw(12)<<QVVY<<"  "<<std::setw(12)<<QVVZ<<std::endl;
+#endif
+#endif
+#endif
+
+    //###########################################################
+    // Construct QR factorization of A*V (=U*D) using Givens rotations.  U (and/or its quaternion qU) is first reset to the identity here; three rotation passes follow, each binding the generic SA*/SU*/VA*/VU* names to concrete entries and including the shared kernel.
+    //###########################################################
+
+#ifdef COMPUTE_U_AS_MATRIX // start U as the 3x3 identity: 1 on the diagonal, 0 elsewhere
+    ENABLE_SCALAR_IMPLEMENTATION(Su11.f=1.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu11=Vone;)                                                     ENABLE_AVX_IMPLEMENTATION(Vu11=Vone;)
+    ENABLE_SCALAR_IMPLEMENTATION(Su21.f=0.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu21=_mm_xor_ps(Vu21,Vu21);)                                    ENABLE_AVX_IMPLEMENTATION(Vu21=_mm256_xor_ps(Vu21,Vu21);) // xor of a register with itself gives all-zero bits, i.e. 0.0f in every lane
+    ENABLE_SCALAR_IMPLEMENTATION(Su31.f=0.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu31=_mm_xor_ps(Vu31,Vu31);)                                    ENABLE_AVX_IMPLEMENTATION(Vu31=_mm256_xor_ps(Vu31,Vu31);)
+    ENABLE_SCALAR_IMPLEMENTATION(Su12.f=0.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu12=_mm_xor_ps(Vu12,Vu12);)                                    ENABLE_AVX_IMPLEMENTATION(Vu12=_mm256_xor_ps(Vu12,Vu12);)
+    ENABLE_SCALAR_IMPLEMENTATION(Su22.f=1.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu22=Vone;)                                                     ENABLE_AVX_IMPLEMENTATION(Vu22=Vone;)
+    ENABLE_SCALAR_IMPLEMENTATION(Su32.f=0.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu32=_mm_xor_ps(Vu32,Vu32);)                                    ENABLE_AVX_IMPLEMENTATION(Vu32=_mm256_xor_ps(Vu32,Vu32);)
+    ENABLE_SCALAR_IMPLEMENTATION(Su13.f=0.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu13=_mm_xor_ps(Vu13,Vu13);)                                    ENABLE_AVX_IMPLEMENTATION(Vu13=_mm256_xor_ps(Vu13,Vu13);)
+    ENABLE_SCALAR_IMPLEMENTATION(Su23.f=0.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu23=_mm_xor_ps(Vu23,Vu23);)                                    ENABLE_AVX_IMPLEMENTATION(Vu23=_mm256_xor_ps(Vu23,Vu23);)
+    ENABLE_SCALAR_IMPLEMENTATION(Su33.f=1.;)                                              ENABLE_SSE_IMPLEMENTATION(Vu33=Vone;)                                                     ENABLE_AVX_IMPLEMENTATION(Vu33=Vone;)
+#endif // COMPUTE_U_AS_MATRIX
+
+#ifdef COMPUTE_U_AS_QUATERNION // start qU as the identity quaternion: scalar part 1, vector part (0,0,0)
+    ENABLE_SCALAR_IMPLEMENTATION(Squs.f=1.;)                                              ENABLE_SSE_IMPLEMENTATION(Vqus=Vone;)                                                     ENABLE_AVX_IMPLEMENTATION(Vqus=Vone;)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvx.f=0.;)                                             ENABLE_SSE_IMPLEMENTATION(Vquvx=_mm_xor_ps(Vquvx,Vquvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vquvx=_mm256_xor_ps(Vquvx,Vquvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvy.f=0.;)                                             ENABLE_SSE_IMPLEMENTATION(Vquvy=_mm_xor_ps(Vquvy,Vquvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vquvy=_mm256_xor_ps(Vquvy,Vquvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvz.f=0.;)                                             ENABLE_SSE_IMPLEMENTATION(Vquvz=_mm_xor_ps(Vquvz,Vquvz);)                                 ENABLE_AVX_IMPLEMENTATION(Vquvz=_mm256_xor_ps(Vquvz,Vquvz);)
+#endif // COMPUTE_U_AS_QUATERNION
+
+    // First Givens rotation: pivot a11 against a21.  The kernel's two generic rows (SA1x/SA2x) are bound to rows 1 and 2 of A, and its two generic U columns (SUx1/SUx2) to columns 1 and 2 of U; the V* names bind the SIMD counterparts the same way.
+
+#define SAPIVOT Sa11
+#define SANPIVOT Sa21
+#define SA11 Sa11
+#define SA21 Sa21
+#define SA12 Sa12
+#define SA22 Sa22
+#define SA13 Sa13
+#define SA23 Sa23
+#define SU11 Su11
+#define SU12 Su12
+#define SU21 Su21
+#define SU22 Su22
+#define SU31 Su31
+#define SU32 Su32
+
+#define VAPIVOT Va11
+#define VANPIVOT Va21
+#define VA11 Va11
+#define VA21 Va21
+#define VA12 Va12
+#define VA22 Va22
+#define VA13 Va13
+#define VA23 Va23
+#define VU11 Vu11
+#define VU12 Vu12
+#define VU21 Vu21
+#define VU22 Vu22
+#define VU31 Vu31
+#define VU32 Vu32
+
+#include "Singular_Value_Decomposition_Givens_QR_Factorization_Kernel.hpp" // NOTE(review): kernel body is not visible here; presumably it leaves the rotation's half-angle cosine/sine in Sch/Ssh (Vch/Vsh), which the quaternion update below reads - confirm
+    
+#undef SAPIVOT
+#undef SANPIVOT
+#undef SA11
+#undef SA21
+#undef SA12
+#undef SA22
+#undef SA13
+#undef SA23
+#undef SU11
+#undef SU12
+#undef SU21
+#undef SU22
+#undef SU31
+#undef SU32
+
+#undef VAPIVOT
+#undef VANPIVOT
+#undef VA11
+#undef VA21
+#undef VA12
+#undef VA22
+#undef VA13
+#undef VA23
+#undef VU11
+#undef VU12
+#undef VU21
+#undef VU22
+#undef VU31
+#undef VU32
+
+    // Update quaternion representation of U: qU was reset to the identity before this rotation, so it is simply overwritten with (s, vx, vy, vz) = (Sch, 0, 0, Ssh)
+
+#ifdef COMPUTE_U_AS_QUATERNION
+    ENABLE_SCALAR_IMPLEMENTATION(Squs.f=Sch.f;)                                           ENABLE_SSE_IMPLEMENTATION(Vqus=Vch;)                                                      ENABLE_AVX_IMPLEMENTATION(Vqus=Vch;)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvx.f=0.;)                                             ENABLE_SSE_IMPLEMENTATION(Vquvx=_mm_xor_ps(Vquvx,Vquvx);)                                 ENABLE_AVX_IMPLEMENTATION(Vquvx=_mm256_xor_ps(Vquvx,Vquvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvy.f=0.;)                                             ENABLE_SSE_IMPLEMENTATION(Vquvy=_mm_xor_ps(Vquvy,Vquvy);)                                 ENABLE_AVX_IMPLEMENTATION(Vquvy=_mm256_xor_ps(Vquvy,Vquvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvz.f=Ssh.f;)                                          ENABLE_SSE_IMPLEMENTATION(Vquvz=Vsh;)                                                     ENABLE_AVX_IMPLEMENTATION(Vquvz=Vsh;)
+#endif // COMPUTE_U_AS_QUATERNION
+
+    // Second Givens rotation: pivot a11 against a31.  The kernel's two generic rows are bound to rows 1 and 3 of A, and its two generic U columns to columns 1 and 3 of U; the V* names bind the SIMD counterparts the same way.
+
+#define SAPIVOT Sa11
+#define SANPIVOT Sa31
+#define SA11 Sa11
+#define SA21 Sa31
+#define SA12 Sa12
+#define SA22 Sa32
+#define SA13 Sa13
+#define SA23 Sa33
+#define SU11 Su11
+#define SU12 Su13
+#define SU21 Su21
+#define SU22 Su23
+#define SU31 Su31
+#define SU32 Su33
+
+#define VAPIVOT Va11
+#define VANPIVOT Va31
+#define VA11 Va11
+#define VA21 Va31
+#define VA12 Va12
+#define VA22 Va32
+#define VA13 Va13
+#define VA23 Va33
+#define VU11 Vu11
+#define VU12 Vu13
+#define VU21 Vu21
+#define VU22 Vu23
+#define VU31 Vu31
+#define VU32 Vu33
+
+#include "Singular_Value_Decomposition_Givens_QR_Factorization_Kernel.hpp" // NOTE(review): kernel body is not visible here; presumably it refreshes Sch/Ssh (Vch/Vsh) for this rotation - confirm
+    
+#undef SAPIVOT
+#undef SANPIVOT
+#undef SA11
+#undef SA21
+#undef SA12
+#undef SA22
+#undef SA13
+#undef SA23
+#undef SU11
+#undef SU12
+#undef SU21
+#undef SU22
+#undef SU31
+#undef SU32
+
+#undef VAPIVOT
+#undef VANPIVOT
+#undef VA11
+#undef VA21
+#undef VA12
+#undef VA22
+#undef VA13
+#undef VA23
+#undef VU11
+#undef VU12
+#undef VU21
+#undef VU22
+#undef VU31
+#undef VU32
+
+    // Update quaternion representation of U: equals the quaternion product qU * (Sch, 0, -Ssh, 0), specialised for qU.vx = qU.vy = 0 as left by the first update.  Ssh/Vsh is overwritten with Ssh*qU.s along the way.
+
+#ifdef COMPUTE_U_AS_QUATERNION
+    ENABLE_SCALAR_IMPLEMENTATION(Squvx.f=Ssh.f*Squvz.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vquvx=_mm_mul_ps(Vsh,Vquvz);)                                   ENABLE_AVX_IMPLEMENTATION(Vquvx=_mm256_mul_ps(Vsh,Vquvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ssh.f=Ssh.f*Squs.f;)                                     ENABLE_SSE_IMPLEMENTATION(Vsh=_mm_mul_ps(Vsh,Vqus);)                                      ENABLE_AVX_IMPLEMENTATION(Vsh=_mm256_mul_ps(Vsh,Vqus);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvy.f=Squvy.f-Ssh.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vquvy=_mm_sub_ps(Vquvy,Vsh);)                                   ENABLE_AVX_IMPLEMENTATION(Vquvy=_mm256_sub_ps(Vquvy,Vsh);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squs.f=Sch.f*Squs.f;)                                    ENABLE_SSE_IMPLEMENTATION(Vqus=_mm_mul_ps(Vch,Vqus);)                                     ENABLE_AVX_IMPLEMENTATION(Vqus=_mm256_mul_ps(Vch,Vqus);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvz.f=Sch.f*Squvz.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vquvz=_mm_mul_ps(Vch,Vquvz);)                                   ENABLE_AVX_IMPLEMENTATION(Vquvz=_mm256_mul_ps(Vch,Vquvz);)
+#endif // COMPUTE_U_AS_QUATERNION
+
+    // Third Givens rotation: pivot a22 against a32.  The kernel's two generic rows are bound to rows 2 and 3 of A, and its two generic U columns to columns 2 and 3 of U; the V* names bind the SIMD counterparts the same way.
+
+#define SAPIVOT Sa22
+#define SANPIVOT Sa32
+#define SA11 Sa21
+#define SA21 Sa31
+#define SA12 Sa22
+#define SA22 Sa32
+#define SA13 Sa23
+#define SA23 Sa33
+#define SU11 Su12
+#define SU12 Su13
+#define SU21 Su22
+#define SU22 Su23
+#define SU31 Su32
+#define SU32 Su33
+
+#define VAPIVOT Va22
+#define VANPIVOT Va32
+#define VA11 Va21
+#define VA21 Va31
+#define VA12 Va22
+#define VA22 Va32
+#define VA13 Va23
+#define VA23 Va33
+#define VU11 Vu12
+#define VU12 Vu13
+#define VU21 Vu22
+#define VU22 Vu23
+#define VU31 Vu32
+#define VU32 Vu33
+
+#include "Singular_Value_Decomposition_Givens_QR_Factorization_Kernel.hpp" // NOTE(review): kernel body is not visible here; presumably it refreshes Sch/Ssh (Vch/Vsh) for this rotation - confirm
+    
+#undef SAPIVOT
+#undef SANPIVOT
+#undef SA11
+#undef SA21
+#undef SA12
+#undef SA22
+#undef SA13
+#undef SA23
+#undef SU11
+#undef SU12
+#undef SU21
+#undef SU22
+#undef SU31
+#undef SU32
+
+#undef VAPIVOT
+#undef VANPIVOT
+#undef VA11
+#undef VA21
+#undef VA12
+#undef VA22
+#undef VA13
+#undef VA23
+#undef VU11
+#undef VU12
+#undef VU21
+#undef VU22
+#undef VU31
+#undef VU32
+
+    // Update quaternion representation of U: full quaternion product qU * (Sch, Ssh, 0, 0).  tmp1..tmp3 hold Ssh times the old vx, vy, vz; Ssh/Vsh is overwritten with Ssh times the old qU.s before the components are recombined.
+
+#ifdef COMPUTE_U_AS_QUATERNION
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp1.f=Ssh.f*Squvx.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp1=_mm_mul_ps(Vsh,Vquvx);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp1=_mm256_mul_ps(Vsh,Vquvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp2.f=Ssh.f*Squvy.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp2=_mm_mul_ps(Vsh,Vquvy);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp2=_mm256_mul_ps(Vsh,Vquvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Stmp3.f=Ssh.f*Squvz.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vtmp3=_mm_mul_ps(Vsh,Vquvz);)                                   ENABLE_AVX_IMPLEMENTATION(Vtmp3=_mm256_mul_ps(Vsh,Vquvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Ssh.f=Ssh.f*Squs.f;)                                     ENABLE_SSE_IMPLEMENTATION(Vsh=_mm_mul_ps(Vsh,Vqus);)                                      ENABLE_AVX_IMPLEMENTATION(Vsh=_mm256_mul_ps(Vsh,Vqus);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squs.f=Sch.f*Squs.f;)                                    ENABLE_SSE_IMPLEMENTATION(Vqus=_mm_mul_ps(Vch,Vqus);)                                     ENABLE_AVX_IMPLEMENTATION(Vqus=_mm256_mul_ps(Vch,Vqus);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvx.f=Sch.f*Squvx.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vquvx=_mm_mul_ps(Vch,Vquvx);)                                   ENABLE_AVX_IMPLEMENTATION(Vquvx=_mm256_mul_ps(Vch,Vquvx);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvy.f=Sch.f*Squvy.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vquvy=_mm_mul_ps(Vch,Vquvy);)                                   ENABLE_AVX_IMPLEMENTATION(Vquvy=_mm256_mul_ps(Vch,Vquvy);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvz.f=Sch.f*Squvz.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vquvz=_mm_mul_ps(Vch,Vquvz);)                                   ENABLE_AVX_IMPLEMENTATION(Vquvz=_mm256_mul_ps(Vch,Vquvz);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvx.f=Squvx.f+Ssh.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vquvx=_mm_add_ps(Vquvx,Vsh);)                                   ENABLE_AVX_IMPLEMENTATION(Vquvx=_mm256_add_ps(Vquvx,Vsh);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squs.f=Squs.f-Stmp1.f;)                                  ENABLE_SSE_IMPLEMENTATION(Vqus=_mm_sub_ps(Vqus,Vtmp1);)                                   ENABLE_AVX_IMPLEMENTATION(Vqus=_mm256_sub_ps(Vqus,Vtmp1);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvy.f=Squvy.f+Stmp3.f;)                                ENABLE_SSE_IMPLEMENTATION(Vquvy=_mm_add_ps(Vquvy,Vtmp3);)                                 ENABLE_AVX_IMPLEMENTATION(Vquvy=_mm256_add_ps(Vquvy,Vtmp3);)
+    ENABLE_SCALAR_IMPLEMENTATION(Squvz.f=Squvz.f-Stmp2.f;)                                ENABLE_SSE_IMPLEMENTATION(Vquvz=_mm_sub_ps(Vquvz,Vtmp2);)                                 ENABLE_AVX_IMPLEMENTATION(Vquvz=_mm256_sub_ps(Vquvz,Vtmp2);)
+#endif // COMPUTE_U_AS_QUATERNION
+
+#ifdef COMPUTE_U_AS_MATRIX
+#ifdef PRINT_DEBUGGING_OUTPUT // debug dump of U; the SIMD variants store each register to buf and print lane 0 only
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar U ="<<std::endl;
+    std::cout<<std::setw(12)<<Su11.f<<"  "<<std::setw(12)<<Su12.f<<"  "<<std::setw(12)<<Su13.f<<std::endl;
+    std::cout<<std::setw(12)<<Su21.f<<"  "<<std::setw(12)<<Su22.f<<"  "<<std::setw(12)<<Su23.f<<std::endl;
+    std::cout<<std::setw(12)<<Su31.f<<"  "<<std::setw(12)<<Su32.f<<"  "<<std::setw(12)<<Su33.f<<std::endl;
+#endif // USE_SCALAR_IMPLEMENTATION
+#ifdef USE_SSE_IMPLEMENTATION
+    _mm_storeu_ps(buf,Vu11);U11=buf[0];
+    _mm_storeu_ps(buf,Vu21);U21=buf[0];
+    _mm_storeu_ps(buf,Vu31);U31=buf[0];
+    _mm_storeu_ps(buf,Vu12);U12=buf[0];
+    _mm_storeu_ps(buf,Vu22);U22=buf[0];
+    _mm_storeu_ps(buf,Vu32);U32=buf[0];
+    _mm_storeu_ps(buf,Vu13);U13=buf[0];
+    _mm_storeu_ps(buf,Vu23);U23=buf[0];
+    _mm_storeu_ps(buf,Vu33);U33=buf[0];
+    std::cout<<"Vector U ="<<std::endl;
+    std::cout<<std::setw(12)<<U11<<"  "<<std::setw(12)<<U12<<"  "<<std::setw(12)<<U13<<std::endl;
+    std::cout<<std::setw(12)<<U21<<"  "<<std::setw(12)<<U22<<"  "<<std::setw(12)<<U23<<std::endl;
+    std::cout<<std::setw(12)<<U31<<"  "<<std::setw(12)<<U32<<"  "<<std::setw(12)<<U33<<std::endl;
+#endif // USE_SSE_IMPLEMENTATION
+#ifdef USE_AVX_IMPLEMENTATION
+    _mm256_storeu_ps(buf,Vu11);U11=buf[0];
+    _mm256_storeu_ps(buf,Vu21);U21=buf[0];
+    _mm256_storeu_ps(buf,Vu31);U31=buf[0];
+    _mm256_storeu_ps(buf,Vu12);U12=buf[0];
+    _mm256_storeu_ps(buf,Vu22);U22=buf[0];
+    _mm256_storeu_ps(buf,Vu32);U32=buf[0];
+    _mm256_storeu_ps(buf,Vu13);U13=buf[0];
+    _mm256_storeu_ps(buf,Vu23);U23=buf[0];
+    _mm256_storeu_ps(buf,Vu33);U33=buf[0];
+    std::cout<<"Vector U ="<<std::endl;
+    std::cout<<std::setw(12)<<U11<<"  "<<std::setw(12)<<U12<<"  "<<std::setw(12)<<U13<<std::endl;
+    std::cout<<std::setw(12)<<U21<<"  "<<std::setw(12)<<U22<<"  "<<std::setw(12)<<U23<<std::endl;
+    std::cout<<std::setw(12)<<U31<<"  "<<std::setw(12)<<U32<<"  "<<std::setw(12)<<U33<<std::endl;
+#endif // USE_AVX_IMPLEMENTATION
+#endif // PRINT_DEBUGGING_OUTPUT
+#endif // COMPUTE_U_AS_MATRIX
+
+#ifdef PRINT_DEBUGGING_OUTPUT // debug dump of A after the three rotations; SIMD variants again print lane 0 only
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar A (after multiplying with U-transpose and V) ="<<std::endl;
+    std::cout<<std::setw(12)<<Sa11.f<<"  "<<std::setw(12)<<Sa12.f<<"  "<<std::setw(12)<<Sa13.f<<std::endl;
+    std::cout<<std::setw(12)<<Sa21.f<<"  "<<std::setw(12)<<Sa22.f<<"  "<<std::setw(12)<<Sa23.f<<std::endl;
+    std::cout<<std::setw(12)<<Sa31.f<<"  "<<std::setw(12)<<Sa32.f<<"  "<<std::setw(12)<<Sa33.f<<std::endl;
+#endif // USE_SCALAR_IMPLEMENTATION
+#ifdef USE_SSE_IMPLEMENTATION
+    _mm_storeu_ps(buf,Va11);A11=buf[0];
+    _mm_storeu_ps(buf,Va21);A21=buf[0];
+    _mm_storeu_ps(buf,Va31);A31=buf[0];
+    _mm_storeu_ps(buf,Va12);A12=buf[0];
+    _mm_storeu_ps(buf,Va22);A22=buf[0];
+    _mm_storeu_ps(buf,Va32);A32=buf[0];
+    _mm_storeu_ps(buf,Va13);A13=buf[0];
+    _mm_storeu_ps(buf,Va23);A23=buf[0];
+    _mm_storeu_ps(buf,Va33);A33=buf[0];
+    std::cout<<"Vector A (after multiplying with U-transpose and V) ="<<std::endl;
+    std::cout<<std::setw(12)<<A11<<"  "<<std::setw(12)<<A12<<"  "<<std::setw(12)<<A13<<std::endl;
+    std::cout<<std::setw(12)<<A21<<"  "<<std::setw(12)<<A22<<"  "<<std::setw(12)<<A23<<std::endl;
+    std::cout<<std::setw(12)<<A31<<"  "<<std::setw(12)<<A32<<"  "<<std::setw(12)<<A33<<std::endl;
+#endif // USE_SSE_IMPLEMENTATION
+#ifdef USE_AVX_IMPLEMENTATION
+    _mm256_storeu_ps(buf,Va11);A11=buf[0];
+    _mm256_storeu_ps(buf,Va21);A21=buf[0];
+    _mm256_storeu_ps(buf,Va31);A31=buf[0];
+    _mm256_storeu_ps(buf,Va12);A12=buf[0];
+    _mm256_storeu_ps(buf,Va22);A22=buf[0];
+    _mm256_storeu_ps(buf,Va32);A32=buf[0];
+    _mm256_storeu_ps(buf,Va13);A13=buf[0];
+    _mm256_storeu_ps(buf,Va23);A23=buf[0];
+    _mm256_storeu_ps(buf,Va33);A33=buf[0];
+    std::cout<<"Vector A (after multiplying with U-transpose and V) ="<<std::endl;
+    std::cout<<std::setw(12)<<A11<<"  "<<std::setw(12)<<A12<<"  "<<std::setw(12)<<A13<<std::endl;
+    std::cout<<std::setw(12)<<A21<<"  "<<std::setw(12)<<A22<<"  "<<std::setw(12)<<A23<<std::endl;
+    std::cout<<std::setw(12)<<A31<<"  "<<std::setw(12)<<A32<<"  "<<std::setw(12)<<A33<<std::endl;
+#endif // USE_AVX_IMPLEMENTATION
+#endif // PRINT_DEBUGGING_OUTPUT
+
+#ifdef COMPUTE_U_AS_QUATERNION
+#ifdef PRINT_DEBUGGING_OUTPUT // debug dump of qU as (s, vx, vy, vz); SIMD variants print lane 0 only
+#ifdef USE_SCALAR_IMPLEMENTATION
+    std::cout<<"Scalar qU ="<<std::endl;
+    std::cout<<std::setw(12)<<Squs.f<<"  "<<std::setw(12)<<Squvx.f<<"  "<<std::setw(12)<<Squvy.f<<"  "<<std::setw(12)<<Squvz.f<<std::endl;
+#endif // USE_SCALAR_IMPLEMENTATION
+#ifdef USE_SSE_IMPLEMENTATION
+    _mm_storeu_ps(buf,Vqus);QUS=buf[0];
+    _mm_storeu_ps(buf,Vquvx);QUVX=buf[0];
+    _mm_storeu_ps(buf,Vquvy);QUVY=buf[0];
+    _mm_storeu_ps(buf,Vquvz);QUVZ=buf[0];
+    std::cout<<"Vector qU ="<<std::endl;
+    std::cout<<std::setw(12)<<QUS<<"  "<<std::setw(12)<<QUVX<<"  "<<std::setw(12)<<QUVY<<"  "<<std::setw(12)<<QUVZ<<std::endl;
+#endif // USE_SSE_IMPLEMENTATION
+#ifdef USE_AVX_IMPLEMENTATION
+    _mm256_storeu_ps(buf,Vqus);QUS=buf[0];
+    _mm256_storeu_ps(buf,Vquvx);QUVX=buf[0];
+    _mm256_storeu_ps(buf,Vquvy);QUVY=buf[0];
+    _mm256_storeu_ps(buf,Vquvz);QUVZ=buf[0];
+    std::cout<<"Vector qU ="<<std::endl;
+    std::cout<<std::setw(12)<<QUS<<"  "<<std::setw(12)<<QUVX<<"  "<<std::setw(12)<<QUVY<<"  "<<std::setw(12)<<QUVZ<<std::endl;
+#endif // USE_AVX_IMPLEMENTATION
+#endif // PRINT_DEBUGGING_OUTPUT
+#endif // COMPUTE_U_AS_QUATERNION
+
+#ifdef __INTEL_COMPILER // NOTE(review): restores Intel diagnostic 592 to its default state; presumably it was disabled earlier in this file (not visible here) - confirm
+#pragma warning( default : 592 )
+#endif // __INTEL_COMPILER

+ 0 - 1
include/igl/Singular_Value_Decomposition_Main_Kernel_Body.hpp.REMOVED.git-id

@@ -1 +0,0 @@
-e8898a8aa8e920ab7c2cd0bfaac5acc2e4991daf

+ 3225 - 0
include/igl/exact_geodesic.cpp

@@ -0,0 +1,3225 @@
+// This file is part of libigl, a simple c++ geometry processing library.
+//
+// Copyright (C) 2018 Zhongshi Jiang <jiangzs@nyu.edu>
+//
+// This Source Code Form is subject to the terms of the Mozilla Public License
+// v. 2.0. If a copy of the MPL was not distributed with this file, You can
+// obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "exact_geodesic.h"
+
+//Copyright (C) 2008 Danil Kirsanov, MIT License
+//Code from https://code.google.com/archive/p/geodesic/
+// Compiled into a single file by Zhongshi Jiang
+
+#include <igl/PI.h>
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstddef>
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <vector>
+#include <memory>
+namespace igl{
+namespace geodesic{
+
+//#include "geodesic_constants_and_simple_functions.h"
+
+//double const GEODESIC_INF = std::numeric_limits<double>::max();
+double const GEODESIC_INF = 1e100;		//large finite sentinel, used in place of the numeric_limits maximum disabled on the line above
+
+//To avoid numerical problems with "infinitely small" intervals,
+//all intervals shorter than SMALLEST_INTERVAL_RATIO*edge_length are dropped.
+double const SMALLEST_INTERVAL_RATIO = 1e-6;
+//double const SMALL_EPSILON = 1e-10;
+
+
+//Law of cosines: returns the cosine of the triangle angle opposite the side of
+//length a, given the other two side lengths b and c. The value is clamped to
+//[-1, 1] so that rounding error cannot push it outside the domain of acos.
+//Side lengths that are not clearly positive are rejected in debug builds only.
+inline double cos_from_edges(double const a,
+							 double const b,
+							 double const c)
+{
+	assert(a>1e-50);
+	assert(b>1e-50);
+	assert(c>1e-50);
+
+	double const cosine = (b*b + c*c - a*a)/(2.0*b*c);
+
+	if(cosine < -1.0)
+	{
+		return -1.0;
+	}
+	if(1.0 < cosine)
+	{
+		return 1.0;
+	}
+	return cosine;
+}
+
+//Returns the triangle angle (radians, as produced by acos) opposite the side of
+//length a, given the other two side lengths b and c.
+inline double angle_from_edges(double const a,
+							   double const b,
+							   double const c)
+{
+	double const cosine = cos_from_edges(a,b,c);
+	return acos(cosine);
+}
+
+//Reads a mesh from a whitespace-separated text file laid out as
+//	num_points num_faces, then 3*num_points values, then 3*num_faces values.
+//points is resized to 3*num_points entries and faces to 3*num_faces entries;
+//both are filled in file order.
+//Returns false when the file cannot be opened or when any value is missing or
+//cannot be parsed (previously such files were reported as read successfully
+//with partially filled containers); returns true otherwise.
+template<class Points, class Faces>
+inline bool read_mesh_from_file(char* filename,
+								Points& points,
+								Faces& faces)
+{
+	std::ifstream file(filename);
+	assert(file.is_open());
+	if(!file.is_open()) return false;
+
+	unsigned num_points = 0;
+	if(!(file >> num_points)) return false;
+	assert(num_points>=3);
+
+	unsigned num_faces = 0;
+	if(!(file >> num_faces)) return false;
+
+	points.resize(num_points*3);
+	for(typename Points::iterator i=points.begin(); i!=points.end(); ++i)
+	{
+		if(!(file >> *i)) return false;		//truncated or malformed coordinate list
+	}
+
+	faces.resize(num_faces*3);
+	for(typename Faces::iterator i=faces.begin(); i!=faces.end(); ++i)
+	{
+		if(!(file >> *i)) return false;		//truncated or malformed index list
+	}
+	file.close();		//early returns above rely on the ifstream destructor instead
+
+	return true;
+}
+
+// #include "geodesic_memory"
+//Hands out runs of n consecutive T elements carved from fixed-size blocks by
+//advancing a cursor; individual runs are never given back.
+template<class T>
+class SimlpeMemoryAllocator
+{
+public:
+	typedef T* pointer;
+
+	SimlpeMemoryAllocator(unsigned block_size = 0,
+						  unsigned max_number_of_blocks = 0)
+	{
+		reset(block_size, max_number_of_blocks);
+	}
+
+	~SimlpeMemoryAllocator() {}
+
+	//Stores the new configuration, rewinds the cursor and makes sure the
+	//first block exists with block_size elements.
+	void reset(unsigned block_size,
+			   unsigned max_number_of_blocks)
+	{
+		m_block_size = block_size;
+		m_max_number_of_blocks = max_number_of_blocks;
+		m_current_position = 0;
+
+		m_storage.reserve(max_number_of_blocks);
+		m_storage.resize(1);
+		m_storage.front().resize(block_size);
+	}
+
+	//Returns a pointer to n consecutive elements. When the request does not
+	//fit in the rest of the current block a new block is appended and the
+	//run starts at its beginning.
+	pointer allocate(unsigned const n)
+	{
+		assert(n < m_block_size);
+
+		bool const fits_in_current_block = m_current_position + n < m_block_size;
+		if(!fits_in_current_block)
+		{
+			m_storage.resize(m_storage.size() + 1);
+			m_storage.back().resize(m_block_size);
+			m_current_position = 0;
+		}
+
+		std::vector<T>& block = m_storage.back();
+		pointer const run = &block[m_current_position];
+		m_current_position += n;
+		return run;
+	}
+
+private:
+	std::vector<std::vector<T> > m_storage;
+	unsigned m_block_size;				//number of elements in one block
+	unsigned m_max_number_of_blocks;		//only used as the reserve() hint in reset()
+	unsigned m_current_position;			//index of the first unused element of the last block
+};
+
+
+//Pool that hands out single T elements from fixed-size blocks and recycles
+//elements returned through deallocate() before touching fresh storage.
+template<class T>
+class MemoryAllocator
+{
+public:
+	typedef T* pointer;
+
+	MemoryAllocator(unsigned block_size = 1024,
+				    unsigned max_number_of_blocks = 1024)
+	{
+		reset(block_size, max_number_of_blocks);
+	}
+
+	~MemoryAllocator() {}
+
+	//Re-applies the current configuration; see reset().
+	void clear()
+	{
+		reset(m_block_size, m_max_number_of_blocks);
+	}
+
+	//Stores the new configuration, rewinds the cursor, makes sure the first
+	//block exists with block_size elements and empties the recycle list.
+	void reset(unsigned block_size,
+			   unsigned max_number_of_blocks)
+	{
+		m_block_size = block_size;
+		m_max_number_of_blocks = max_number_of_blocks;
+
+		assert(m_block_size > 0);
+		assert(m_max_number_of_blocks > 0);
+
+		m_current_position = 0;
+
+		m_storage.reserve(max_number_of_blocks);
+		m_storage.resize(1);
+		m_storage.front().resize(block_size);
+
+		m_deleted.clear();
+		m_deleted.reserve(2*block_size);
+	}
+
+	//Returns one element: the most recently recycled one if any, otherwise
+	//the next unused element (a new block is appended when the current one
+	//is exhausted).
+	pointer allocate()
+	{
+		if(!m_deleted.empty())
+		{
+			pointer const recycled = m_deleted.back();
+			m_deleted.pop_back();
+			return recycled;
+		}
+
+		if(m_current_position + 1 >= m_block_size)
+		{
+			m_storage.resize(m_storage.size() + 1);
+			m_storage.back().resize(m_block_size);
+			m_current_position = 0;
+		}
+
+		pointer const fresh = &m_storage.back()[m_current_position];
+		++m_current_position;
+		return fresh;
+	}
+
+	//Gives one element back for reuse. Once the recycle list has reached its
+	//reserved capacity the pointer is silently dropped instead of recorded.
+	void deallocate(pointer p)
+	{
+		if(m_deleted.size() >= m_deleted.capacity())
+		{
+			return;
+		}
+		m_deleted.push_back(p);
+	}
+
+private:
+	std::vector<std::vector<T> > m_storage;
+	unsigned m_block_size;				//number of elements in one block
+	unsigned m_max_number_of_blocks;		//only used as the reserve() hint in reset()
+	unsigned m_current_position;			//index of the first unused element of the last block
+
+	std::vector<pointer> m_deleted;			//elements returned by deallocate(), reused last-in first-out
+};
+
+
+//Reusable scratch buffer. allocate<T>(n) returns storage for at least n objects
+//of type T, reusing the current buffer when it is already large enough and
+//replacing it with a larger one otherwise (the old contents are not copied).
+//The storage is held as an array of double and reinterpreted as T.
+class OutputBuffer
+{
+public:
+	OutputBuffer():
+		m_num_bytes(0)
+	{}
+
+	//Drops the buffer; get() returns a null pointer and capacity() 0 afterwards.
+	void clear()
+	{
+		m_num_bytes = 0;
+		m_buffer = std::shared_ptr<double>();
+	}
+
+	//Returns a pointer to storage for n objects of type T. A request that
+	//fits in the current buffer returns the existing storage unchanged.
+	template<class T>
+	T* allocate(unsigned n)
+	{
+		double wanted = n*sizeof(T);
+		if(wanted > m_num_bytes)
+		{
+			//round the byte count up to a whole number of doubles
+			unsigned new_size = (unsigned) ceil(wanted / (double)sizeof(double));
+			//the storage comes from new[], so it must be released with delete[];
+			//shared_ptr<double>'s default deleter would use scalar delete
+			m_buffer = std::shared_ptr<double>(new double[new_size],
+											   std::default_delete<double[]>());
+			m_num_bytes = new_size*sizeof(double);
+		}
+
+		return (T*)m_buffer.get();
+	}
+
+	//Current storage viewed as T*, or a null pointer when nothing is allocated.
+	template <class T>
+	T* get()
+	{
+		return (T*)m_buffer.get();
+	}
+
+	//Number of whole T objects that fit in the current storage.
+	template<class T>
+	unsigned capacity()
+	{
+		return (unsigned)floor((double)m_num_bytes/(double)sizeof(T));
+	};
+
+private:
+
+	std::shared_ptr<double> m_buffer;		//array storage, released with delete[]
+	unsigned m_num_bytes;					//size of the current storage in bytes
+};
+
+
+
+
+class Vertex;		//forward declarations, so the pointer aliases below can be formed before the classes are defined
+class Edge;
+class Face;
+class Mesh;
+class MeshElementBase;
+
+typedef Vertex* vertex_pointer;		//raw (non-owning) pointer aliases used by the mesh classes below
+typedef Edge* edge_pointer;
+typedef Face* face_pointer;
+typedef Mesh* mesh_pointer;
+typedef MeshElementBase* base_pointer;		//can refer to a vertex, an edge or a face through their common base
+
+//Fixed-size view over a contiguous run of Data elements. The storage is
+//supplied from outside through set_allocation(); this class never allocates
+//or frees it.
+template <class Data>
+class SimpleVector
+{
+public:
+	typedef Data* iterator;
+
+	//starts empty: no storage, size 0
+	SimpleVector()
+	{
+		clear();
+	}
+
+	unsigned size() { return m_size; }
+	iterator begin() { return m_begin; }
+	iterator end() { return begin() + size(); }
+
+	//Points the view at size elements starting at begin; begin is converted
+	//to Data* with a C-style cast. A null begin is accepted only with size 0.
+	template<class DataPointer>
+	void set_allocation(DataPointer begin, unsigned size)
+	{
+		assert(begin != NULL || size == 0);
+		m_begin = (iterator)begin;
+		m_size = size;
+	}
+
+	//element access; the index is range-checked in debug builds only
+	Data& operator[](unsigned i)
+	{
+		assert(i < m_size);
+		return m_begin[i];
+	}
+
+	//Detaches the view from its storage without touching the storage.
+	void clear()
+	{
+		m_begin = NULL;
+		m_size = 0;
+	}
+
+private:
+	unsigned m_size;		//number of elements in the view
+	Data* m_begin;			//first element, or NULL when the view is empty
+};
+
+enum PointType		//kind of mesh element; stored in MeshElementBase and reported by its type()
+{
+    VERTEX,		//set by the Vertex constructor
+    EDGE,		//set by the Edge constructor
+    FACE,		//set by the Face constructor
+	UNDEFINED_POINT		//initial value in MeshElementBase; also what SurfacePoint::type() reports when it has no base element
+};
+
+//Common base of the mesh element classes (Vertex, Edge, Face): holds an id,
+//the element kind and three adjacency lists that are exposed by reference.
+class MeshElementBase
+{
+public:
+	typedef SimpleVector<vertex_pointer> vertex_pointer_vector;
+	typedef SimpleVector<edge_pointer> edge_pointer_vector;
+	typedef SimpleVector<face_pointer> face_pointer_vector;
+
+	//a fresh element has id 0 and no specific kind yet
+	MeshElementBase()
+		: m_id(0)
+		, m_type(UNDEFINED_POINT)
+	{
+	}
+
+	//mutable access to the adjacency lists
+	vertex_pointer_vector& adjacent_vertices()
+	{
+		return m_adjacent_vertices;
+	}
+
+	edge_pointer_vector& adjacent_edges()
+	{
+		return m_adjacent_edges;
+	}
+
+	face_pointer_vector& adjacent_faces()
+	{
+		return m_adjacent_faces;
+	}
+
+	//mutable access to the id
+	unsigned& id()
+	{
+		return m_id;
+	}
+
+	//element kind, returned by value
+	PointType type()
+	{
+		return m_type;
+	}
+
+protected:
+	vertex_pointer_vector m_adjacent_vertices;		//adjacent vertices
+	edge_pointer_vector m_adjacent_edges;			//adjacent edges
+	face_pointer_vector m_adjacent_faces;			//adjacent faces
+
+	unsigned m_id;									//element id
+	PointType m_type;								//vertex, edge or face
+};
+
+//Point in 3D stored as three doubles (x, y, z), with a few in-place operations.
+class Point3D
+{
+public:
+	//leaves the coordinates uninitialized
+	Point3D() {}
+
+	//copies the coordinates of *p
+	Point3D(Point3D* p)
+	{
+		set(p->xyz());
+	}
+
+	//raw access: pointer to the three coordinates, and one reference per axis
+	double* xyz() { return m_coordinates; }
+	double& x() { return m_coordinates[0]; }
+	double& y() { return m_coordinates[1]; }
+	double& z() { return m_coordinates[2]; }
+
+	void set(double new_x, double new_y, double new_z)
+	{
+		m_coordinates[0] = new_x;
+		m_coordinates[1] = new_y;
+		m_coordinates[2] = new_z;
+	}
+
+	//reads three consecutive doubles starting at data
+	void set(double* data)
+	{
+		set(data[0], data[1], data[2]);
+	}
+
+	//Euclidean distance to the point whose coordinates are v[0], v[1], v[2]
+	double distance(double* v)
+	{
+		double const dx = x() - v[0];
+		double const dy = y() - v[1];
+		double const dz = z() - v[2];
+
+		return sqrt(dx*dx + dy*dy + dz*dz);
+	}
+
+	//Euclidean distance to *v
+	double distance(Point3D* v)
+	{
+		return distance(v->xyz());
+	}
+
+	//component-wise this += *v
+	void add(Point3D* v)
+	{
+		for(unsigned axis = 0; axis != 3; ++axis)
+		{
+			m_coordinates[axis] += v->m_coordinates[axis];
+		}
+	}
+
+	//scales every coordinate by v
+	void multiply(double v)
+	{
+		for(unsigned axis = 0; axis != 3; ++axis)
+		{
+			m_coordinates[axis] *= v;
+		}
+	}
+
+private:
+	double m_coordinates[3];					//x, y, z in this order
+};
+
+//Mesh vertex: a mesh element that is also a 3D point.
+class Vertex: public MeshElementBase, public Point3D
+{
+public:
+	//tags the element as a vertex; the flag below is left uninitialized
+	Vertex()
+	{
+		m_type = VERTEX;
+	}
+
+	~Vertex() {}
+
+	//mutable access to the saddle-or-boundary flag
+	bool& saddle_or_boundary()
+	{
+		return m_saddle_or_boundary;
+	}
+
+private:
+	//Speeds up the exact geodesic algorithm: true if the total adjacent angle
+	//is larger than 2*PI or the vertex belongs to the mesh boundary.
+	bool m_saddle_or_boundary;
+};
+
+
+//Mesh face: a mesh element that also stores one corner angle per adjacent vertex.
+class Face: public MeshElementBase
+{
+public:
+	//tags the element as a face; the corner angles are left uninitialized
+	Face()
+	{
+		m_type = FACE;
+	}
+
+	~Face() {}
+
+	edge_pointer opposite_edge(vertex_pointer v);
+	vertex_pointer opposite_vertex(edge_pointer e);
+	edge_pointer next_edge(edge_pointer e, vertex_pointer v);
+
+	//Corner angle stored for the adjacent vertex whose id equals v's id.
+	//A vertex that is not one of the first three adjacent vertices trips the
+	//assertion in debug builds and yields 0 otherwise.
+	double vertex_angle(vertex_pointer v)
+	{
+		unsigned const wanted = v->id();
+		vertex_pointer_vector& corners = adjacent_vertices();
+		for(unsigned corner = 0; corner != 3; ++corner)
+		{
+			if(corners[corner]->id() != wanted)
+			{
+				continue;
+			}
+			return m_corner_angles[corner];
+		}
+		assert(0);
+		return 0;
+	}
+
+	//raw access to the three stored corner angles
+	double* corner_angles()
+	{
+		return m_corner_angles;
+	}
+
+private:
+	//triangle angles in radians; entry i belongs to adjacent vertex i
+	double m_corner_angles[3];
+};
+
+//Mesh edge: a mesh element with a stored length. Its first two adjacent
+//vertices are its end points (v0 and v1).
+class Edge: public MeshElementBase
+{
+public:
+	//tags the element as an edge; the length is left uninitialized
+	Edge()
+	{
+		m_type = EDGE;
+	}
+
+	~Edge() {}
+
+	//mutable access to the stored length
+	double& length()
+	{
+		return m_length;
+	}
+
+	//Adjacent face other than f (compared by id), or NULL when the edge has a
+	//single adjacent face.
+	face_pointer opposite_face(face_pointer f)
+	{
+		if(is_boundary())
+		{
+			assert(adjacent_faces()[0]->id() == f->id());
+			return NULL;
+		}
+
+		assert(adjacent_faces()[0]->id() == f->id() ||
+			   adjacent_faces()[1]->id() == f->id());
+
+		face_pointer_vector& faces = adjacent_faces();
+		if(faces[0]->id() == f->id())
+		{
+			return faces[1];
+		}
+		return faces[0];
+	}
+
+	//End point other than v (compared by id).
+	vertex_pointer opposite_vertex(vertex_pointer v)
+	{
+		assert(belongs(v));
+
+		if(v0()->id() == v->id())
+		{
+			return v1();
+		}
+		return v0();
+	}
+
+	//true when v has the same id as one of the two end points
+	bool belongs(vertex_pointer v)
+	{
+		unsigned const wanted = v->id();
+		return v0()->id() == wanted || v1()->id() == wanted;
+	}
+
+	//true when exactly one face is adjacent to this edge
+	bool is_boundary()
+	{
+		return adjacent_faces().size() == 1;
+	}
+
+	vertex_pointer v0() { return adjacent_vertices()[0]; }
+	vertex_pointer v1() { return adjacent_vertices()[1]; }
+
+	//Expresses point in 2D coordinates relative to this edge, derived from its
+	//distances d0 and d1 to the end points: x is measured along the edge from
+	//v0 and y is the non-negative offset from the edge line. A point that
+	//coincides with v0 or v1 maps to (0, 0) or (length, 0) respectively.
+	void local_coordinates(Point3D* point,
+						   double& x,
+						   double& y)
+	{
+		double d0 = point->distance(v0());
+		if(d0 < 1e-50)
+		{
+			y = 0.0;
+			x = 0.0;
+			return;
+		}
+
+		double d1 = point->distance(v1());
+		if(d1 < 1e-50)
+		{
+			y = 0.0;
+			x = m_length;
+			return;
+		}
+
+		x = m_length/2.0 + (d0*d0 - d1*d1)/(2.0*m_length);
+		y = sqrt(std::max(0.0, d0*d0 - x*x));		//clamped so rounding cannot produce a negative radicand
+	}
+
+private:
+	double m_length;							//length of the edge
+};
+
+//Point on the surface of the mesh: 3D coordinates plus the mesh element it is attached to.
+class SurfacePoint:public Point3D
+{
+public:
+	//point that is not attached to any mesh element
+	SurfacePoint():
+		m_p(NULL)
+	{}
+
+	//point placed at the vertex v
+	SurfacePoint(vertex_pointer v):
+		SurfacePoint::Point3D(v),
+		m_p(v)
+	{}
+
+	//point placed at the average of the three face vertices
+	SurfacePoint(face_pointer f):
+		m_p(f)
+	{
+		set(0,0,0);
+		for(unsigned corner=0; corner<3; ++corner)
+		{
+			add(f->adjacent_vertices()[corner]);
+		}
+		multiply(1./3.);
+	}
+
+	//point placed on the edge e; a is the interpolation weight
+	//(a = 0 gives the first end point, a = 1 the second, default is the midpoint)
+	SurfacePoint(edge_pointer e,
+				 double a = 0.5):
+		m_p(e)
+	{
+		vertex_pointer const first = e->adjacent_vertices()[0];
+		vertex_pointer const second = e->adjacent_vertices()[1];
+		double const b = 1 - a;
+
+		x() = b*first->x() + a*second->x();
+		y() = b*first->y() + a*second->y();
+		z() = b*first->z() + a*second->z();
+	}
+
+	//point with explicit coordinates attached to the element g; the parameter t is not used
+	SurfacePoint(base_pointer g,
+				 double x,
+				 double y,
+				 double z,
+				 PointType t = UNDEFINED_POINT):
+		m_p(g)
+	{
+		set(x,y,z);
+	}
+
+	//overwrite this point with a copy of p
+	void initialize(SurfacePoint const& p)
+	{
+		*this = p;
+	}
+
+	~SurfacePoint(){}
+
+	//type of the attached element, or UNDEFINED_POINT when nothing is attached
+	PointType type()
+	{
+		if(!m_p)
+		{
+			return UNDEFINED_POINT;
+		}
+		return m_p->type();
+	}
+
+	base_pointer& base_element(){return m_p;}
+protected:
+	base_pointer m_p;			//could be face, vertex or edge pointer
+};
+
+//Returns the first edge of this face that does not contain the vertex v.
+inline edge_pointer Face::opposite_edge(vertex_pointer v)
+{
+	for(unsigned slot=0; slot<3; ++slot)
+	{
+		edge_pointer candidate = adjacent_edges()[slot];
+		if(candidate->belongs(v))
+		{
+			continue;
+		}
+		return candidate;
+	}
+
+	assert(0);		//all three edges contain v
+	return NULL;
+}
+
+//Returns the first vertex of this face that is not an end point of the edge e.
+inline vertex_pointer Face::opposite_vertex(edge_pointer e)
+{
+	for(unsigned slot=0; slot<3; ++slot)
+	{
+		vertex_pointer candidate = adjacent_vertices()[slot];
+		if(e->belongs(candidate))
+		{
+			continue;
+		}
+		return candidate;
+	}
+
+	assert(0);		//all three vertices belong to e
+	return NULL;
+}
+
+//Returns the first edge of this face, other than e, that also contains the vertex v.
+inline edge_pointer Face::next_edge(edge_pointer e, vertex_pointer v)
+{
+	assert(e->belongs(v));
+
+	for(unsigned slot=0; slot<3; ++slot)
+	{
+		edge_pointer candidate = adjacent_edges()[slot];
+		bool const is_same_edge = (e->id() == candidate->id());
+		if(!is_same_edge && candidate->belongs(v))
+		{
+			return candidate;
+		}
+	}
+
+	assert(0);		//no other edge of the face contains v
+	return NULL;
+}
+
+//Prototype of an edge used during mesh construction: one record per (face, side).
+//Two records with the same vertex pair are later merged into a single Edge.
+struct HalfEdge
+{
+	unsigned face_id;		//index of the face this side belongs to
+	unsigned vertex_0;		//smaller of the two end-point vertex ids
+	unsigned vertex_1;		//larger of the two end-point vertex ids (vertex_0 < vertex_1)
+};
+
+//Lexicographic order on (vertex_0, vertex_1); face_id does not take part in the comparison.
+inline bool operator < (const HalfEdge &x, const HalfEdge &y)
+{
+	if(x.vertex_0 != y.vertex_0)
+	{
+		return x.vertex_0 < y.vertex_0;
+	}
+	return x.vertex_1 < y.vertex_1;
+}
+
+//Two half-edges differ when their vertex pairs differ; face_id is ignored.
+inline bool operator != (const HalfEdge &x, const HalfEdge &y)
+{
+	bool const same_pair = (x.vertex_0 == y.vertex_0) && (x.vertex_1 == y.vertex_1);
+	return !same_pair;
+}
+
+//Two half-edges are equal when they connect the same vertex pair; face_id is ignored.
+inline bool operator == (const HalfEdge &x, const HalfEdge &y)
+{
+	if(x.vertex_0 != y.vertex_0)
+	{
+		return false;
+	}
+	return x.vertex_1 == y.vertex_1;
+}
+
+//Pairs an edge with a source number.
+//NOTE(review): presumably records that the edge is directly visible from that source - confirm against the users of this struct.
+struct edge_visible_from_source
+{
+	unsigned source;		//presumably the index of the source point - confirm
+	edge_pointer edge;		//the edge in question
+};
+
+//Triangle mesh: owns the vertex, edge and face arrays and the pointer pool
+//that backs their cross-reference lists.
+class Mesh
+{
+public:
+	Mesh(){}
+	~Mesh(){}
+
+	//build the mesh from flat coordinate / index arrays with explicit element counts
+	template<class Points, class Faces>
+	void initialize_mesh_data(unsigned num_vertices,
+							  Points& p,
+							  unsigned num_faces,
+							  Faces& tri);
+
+	//same as above; the element counts are derived from p.size()/3 and tri.size()/3
+	template<class Points, class Faces>
+	void initialize_mesh_data(Points& p, Faces& tri);
+
+	std::vector<Vertex>& vertices(){return m_vertices;}
+	std::vector<Edge>& edges(){return m_edges;}
+	std::vector<Face>& faces(){return m_faces;}
+
+	//list the vertices of the mesh element the point lies on (appended to storage when it is given)
+	unsigned closest_vertices(SurfacePoint* p,
+							  std::vector<vertex_pointer>* storage = NULL);
+
+private:
+
+	void build_adjacencies();		//build internal structure of the mesh
+	bool verify();					//connectivity sanity checks; returns true so it can be wrapped in assert()
+
+	typedef void* void_pointer;
+
+	//hand out room for n pointers from the shared pool
+	void_pointer allocate_pointers(unsigned n)
+	{
+		return m_pointer_allocator.allocate(n);
+	}
+
+	std::vector<Vertex> m_vertices;
+	std::vector<Edge> m_edges;
+	std::vector<Face> m_faces;
+
+	SimlpeMemoryAllocator<void_pointer> m_pointer_allocator;	//fast memory allocating for Face/Vertex/Edge cross-references
+};
+
+//Lists the mesh vertices of the element the surface point p lies on.
+//  vertex point: the vertex itself (1)
+//  face point:   the three vertices of the face (3)
+//  edge point:   the two end points plus the opposite vertex of every adjacent face (3 or 4)
+//When storage is not NULL the vertices are appended to it.
+//Returns the number of vertices listed for the point, whether or not storage is given.
+inline unsigned Mesh::closest_vertices(SurfacePoint* p,
+										  std::vector<vertex_pointer>* storage)
+{
+	assert(p->type() != UNDEFINED_POINT);
+
+	if(p->type() == VERTEX)
+	{
+		if(storage)
+		{
+			storage->push_back(static_cast<vertex_pointer>(p->base_element()));
+		}
+		return 1;
+	}
+	else if(p->type() == FACE)
+	{
+		if(storage)
+		{
+			vertex_pointer* vp= p->base_element()->adjacent_vertices().begin();
+			storage->push_back(*vp);
+			storage->push_back(*(vp+1));
+			storage->push_back(*(vp+2));
+		}
+		return 3;		//three vertices are stored above; the count used to be reported as 2
+	}
+	else if(p->type() == EDGE)		//for edge include all 4 adjacent vertices
+	{
+		edge_pointer edge = static_cast<edge_pointer>(p->base_element());
+
+		if(storage)
+		{
+			storage->push_back(edge->adjacent_vertices()[0]);
+			storage->push_back(edge->adjacent_vertices()[1]);
+
+			for(unsigned i = 0; i < edge->adjacent_faces().size(); ++i)
+			{
+				face_pointer face = edge->adjacent_faces()[i];
+				storage->push_back(face->opposite_vertex(edge));
+			}
+		}
+		return 2 + edge->adjacent_faces().size();
+	}
+
+	assert(0);		//unknown point type
+	return 0;
+}
+
+//Builds the mesh from a flat coordinate array (x,y,z per vertex) and a flat
+//index array (three vertex indices per triangle); counts are derived from the array sizes.
+template<class Points, class Faces>
+void Mesh::initialize_mesh_data(Points& p, Faces& tri)
+{
+	//both arrays must hold whole triples
+	assert(p.size() % 3 == 0);
+	assert(tri.size() % 3 == 0);
+
+	unsigned const num_vertices = p.size() / 3;
+	unsigned const num_faces = tri.size() / 3;
+
+	initialize_mesh_data(num_vertices, p, num_faces, tri);
+}
+
+//Builds the mesh from num_vertices coordinate triples in p and num_faces
+//vertex-index triples in tri, then derives all adjacency information.
+template<class Points, class Faces>
+void Mesh::initialize_mesh_data(unsigned num_vertices,
+								Points& p,
+								unsigned num_faces,
+								Faces& tri)
+{
+	//reset the pointer pool with an estimate of how many cross-reference pointers are needed
+	unsigned const max_number_of_pointer_blocks = 100;
+	unsigned const approximate_number_of_internal_pointers = (num_vertices + num_faces)*4;
+	m_pointer_allocator.reset(approximate_number_of_internal_pointers,
+							  max_number_of_pointer_blocks);
+
+	//vertices: id plus coordinates copied from p
+	m_vertices.resize(num_vertices);
+	for(unsigned vi=0; vi<num_vertices; ++vi)
+	{
+		Vertex& vertex = m_vertices[vi];
+		unsigned const base = 3*vi;
+
+		vertex.id() = vi;
+		vertex.x() = p[base];
+		vertex.y() = p[base + 1];
+		vertex.z() = p[base + 2];
+	}
+
+	//faces: id plus pointers to the three corner vertices
+	m_faces.resize(num_faces);
+	for(unsigned fi=0; fi<num_faces; ++fi)
+	{
+		Face& face = m_faces[fi];
+		face.id() = fi;
+		face.adjacent_vertices().set_allocation(allocate_pointers(3),3);	//room for three vertex pointers
+
+		unsigned const base = 3*fi;
+		for(unsigned corner=0; corner<3; ++corner)
+		{
+			unsigned const vertex_index = tri[base + corner];
+			assert(vertex_index < num_vertices);
+			face.adjacent_vertices()[corner] = &m_vertices[vertex_index];
+		}
+	}
+
+	build_adjacencies();	//derive the remaining cross-references
+}
+
+//Derives every cross-reference of the mesh from the face->vertex lists that
+//initialize_mesh_data() has already filled in:
+//  vertex->faces, edges (with their vertices, faces and lengths), vertex->edges,
+//  face->edges, the corner angles of every face and the saddle_or_boundary flag
+//  of every vertex.
+//All adjacency arrays are carved out of the shared pointer allocator through allocate_pointers().
+//Degenerate input is reported only through assert().
+inline void Mesh::build_adjacencies()
+{
+	//		Vertex->adjacent Faces
+	std::vector<unsigned> count(m_vertices.size());	//number of faces adjacent to each vertex
+	for(unsigned i=0; i<m_faces.size(); ++i)
+	{
+		Face& f = m_faces[i];
+		for(unsigned j=0; j<3; ++j)
+		{
+			unsigned vertex_id = f.adjacent_vertices()[j]->id();
+			assert(vertex_id < m_vertices.size());
+			count[vertex_id]++;
+		}
+	}
+
+	for(unsigned i=0; i<m_vertices.size(); ++i)		//reserve space
+	{
+		Vertex& v = m_vertices[i];
+		unsigned num_adjacent_faces = count[i];
+
+		v.adjacent_faces().set_allocation(allocate_pointers(num_adjacent_faces),		//one pointer per adjacent face
+										  num_adjacent_faces);
+	}
+
+	//second pass: count is reused as the per-vertex write position
+	std::fill(count.begin(), count.end(), 0);
+	for(unsigned i=0; i<m_faces.size(); ++i)
+	{
+		Face& f = m_faces[i];
+		for(unsigned j=0; j<3; ++j)
+		{
+			vertex_pointer v = f.adjacent_vertices()[j];
+			v->adjacent_faces()[count[v->id()]++] = &f;
+		}
+	}
+
+	//find all edges
+	//i.e. find all half-edges, sort and combine them into edges
+	std::vector<HalfEdge> half_edges(m_faces.size()*3);
+	unsigned k = 0;
+	for(unsigned i=0; i<m_faces.size(); ++i)
+	{
+		Face& f = m_faces[i];
+		for(unsigned j=0; j<3; ++j)
+		{
+			half_edges[k].face_id = i;
+			unsigned vertex_id_1 = f.adjacent_vertices()[j]->id();
+			unsigned vertex_id_2 = f.adjacent_vertices()[(j+1) % 3]->id();
+			half_edges[k].vertex_0 = std::min(vertex_id_1, vertex_id_2);
+			half_edges[k].vertex_1 = std::max(vertex_id_1, vertex_id_2);
+
+			k++;
+		}
+	}
+	std::sort(half_edges.begin(), half_edges.end());		//equal vertex pairs become neighbours
+
+	//count the distinct vertex pairs
+	//NOTE(review): the count starts at 1, so a mesh without faces would still get one edge - confirm that empty input is excluded by the callers
+	unsigned number_of_edges = 1;
+	for(unsigned i=1; i<half_edges.size(); ++i)
+	{
+		if(half_edges[i] != half_edges[i-1])
+		{
+			++number_of_edges;
+		}
+		else
+		{
+			if(i<half_edges.size()-1)		//sanity check: there should be at most two equal half-edges
+			{								//if it fails, most likely the input data are messed up
+				assert(half_edges[i] != half_edges[i+1]);
+			}
+		}
+	}
+
+	//		Edges->adjacent Vertices and Faces
+	m_edges.resize(number_of_edges);
+	unsigned edge_id = 0;
+	for(unsigned i=0; i<half_edges.size();)		//i advances by 1 or 2 inside the body
+	{
+		Edge& e = m_edges[edge_id];
+		e.id() = edge_id++;
+
+		e.adjacent_vertices().set_allocation(allocate_pointers(2),2);		//allocate two units of memory
+
+		e.adjacent_vertices()[0] = &m_vertices[half_edges[i].vertex_0];
+		e.adjacent_vertices()[1] = &m_vertices[half_edges[i].vertex_1];
+
+		e.length() = e.adjacent_vertices()[0]->distance(e.adjacent_vertices()[1]);
+		assert(e.length() > 1e-100);		//algorithm works well with non-degenerate meshes only
+
+		if(i != half_edges.size()-1 && half_edges[i] == half_edges[i+1])	//double edge: shared by two faces
+		{
+			e.adjacent_faces().set_allocation(allocate_pointers(2),2);
+			e.adjacent_faces()[0] = &m_faces[half_edges[i].face_id];
+			e.adjacent_faces()[1] = &m_faces[half_edges[i+1].face_id];
+			i += 2;
+		}
+		else			//single edge: boundary, one face only
+		{
+			e.adjacent_faces().set_allocation(allocate_pointers(1),1);		//one adjacent face
+			e.adjacent_faces()[0] = &m_faces[half_edges[i].face_id];
+			i += 1;
+		}
+	}
+
+	//			Vertices->adjacent Edges
+	//same two-pass scheme as above: count, allocate, then fill
+	std::fill(count.begin(), count.end(), 0);
+	for(unsigned i=0; i<m_edges.size(); ++i)
+	{
+		Edge& e = m_edges[i];
+		assert(e.adjacent_vertices().size()==2);
+		count[e.adjacent_vertices()[0]->id()]++;
+		count[e.adjacent_vertices()[1]->id()]++;
+	}
+	for(unsigned i=0; i<m_vertices.size(); ++i)
+	{
+		m_vertices[i].adjacent_edges().set_allocation(allocate_pointers(count[i]),
+													  count[i]);
+	}
+	std::fill(count.begin(), count.end(), 0);
+	for(unsigned i=0; i<m_edges.size(); ++i)
+	{
+		Edge& e = m_edges[i];
+		for(unsigned j=0; j<2; ++j)
+		{
+			vertex_pointer v = e.adjacent_vertices()[j];
+			v->adjacent_edges()[count[v->id()]++] = &e;
+		}
+	}
+
+	//			Faces->adjacent Edges
+	for(unsigned i=0; i<m_faces.size(); ++i)
+	{
+		m_faces[i].adjacent_edges().set_allocation(allocate_pointers(3),3);
+	}
+
+	//count now holds one write position per face instead of per vertex
+	count.resize(m_faces.size());
+	std::fill(count.begin(), count.end(), 0);
+	for(unsigned i=0; i<m_edges.size(); ++i)
+	{
+		Edge& e = m_edges[i];
+		for(unsigned j=0; j<e.adjacent_faces().size(); ++j)
+		{
+			face_pointer f = e.adjacent_faces()[j];
+			assert(count[f->id()]<3);
+			f->adjacent_edges()[count[f->id()]++] = &e;
+		}
+	}
+
+	//compute angles for the faces
+	for(unsigned i=0; i<m_faces.size(); ++i)
+	{
+		Face& f = m_faces[i];
+		double abc[3];
+		double sum = 0;
+		for(unsigned j=0; j<3; ++j)		//compute angle adjacent to the vertex j
+		{
+			for(unsigned k=0; k<3; ++k)		//this k shadows the half-edge counter declared above
+			{
+				vertex_pointer v = f.adjacent_vertices()[(j + k)%3];
+				abc[k] = f.opposite_edge(v)->length();		//abc[0] is the edge opposite to vertex j
+			}
+
+			//presumably the angle opposite to the first argument - confirm against angle_from_edges
+			double angle = angle_from_edges(abc[0], abc[1], abc[2]);
+			assert(angle>1e-5);						//algorithm works well with non-degenerate meshes only
+
+			f.corner_angles()[j] = angle;
+			sum += angle;
+		}
+		assert(std::abs(sum - igl::PI) < 1e-5);		//algorithm works well with non-degenerate meshes only
+	}
+
+	//define the saddle_or_boundary flag for vertices
+	std::vector<double> total_vertex_angle(m_vertices.size());		//sum of the corner angles around each vertex, starts at zero
+	for(unsigned i=0; i<m_faces.size(); ++i)
+	{
+		Face& f = m_faces[i];
+		for(unsigned j=0; j<3; ++j)
+		{
+			vertex_pointer v = f.adjacent_vertices()[j];
+			total_vertex_angle[v->id()] += f.corner_angles()[j];
+		}
+	}
+
+	for(unsigned i=0; i<m_vertices.size(); ++i)
+	{
+		Vertex& v = m_vertices[i];
+		v.saddle_or_boundary() = (total_vertex_angle[v.id()] > 2.0*igl::PI - 1e-5);		//total angle of (almost) 2*pi or more
+	}
+
+	//both end points of every boundary edge get the flag regardless of their angle
+	for(unsigned i=0; i<m_edges.size(); ++i)
+	{
+		Edge& e = m_edges[i];
+		if(e.is_boundary())
+		{
+			e.adjacent_vertices()[0]->saddle_or_boundary() = true;
+			e.adjacent_vertices()[1]->saddle_or_boundary() = true;
+		}
+	}
+
+	assert(verify());		//the connectivity check runs only when assertions are enabled
+}
+
+//Sanity-checks the connectivity of the mesh.
+//Two properties are checked through assert():
+//  1. every vertex is an end point of at least one edge;
+//  2. every face can be reached from face 0 by crossing shared edges.
+//The function itself always returns true, so callers can wrap the call in
+//assert() and have the whole check compiled out together with the assertions.
+//It writes nothing to stdout: the statistics output was already commented
+//out, so the leftover blank-line print and the unused statistics were removed.
+inline bool Mesh::verify()		//verifies connectivity of the mesh
+{
+	// make sure that all vertices are mentioned at least once.
+	// though the loose vertex is not a bug, it most likely indicates that something is wrong with the mesh
+	std::vector<bool> map(m_vertices.size(), false);
+	for(unsigned i=0; i<m_edges.size(); ++i)
+	{
+		edge_pointer e = &m_edges[i];
+		map[e->adjacent_vertices()[0]->id()] = true;
+		map[e->adjacent_vertices()[1]->id()] = true;
+	}
+	assert(std::find(map.begin(), map.end(), false) == map.end());
+
+	//without faces there is nothing to traverse; this also keeps
+	//&m_faces[0] and map[0] below from indexing empty vectors
+	if(m_faces.empty())
+	{
+		return true;
+	}
+
+	//make sure that the mesh is connected through its edges
+	//if mesh has more than one connected component, it is most likely a bug
+	std::vector<face_pointer> stack(1,&m_faces[0]);
+	stack.reserve(m_faces.size());
+
+	//map is reused as the "face already visited" marker
+	map.resize(m_faces.size());
+	std::fill(map.begin(), map.end(), false);
+	map[0] = true;
+
+	while(!stack.empty())
+	{
+		face_pointer f = stack.back();
+		stack.pop_back();
+
+		for(unsigned i=0; i<3; ++i)
+		{
+			edge_pointer e = f->adjacent_edges()[i];
+			face_pointer f_adjacent = e->opposite_face(f);		//NULL across a boundary edge
+			if(f_adjacent && !map[f_adjacent->id()])
+			{
+				map[f_adjacent->id()] = true;
+				stack.push_back(f_adjacent);
+			}
+		}
+	}
+	assert(std::find(map.begin(), map.end(), false) == map.end());
+
+	return true;
+}
+
+//Fills a SurfacePoint from a flat array of five doubles:
+//  the coordinates are taken from data by point->set(data),
+//  data[3] selects the element kind (0 = vertex, 1 = edge, anything else = face),
+//  data[4] is the index of that element in the mesh.
+inline void fill_surface_point_structure(geodesic::SurfacePoint* point,
+										 double* data,
+										 Mesh* mesh)
+{
+	point->set(data);
+
+	unsigned const element_kind = (unsigned) data[3];
+	unsigned const element_id = (unsigned) data[4];
+
+	switch(element_kind)
+	{
+	case 0:			//vertex
+		point->base_element() = &mesh->vertices()[element_id];
+		break;
+	case 1:			//edge
+		point->base_element() = &mesh->edges()[element_id];
+		break;
+	default:		//face
+		point->base_element() = &mesh->faces()[element_id];
+		break;
+	}
+}
+
+//Writes a SurfacePoint into a flat array of five doubles:
+//  data[0..2] = x, y, z
+//  data[3]    = element kind (0 = vertex, 1 = edge, 2 = anything else, i.e. face)
+//  data[4]    = id of the mesh element the point is attached to
+//The parameter mesh_id is not used.
+inline void fill_surface_point_double(geodesic::SurfacePoint* point,
+									  double* data,
+									  long mesh_id)
+{
+	data[0] = point->x();
+	data[1] = point->y();
+	data[2] = point->z();
+	data[4] = point->base_element()->id();
+
+	PointType const kind = point->type();
+	data[3] = (kind == VERTEX) ? 0 : ((kind == EDGE) ? 1 : 2);
+}
+
+//forward declarations and pointer aliases for the interval types defined below
+class Interval;
+class IntervalList;
+typedef Interval* interval_pointer;		//pointer to a single interval
+typedef IntervalList* list_pointer;		//pointer to the interval list of an edge
+
+//Interval of an edge together with the pseudo-source that describes the
+//distance function on it. Also usable as a "less" functor on interval pointers.
+class Interval
+{
+public:
+
+	Interval(){}
+	~Interval(){}
+
+	enum DirectionType
+	{
+		FROM_FACE_0,
+		FROM_FACE_1,
+		FROM_SOURCE,
+		UNDEFINED_DIRECTION
+	};
+
+	//geodesic distance function at the offset x along the edge
+	double signal(double x)
+	{
+		assert(x>=0.0 && x <= m_edge->length());
+
+		if(m_d == GEODESIC_INF)
+		{
+			return GEODESIC_INF;
+		}
+
+		double const dx = x - m_pseudo_x;
+		if(m_pseudo_y == 0.0)		//pseudo-source lies on the edge line
+		{
+			return m_d + std::abs(dx);
+		}
+		return m_d + sqrt(dx*dx + m_pseudo_y*m_pseudo_y);
+	}
+
+	//distance at whichever of the two offsets m_start / end lies farther from the pseudo-source
+	double max_distance(double end)
+	{
+		if(m_d == GEODESIC_INF)
+		{
+			return GEODESIC_INF;
+		}
+
+		double const a = std::abs(m_start - m_pseudo_x);
+		double const b = std::abs(end - m_pseudo_x);
+		double const farthest = a > b ? a : b;
+
+		return m_d + sqrt(farthest*farthest + m_pseudo_y*m_pseudo_y);
+	}
+
+	//store in m_min the smallest distance on [m_start, stop]
+	void compute_min_distance(double stop)
+	{
+		assert(stop > m_start);
+
+		if(m_d == GEODESIC_INF)
+		{
+			m_min = GEODESIC_INF;
+			return;
+		}
+		if(m_start > m_pseudo_x)		//pseudo-source projects before the interval
+		{
+			m_min = signal(m_start);
+			return;
+		}
+		if(stop < m_pseudo_x)			//pseudo-source projects behind the interval
+		{
+			m_min = signal(stop);
+			return;
+		}
+
+		//pseudo-source projects inside the interval
+		assert(m_pseudo_y<=0);
+		m_min = m_d - m_pseudo_y;
+	}
+
+	//ordering of intervals in the queue: by minimum distance, then start offset, then edge id
+	bool operator()(interval_pointer const x, interval_pointer const y) const
+	{
+		if(x->min() != y->min())
+		{
+			return x->min() < y->min();
+		}
+		if(x->start() != y->start())
+		{
+			return x->start() < y->start();
+		}
+		return x->edge()->id() < y->edge()->id();
+	}
+
+	//end offset of the interval: start of the next interval, or the edge length for the last one
+	double stop()
+	{
+		if(m_next)
+		{
+			return m_next->start();
+		}
+		return m_edge->length();
+	}
+
+	double hypotenuse(double a, double b)
+	{
+		return sqrt(a*a + b*b);
+	}
+
+	//find the point on the interval that is closest to the point (x, y)
+	void find_closest_point(double const x,
+						    double const y,
+						    double& offset,
+						    double& distance);
+
+	double& start(){return m_start;}
+	double& d(){return m_d;}
+	double& pseudo_x(){return m_pseudo_x;}
+	double& pseudo_y(){return m_pseudo_y;}
+	double& min(){return m_min;}
+	interval_pointer& next(){return m_next;}
+	edge_pointer& edge(){return m_edge;}
+	DirectionType& direction(){return m_direction;}
+	bool visible_from_source(){return m_direction == FROM_SOURCE;}
+	unsigned& source_index(){return m_source_index;}
+
+	//reset the interval for the given edge; without a source the distance is infinite
+	void initialize(edge_pointer edge,
+					SurfacePoint* point = NULL,
+					unsigned source_index = 0);
+
+protected:
+	double m_start;						//initial point of the interval on the edge
+	double m_d;							//distance from the source to the pseudo-source
+	double m_pseudo_x;					//coordinates of the pseudo-source in the local coordinate system
+	double m_pseudo_y;					//y-coordinate should be always negative
+	double m_min;						//minimum distance on the interval
+
+	interval_pointer m_next;			//pointer to the next interval in the list
+	edge_pointer m_edge;				//edge that the interval belongs to
+	unsigned m_source_index;			//the source it belongs to
+	DirectionType m_direction;			//where the interval is coming from
+};
+
+//Interval that stores its own end offset instead of deriving it from the next interval.
+struct IntervalWithStop : public Interval
+{
+public:
+	//hides Interval::stop(); gives read/write access to the stored end offset
+	double& stop()
+	{
+		return m_stop;
+	}
+protected:
+	double m_stop;
+};
+
+//Singly linked list of the intervals that belong to one edge.
+//The list only links intervals; it never allocates or frees them.
+class IntervalList
+{
+public:
+	IntervalList():
+		m_first(NULL)
+	{}
+
+	~IntervalList(){}
+
+	//forget all intervals
+	void clear()
+	{
+		m_first = NULL;
+	}
+
+	//attach the list to the edge e and empty it
+	void initialize(edge_pointer e)
+	{
+		m_edge = e;
+		m_first = NULL;
+	}
+
+	//first interval whose end is not before offset; NULL when there is none
+	interval_pointer covering_interval(double offset)
+	{
+		assert(offset >= 0.0 && offset <= m_edge->length());
+
+		interval_pointer p = m_first;
+		for(; p && p->stop() < offset; p = p->next())
+		{
+		}
+
+		return p;// && p->start() <= offset ? p : NULL;
+	}
+
+	//Among the intervals with finite minimum distance, find the one that gives
+	//the smallest distance to point. Reports that distance, the offset on the
+	//edge and the interval; interval is NULL and distance is GEODESIC_INF when
+	//nothing qualifies (offset is then left untouched).
+	void find_closest_point(SurfacePoint* point,
+							double& offset,
+							double& distance,
+							interval_pointer& interval)
+	{
+		distance = GEODESIC_INF;
+		interval = NULL;
+
+		double x,y;
+		m_edge->local_coordinates(point, x, y);
+
+		for(interval_pointer p = m_first; p; p = p->next())
+		{
+			if(!(p->min()<GEODESIC_INF))
+			{
+				continue;
+			}
+
+			double candidate_offset, candidate_distance;
+			p->find_closest_point(x, y, candidate_offset, candidate_distance);
+			if(candidate_distance < distance)
+			{
+				distance = candidate_distance;
+				offset = candidate_offset;
+				interval = p;
+			}
+		}
+	}
+
+	//length of the list
+	unsigned number_of_intervals()
+	{
+		unsigned count = 0;
+		for(interval_pointer p = m_first; p; p = p->next())
+		{
+			++count;
+		}
+		return count;
+	}
+
+	//last interval of the list; NULL for an empty list
+	interval_pointer last()
+	{
+		interval_pointer tail = m_first;
+		while(tail && tail->next())
+		{
+			tail = tail->next();
+		}
+		return tail;
+	}
+
+	//distance function at the offset x; GEODESIC_INF when no interval covers x
+	double signal(double x)
+	{
+		interval_pointer interval = covering_interval(x);
+		if(!interval)
+		{
+			return GEODESIC_INF;
+		}
+		return interval->signal(x);
+	}
+
+	interval_pointer& first(){return m_first;}
+	edge_pointer& edge(){return m_edge;}
+private:
+	interval_pointer m_first;			//pointer to the first member of the list
+	edge_pointer m_edge;				//edge that owns this list
+};
+
+//Surface point that also carries an index (set through initialize()).
+//Doubles as a "less" functor on pointers to such points.
+class SurfacePointWithIndex : public SurfacePoint
+{
+public:
+	unsigned index(){return m_index;}
+
+	//copy the point p and store the given index
+	void initialize(SurfacePoint& p, unsigned index)
+	{
+		SurfacePoint::initialize(p);
+		m_index = index;
+	}
+
+	//ordering used for sorting: by element type first, then by element id
+	bool operator()(SurfacePointWithIndex* x, SurfacePointWithIndex* y) const
+	{
+		assert(x->type() != UNDEFINED_POINT && y->type() !=UNDEFINED_POINT);
+
+		if(x->type() == y->type())
+		{
+			return x->base_element()->id() < y->base_element()->id();
+		}
+		return x->type() < y->type();
+	}
+
+private:
+	unsigned m_index;
+};
+
+//Vector of source points plus a second vector of pointers to them, sorted
+//by (element type, element id) so that the sources on a given mesh element
+//can be looked up with a binary search.
+class SortedSources : public std::vector<SurfacePointWithIndex>
+{
+private:
+	typedef std::vector<SurfacePointWithIndex*> sorted_vector_type;
+public:
+	typedef sorted_vector_type::iterator sorted_iterator;
+	typedef std::pair<sorted_iterator, sorted_iterator> sorted_iterator_pair;
+
+	//range of sorted entries that compare equivalent to a point on mesh_element
+	sorted_iterator_pair sources(base_pointer mesh_element)
+	{
+		m_search_dummy.base_element() = mesh_element;		//the dummy serves as the search key
+
+		return equal_range(m_sorted.begin(),
+						   m_sorted.end(),
+						   &m_search_dummy,
+						   m_compare_less);
+	}
+
+	//copy the sources, remember their original positions and build the sorted index
+	void initialize(std::vector<SurfacePoint>& sources)
+	{
+		resize(sources.size());
+		m_sorted.resize(sources.size());
+
+		for(unsigned i=0; i<sources.size(); ++i)
+		{
+			SurfacePointWithIndex& stored = (*this)[i];
+			stored.initialize(sources[i],i);
+			m_sorted[i] = &stored;
+		}
+
+		std::sort(m_sorted.begin(), m_sorted.end(), m_compare_less);
+	}
+
+	//element access with an assertion on the index
+	SurfacePointWithIndex& operator[](unsigned i)
+	{
+		assert(i < size());
+		return begin()[i];
+	}
+
+private:
+	sorted_vector_type m_sorted;
+	SurfacePointWithIndex m_search_dummy;		//used as a search template
+	SurfacePointWithIndex m_compare_less;			//used as a compare functor
+};
+
+
+//Finds the offset r on this interval that minimises the distance to the point
+//with local edge coordinates (rs, hs); d_out receives that distance.
+//Both outputs are GEODESIC_INF when the interval has infinite distance.
+inline void Interval::find_closest_point(double const rs,
+										 double const hs,
+										 double& r,
+										 double& d_out)
+{
+	if(m_d == GEODESIC_INF)
+	{
+		r = GEODESIC_INF;
+		d_out = GEODESIC_INF;
+		return;
+	}
+
+	double const hc = -m_pseudo_y;
+	double const rc = m_pseudo_x;
+	double const end = stop();
+
+	double const local_epsilon = SMALLEST_INTERVAL_RATIO*m_edge->length();
+	if(std::abs(hs+hc) < local_epsilon)
+	{
+		//hs + hc is negligible: measure along the edge, clamping rs to [m_start, end]
+		if(rs<=m_start)
+		{
+			r = m_start;
+			d_out = signal(m_start) + std::abs(rs - m_start);
+			return;
+		}
+		if(rs>=end)
+		{
+			r = end;
+			d_out = signal(end) + std::abs(end - rs);
+			return;
+		}
+		r = rs;
+		d_out = signal(rs);
+		return;
+	}
+
+	//offset where the segment from (rc, -hc) to (rs, hs) crosses the edge line
+	double const ri = (rs*hc + hs*rc)/(hs+hc);
+
+	if(ri<m_start)
+	{
+		r = m_start;
+		d_out = signal(m_start) + hypotenuse(m_start - rs, hs);
+		return;
+	}
+	if(ri>end)
+	{
+		r = end;
+		d_out = signal(end) + hypotenuse(end - rs, hs);
+		return;
+	}
+
+	//the crossing lies inside the interval: distance is measured straight from the pseudo-source
+	r = ri;
+	d_out = m_d + hypotenuse(rc - rs, hc + hs);
+}
+
+
+//Resets the interval so that it starts at offset 0 of the given edge.
+//Without a source the interval gets infinite distance. With a source the
+//distance to the pseudo-source is zero and the pseudo-source is the source
+//itself, expressed in the local coordinates of the edge.
+inline void Interval::initialize(edge_pointer edge,
+								 SurfacePoint* source,
+								 unsigned source_index)
+{
+	m_edge = edge;
+	m_next = NULL;
+	m_direction = UNDEFINED_DIRECTION;
+	m_source_index = source_index;
+	m_start = 0.0;
+
+	if(!source)
+	{
+		m_d = GEODESIC_INF;
+		m_min = GEODESIC_INF;
+		return;
+	}
+	m_d = 0;
+
+	//a source sitting exactly on an end point of the edge gets exact coordinates
+	if(source->base_element()->type() == VERTEX)
+	{
+		bool const at_v0 = (source->base_element()->id() == edge->v0()->id());
+		if(at_v0 || source->base_element()->id() == edge->v1()->id())
+		{
+			m_pseudo_x = at_v0 ? 0.0 : stop();		//stop() is the edge length here because m_next is NULL
+			m_pseudo_y = 0.0;
+			m_min = 0.0;
+			return;
+		}
+	}
+
+	//general case: local coordinates of the source, with y flipped to the non-positive side
+	edge->local_coordinates(source, m_pseudo_x, m_pseudo_y);
+	m_pseudo_y = -m_pseudo_y;
+
+	compute_min_distance(stop());
+}
+
+
+
+// #include "geodesic_algorithm_base.h"
+//Common interface of the geodesic algorithms: propagate distances from a set
+//of sources over the mesh, then trace paths back or query the best source.
+class GeodesicAlgorithmBase
+{
+public:
+    enum AlgorithmType
+    {
+        EXACT,
+		DIJKSTRA,
+        SUBDIVISION,
+		UNDEFINED_ALGORITHM
+    };
+
+	//mesh is stored as a plain pointer and is not owned by the algorithm
+	GeodesicAlgorithmBase(geodesic::Mesh* mesh):
+		m_type(UNDEFINED_ALGORITHM),
+		m_max_propagation_distance(1e100),
+		m_mesh(mesh),
+		m_time_consumed(0.0),							//so print_statistics() before any propagation reads a defined value
+		m_propagation_distance_stopped(GEODESIC_INF)	//no propagation has stopped yet
+	{};
+
+	virtual ~GeodesicAlgorithmBase(){};
+
+	virtual void propagate(std::vector<SurfacePoint>& sources,
+   						   double max_propagation_distance = GEODESIC_INF,			//propagation algorithm stops after reaching the certain distance from the source
+						   std::vector<SurfacePoint>* stop_points = NULL) = 0; //or after ensuring that all the stop_points are covered
+
+	virtual void trace_back(SurfacePoint& destination,		//trace back piecewise-linear path
+							std::vector<SurfacePoint>& path) = 0;
+
+	//convenience wrapper: propagate from one source, then trace the path back from destination
+	void geodesic(SurfacePoint& source,
+						  SurfacePoint& destination,
+						  std::vector<SurfacePoint>& path);
+
+	//convenience wrapper: propagate from all sources, then trace one path back per destination
+	void geodesic(std::vector<SurfacePoint>& sources,
+						  std::vector<SurfacePoint>& destinations,
+						  std::vector<std::vector<SurfacePoint> >& paths);
+
+	virtual unsigned best_source(SurfacePoint& point,			//after propagation step is done, quickly find what source this point belongs to and what is the distance to this source
+								 double& best_source_distance) = 0;
+
+	virtual void print_statistics()		//print info about timing and memory usage in the propagation step of the algorithm
+	{
+		std::cout << "propagation step took " << m_time_consumed << " seconds " << std::endl;
+	};
+
+	AlgorithmType type(){return m_type;};
+
+	//human-readable name of the algorithm type
+	virtual std::string name();
+
+	geodesic::Mesh* mesh(){return m_mesh;};
+protected:
+
+	//store the stop distance and map every stop point to its closest mesh vertex
+	void set_stop_conditions(std::vector<SurfacePoint>* stop_points,
+						     double stop_distance);
+	double stop_distance()
+	{
+		return m_max_propagation_distance;
+	}
+
+	AlgorithmType m_type;					   // type of the algorithm
+
+	typedef std::pair<vertex_pointer, double> stop_vertex_with_distace_type;
+	std::vector<stop_vertex_with_distace_type> m_stop_vertices; // algorithm stops propagation after covering certain vertices
+	double m_max_propagation_distance;			 // or reaching the certain distance
+
+	geodesic::Mesh* m_mesh;
+
+	double m_time_consumed;		//how much time does the propagation step takes
+	double m_propagation_distance_stopped;		//at what distance (if any) the propagation algorithm stopped
+};
+
+//Length of a piecewise-linear path: sum of the distances between consecutive points.
+//An empty or single-point path has length 0.
+inline double length(std::vector<SurfacePoint>& path)
+{
+	double total = 0;
+	for(unsigned i=1; i<path.size(); ++i)
+	{
+		total += path[i-1].distance(&path[i]);
+	}
+	return total;
+}
+
+//Prints the number of points and the total length of the path to stdout.
+inline void print_info_about_path(std::vector<SurfacePoint>& path)
+{
+	std::cout << "number of the points in the path = " << path.size();
+	std::cout << ", length of the path = " << length(path);
+	std::cout << std::endl;
+}
+
+inline std::string GeodesicAlgorithmBase::name()
+{
+	switch(m_type)
+	{
+	case EXACT:
+		return "exact";
+	case DIJKSTRA:
+		return "dijkstra";
+	case SUBDIVISION:
+		return "subdivision";
+	default:
+	case UNDEFINED_ALGORITHM:
+		return "undefined";
+	}
+}
+
+//One-call helper: propagates from a single source with the destination as
+//the only stop point, then traces the path back from the destination.
+inline void GeodesicAlgorithmBase::geodesic(SurfacePoint& source,
+											SurfacePoint& destination,
+											std::vector<SurfacePoint>& path)
+{
+	std::vector<SurfacePoint> sources;
+	sources.push_back(source);
+
+	std::vector<SurfacePoint> stop_points;
+	stop_points.push_back(destination);
+
+	//no distance limit: propagation is bounded by the stop point only
+	propagate(sources, GEODESIC_INF, &stop_points);
+
+	trace_back(destination, path);
+}
+
+//One-call helper for several sources and destinations: propagates once with
+//the destinations as stop points, then traces one path back per destination
+//(paths[i] belongs to destinations[i]).
+inline void GeodesicAlgorithmBase::geodesic(std::vector<SurfacePoint>& sources,
+											std::vector<SurfacePoint>& destinations,
+											std::vector<std::vector<SurfacePoint> >& paths)
+{
+	//no distance limit: propagation is bounded by the destinations only
+	propagate(sources, GEODESIC_INF, &destinations);
+
+	paths.resize(destinations.size());
+
+	unsigned i = 0;
+	while(i < paths.size())
+	{
+		trace_back(destinations[i], paths[i]);
+		++i;
+	}
+}
+
+//Stores the maximal propagation distance and, for every stop point, the mesh
+//vertex of its element that is closest to it together with that distance.
+//A NULL stop_points pointer clears the list of stop vertices.
+inline void GeodesicAlgorithmBase::set_stop_conditions(std::vector<SurfacePoint>* stop_points,
+														double stop_distance)
+{
+	m_max_propagation_distance = stop_distance;
+
+	if(!stop_points)
+	{
+		m_stop_vertices.clear();
+		return;
+	}
+
+	m_stop_vertices.resize(stop_points->size());
+
+	std::vector<vertex_pointer> candidates;
+	for(unsigned i = 0; i < stop_points->size(); ++i)
+	{
+		SurfacePoint& point = (*stop_points)[i];
+
+		//vertices of the element the stop point lies on
+		candidates.clear();
+		m_mesh->closest_vertices(&point, &candidates);
+
+		//pick the candidate with the smallest straight-line distance to the point
+		vertex_pointer nearest = NULL;
+		double nearest_distance = 1e100;
+		for(std::vector<vertex_pointer>::iterator it = candidates.begin(); it != candidates.end(); ++it)
+		{
+			double const d = point.distance(*it);
+			if(d < nearest_distance)
+			{
+				nearest_distance = d;
+				nearest = *it;
+			}
+		}
+		assert(nearest);
+
+		m_stop_vertices[i] = stop_vertex_with_distace_type(nearest, nearest_distance);
+	}
+}
+
+
+
+//Exact geodesic algorithm on top of GeodesicAlgorithmBase.
+//State kept by the object: one IntervalList per mesh edge, a std::set of interval pointers
+//ordered with Interval used as the comparator type (the propagation queue), an allocator for
+//intervals, the sorted sources, and scratch arrays shared between intersect_intervals and
+//update_list_and_queue.
+class GeodesicAlgorithmExact : public GeodesicAlgorithmBase
+{
+public:
+	//Builds one interval list per edge of the mesh and binds each list to its edge.
+	//The statistics counters start at zero so that they hold defined values even
+	//before the first call to propagate().
+	GeodesicAlgorithmExact(geodesic::Mesh* mesh):
+		GeodesicAlgorithmBase(mesh),
+		m_memory_allocator(mesh->edges().size(), mesh->edges().size()),
+		m_edge_interval_lists(mesh->edges().size()),
+		m_queue_max_size(0),
+		m_iterations(0)
+	{
+		m_type = EXACT;
+
+		for(unsigned i=0; i<m_edge_interval_lists.size(); ++i)
+		{
+			m_edge_interval_lists[i].initialize(&mesh->edges()[i]);
+		}
+	}
+
+	~GeodesicAlgorithmExact(){}
+
+	void propagate(std::vector<SurfacePoint>& sources,
+   				   double max_propagation_distance = GEODESIC_INF,			//propagation algorithm stops after reaching the certain distance from the source
+				   std::vector<SurfacePoint>* stop_points = NULL); //or after ensuring that all the stop_points are covered
+
+	void trace_back(SurfacePoint& destination,		//trace back piecewise-linear path
+					std::vector<SurfacePoint>& path);
+
+	unsigned best_source(SurfacePoint& point,			//quickly find what source this point belongs to and what is the distance to this source
+		double& best_source_distance);
+
+	void print_statistics();
+
+private:
+	typedef std::set<interval_pointer, Interval> IntervalQueue;		//Interval doubles as the ordering functor
+
+	//merges the candidates into the interval list of an edge and keeps the queue in sync
+	void update_list_and_queue(list_pointer list,
+							   IntervalWithStop* candidates,	//up to two candidates
+							   unsigned num_candidates);
+
+	//fills candidates with the intervals propagated across a face; returns how many were written (0, 1 or 2)
+	unsigned compute_propagated_parameters(double pseudo_x,
+											double pseudo_y,
+											double d,		//parameters of the interval
+											double start,
+											double end,		//start/end of the interval
+											double alpha,	//corner angle
+											double L,		//length of the new edge
+											bool first_interval,		//if it is the first interval on the edge
+											bool last_interval,		//if it is the last interval on the edge
+											bool turn_left,
+											bool turn_right,
+											IntervalWithStop* candidates);		//output, room for two entries
+
+	void construct_propagated_intervals(bool invert,
+									  edge_pointer edge,
+									  face_pointer face,		//constructs iNew from the rest of the data
+									  IntervalWithStop* candidates,
+									  unsigned& num_candidates,
+									  interval_pointer source_interval);
+
+	double compute_positive_intersection(double start,
+										 double pseudo_x,
+										 double pseudo_y,
+										 double sin_alpha,
+										 double cos_alpha);		//used in construct_propagated_intervals
+
+	unsigned intersect_intervals(interval_pointer zero,
+								    IntervalWithStop* one);			//intersecting two intervals with up to three intervals in the end
+
+	interval_pointer best_first_interval(SurfacePoint& point,
+										double& best_total_distance,
+										double& best_interval_position,
+										unsigned& best_source_index);
+
+	bool check_stop_conditions(unsigned& index);
+
+	//drops all propagation state: allocator, queue and every per-edge interval list
+	void clear()
+	{
+		m_memory_allocator.clear();
+		m_queue.clear();
+		for(unsigned i=0; i<m_edge_interval_lists.size(); ++i)
+		{
+			m_edge_interval_lists[i].clear();
+		}
+		m_propagation_distance_stopped = GEODESIC_INF;
+	}
+
+	//interval list of an edge, looked up by the edge id
+	list_pointer interval_list(edge_pointer e)
+	{
+		return &m_edge_interval_lists[e->id()];
+	}
+
+	void set_sources(std::vector<SurfacePoint>& sources)
+	{
+		m_sources.initialize(sources);
+	}
+
+	void initialize_propagation_data();
+
+	void list_edges_visible_from_source(MeshElementBase* p,
+										std::vector<edge_pointer>& storage); //used in initialization
+
+	long visible_from_source(SurfacePoint& point);	//used in backtracing
+
+	void best_point_on_the_edge_set(SurfacePoint& point,
+									std::vector<edge_pointer> const& storage,
+									interval_pointer& best_interval,
+									double& best_total_distance,
+									double& best_interval_position);
+
+	void possible_traceback_edges(SurfacePoint& point,
+								  std::vector<edge_pointer>& storage);
+
+	bool erase_from_queue(interval_pointer p);
+
+	IntervalQueue m_queue;	//interval queue
+
+	MemoryAllocator<Interval> m_memory_allocator;			//quickly allocate and deallocate intervals
+	std::vector<IntervalList> m_edge_interval_lists;		//every edge has its interval data
+
+	enum MapType {OLD, NEW};		//used for interval intersection
+	MapType map[5];					//scratch output of intersect_intervals: owner of every piece
+	double start[6];				//scratch output of intersect_intervals: start of every piece
+	interval_pointer i_new[5];		//scratch used by update_list_and_queue
+
+	unsigned m_queue_max_size;			//used for statistics
+	unsigned m_iterations;			//used for statistics
+
+	SortedSources m_sources;
+};
+
+//Scans the interval lists of the edges in storage and reports the closest point found by
+//IntervalList::find_closest_point over all of them.
+//  best_interval          - interval that produced the smallest distance, NULL if none did
+//  best_total_distance    - that smallest distance, 1e100 if nothing better was found
+//  best_interval_position - offset reported together with the best interval, 0.0 if none
+//All three outputs are always assigned, including when storage is empty.
+inline void GeodesicAlgorithmExact::best_point_on_the_edge_set(SurfacePoint& point,
+															   std::vector<edge_pointer> const& storage,
+															   interval_pointer& best_interval,
+															   double& best_total_distance,
+															   double& best_interval_position)
+{
+	//defined defaults, so the caller never reads indeterminate values
+	best_interval = NULL;
+	best_interval_position = 0.0;
+	best_total_distance = 1e100;
+
+	for(unsigned i=0; i<storage.size(); ++i)
+	{
+		edge_pointer e = storage[i];
+		list_pointer list = interval_list(e);
+
+		double offset;
+		double distance;
+		interval_pointer interval;
+
+		list->find_closest_point(&point,
+								 offset,
+								 distance,
+								 interval);
+
+		if(distance < best_total_distance)
+		{
+			best_interval = interval;
+			best_total_distance = distance;
+			best_interval_position = offset;
+		}
+	}
+}
+
+//Collects into storage (cleared first) the edges to examine when tracing back from point:
+//  point on a vertex - for every adjacent face, the edge opposite to the vertex;
+//  point on an edge  - for every adjacent face, the two other edges of that face;
+//  otherwise         - the three edges of the face the point lies on.
+inline void GeodesicAlgorithmExact::possible_traceback_edges(SurfacePoint& point,
+															 std::vector<edge_pointer>& storage)
+{
+	storage.clear();
+
+	if(point.type() == VERTEX)
+	{
+		vertex_pointer vertex = static_cast<vertex_pointer>(point.base_element());
+		for(unsigned k=0; k<vertex->adjacent_faces().size(); ++k)
+		{
+			face_pointer face = vertex->adjacent_faces()[k];
+			storage.push_back(face->opposite_edge(vertex));
+		}
+		return;
+	}
+
+	if(point.type() == EDGE)
+	{
+		edge_pointer edge = static_cast<edge_pointer>(point.base_element());
+		for(unsigned k=0; k<edge->adjacent_faces().size(); ++k)
+		{
+			face_pointer face = edge->adjacent_faces()[k];
+
+			storage.push_back(face->next_edge(edge,edge->v0()));
+			storage.push_back(face->next_edge(edge,edge->v1()));
+		}
+		return;
+	}
+
+	//remaining case: the base element is treated as a face
+	face_pointer face = static_cast<face_pointer>(point.base_element());
+	for(unsigned k=0; k<3; ++k)
+	{
+		storage.push_back(face->adjacent_edges()[k]);
+	}
+}
+
+
+//Returns the index of the source that sees the point directly, or -1 if there is none.
+//  EDGE point   - looks at the interval covering the point position on its edge;
+//  FACE point   - always -1;
+//  VERTEX point - first adjacent edge whose interval at the vertex end is visible from a source.
+inline long GeodesicAlgorithmExact::visible_from_source(SurfacePoint& point)	//negative if not visible
+{
+	assert(point.type() != UNDEFINED_POINT);
+
+	if(point.type() == EDGE)
+	{
+		edge_pointer edge = static_cast<edge_pointer>(point.base_element());
+		list_pointer intervals = interval_list(edge);
+
+		//position along the edge, clamped to the edge length
+		double position = std::min(point.distance(edge->v0()), edge->length());
+		interval_pointer covering = intervals->covering_interval(position);
+
+		if(covering && covering->visible_from_source())
+		{
+			return static_cast<long>(covering->source_index());
+		}
+		return -1;
+	}
+
+	if(point.type() == FACE)
+	{
+		return -1;
+	}
+
+	if(point.type() == VERTEX)
+	{
+		vertex_pointer vertex = static_cast<vertex_pointer>(point.base_element());
+		for(unsigned k=0; k<vertex->adjacent_edges().size(); ++k)
+		{
+			edge_pointer edge = vertex->adjacent_edges()[k];
+			list_pointer intervals = interval_list(edge);
+
+			//the vertex is either the origin (0.0) or the far end (length) of the edge
+			double position = edge->length();
+			if(edge->v0()->id() == vertex->id())
+			{
+				position = 0.0;
+			}
+
+			interval_pointer covering = intervals->covering_interval(position);
+			if(covering && covering->visible_from_source())
+			{
+				return static_cast<long>(covering->source_index());
+			}
+		}
+
+		return -1;
+	}
+
+	assert(0);			//unknown point type
+	return 0;
+}
+
+//Evaluates (-pseudo_y*start) / (sin_alpha*(pseudo_x - start) - cos_alpha*pseudo_y).
+//Returns -1.0 when the denominator is negative or below 1e-30, 0.0 when the numerator
+//is below 1e-30 (checked after the sign test of the denominator), the quotient otherwise.
+//Requires pseudo_y < 0.
+inline double GeodesicAlgorithmExact::compute_positive_intersection(double start,
+																	double pseudo_x,
+																	double pseudo_y,
+																	double sin_alpha,
+																	double cos_alpha)
+{
+	assert(pseudo_y < 0);
+
+	double const denom = sin_alpha*(pseudo_x - start) - cos_alpha*pseudo_y;
+	if(denom<0.0)
+	{
+		return -1.0;
+	}
+
+	double const numer = -pseudo_y*start;
+	if(numer < 1e-30)
+	{
+		return 0.0;
+	}
+
+	//a vanishing denominator is reported as "no intersection"
+	return denom < 1e-30 ? -1.0 : numer/denom;
+}
+
+//Appends to storage the edges that a source located on element p reaches directly:
+//the three edges of a face, the edge itself, or all edges adjacent to a vertex.
+//storage is not cleared here.
+inline void GeodesicAlgorithmExact::list_edges_visible_from_source(MeshElementBase* p,
+																   std::vector<edge_pointer>& storage)
+{
+	assert(p->type() != UNDEFINED_POINT);
+
+	if(p->type() == FACE)
+	{
+		face_pointer face = static_cast<face_pointer>(p);
+		storage.push_back(face->adjacent_edges()[0]);
+		storage.push_back(face->adjacent_edges()[1]);
+		storage.push_back(face->adjacent_edges()[2]);
+		return;
+	}
+
+	if(p->type() == EDGE)
+	{
+		storage.push_back(static_cast<edge_pointer>(p));
+		return;
+	}
+
+	//anything else is handled as a vertex
+	vertex_pointer vertex = static_cast<vertex_pointer>(p);
+	for(unsigned k=0; k<vertex->adjacent_edges().size(); ++k)
+	{
+		storage.push_back(vertex->adjacent_edges()[k]);
+	}
+}
+
+//Removes interval p from the queue if it is there.
+//Intervals whose min() is not below GEODESIC_INF/10 are not looked up at all.
+//Returns true only when an entry was actually erased.
+inline bool GeodesicAlgorithmExact::erase_from_queue(interval_pointer p)
+{
+	if(!(p->min() < GEODESIC_INF/10.0))
+	{
+		return false;
+	}
+
+	assert(m_queue.count(p)<=1);			//the set is unique
+
+	IntervalQueue::iterator position = m_queue.find(p);
+	if(position == m_queue.end())
+	{
+		return false;
+	}
+
+	m_queue.erase(position);
+	return true;
+}
+
+inline unsigned GeodesicAlgorithmExact::intersect_intervals(interval_pointer zero,
+															   IntervalWithStop* one)			//intersecting two intervals with up to three intervals in the end
+{	/* Splits the span of the existing interval "zero" into pieces, deciding for each piece
+	 * whether "zero" (OLD) or the candidate "one" (NEW) is kept.
+	 * Output goes to the member scratch arrays: map[k] is the owner of piece k and start[k]
+	 * is where piece k begins; the return value N is the number of pieces.
+	 * Both intervals must lie on the same edge and overlap, and one->min() must be finite
+	 * (see the asserts below).
+	 * On the overlap the owner of a piece is chosen by comparing zero->signal() and
+	 * one->signal() at the midpoint of the piece; ties go to OLD.
+	 */
+	assert(zero->edge()->id() == one->edge()->id());
+	assert(zero->stop() > one->start() && zero->start() < one->stop());
+	assert(one->min() < GEODESIC_INF/10.0);
+
+	double const local_epsilon = SMALLEST_INTERVAL_RATIO*one->edge()->length();
+
+	unsigned N=0;
+	if(zero->min() > GEODESIC_INF/10.0)	//"zero" has no finite min(): pieces are decided by position only, no signal comparison
+	{
+		start[0] = zero->start();
+		if(zero->start() < one->start() - local_epsilon)
+		{
+			map[0] = OLD;
+			start[1] = one->start();
+			map[1] = NEW;
+			N = 2;
+		}
+		else
+		{
+			map[0] = NEW;
+			N = 1;
+		}
+
+		if(zero->stop() > one->stop() + local_epsilon)
+		{
+			map[N] = OLD;							//"zero" interval
+			start[N++] = one->stop();
+		}
+
+		start[N+1] = zero->stop();	//NOTE(review): index is N+1 here while the other path has start[N] commented out; update_list_and_queue reads only start[0..N-1] - confirm intended index
+		return N;
+	}
+
+	double const local_small_epsilon = 1e-8*one->edge()->length();
+
+	double D = zero->d() - one->d();
+	double x0 = zero->pseudo_x();
+	double x1 = one->pseudo_x();
+	double R0 = x0*x0 + zero->pseudo_y()*zero->pseudo_y();
+	double R1 = x1*x1 + one->pseudo_y()*one->pseudo_y();
+
+	double inter[2];									//points of intersection
+	char Ninter=0;										//number of the points of the intersection
+
+	if(std::abs(D)<local_epsilon)					//if d1 == d0, equation is linear
+	{
+		double denom = x1 - x0;
+		if(std::abs(denom)>local_small_epsilon)
+		{
+			inter[0] =  (R1 - R0)/(2.*denom);					//one solution
+			Ninter = 1;
+		}
+	}
+	else
+	{
+		double D2 = D*D;
+		double Q = 0.5*(R1-R0-D2);
+		double X = x0 - x1;
+
+		double A = X*X - D2;	//coefficients of A*x^2 + 2*B*x + C = 0, matching the roots (-B +- sqrt(B*B-A*C))/A below
+		double B = Q*X + D2*x0;
+		double C = Q*Q - D2*R0;
+
+		if (std::abs(A)<local_small_epsilon)							//if A == 0, linear equation
+		{
+			if(std::abs(B)>local_small_epsilon)
+			{
+				inter[0] =  -C/B;							//one solution
+				Ninter = 1;
+			}
+		}
+		else
+		{
+			double det = B*B-A*C;
+			if(det>local_small_epsilon*local_small_epsilon)			//two roots
+			{
+				det = sqrt(det);
+				if(A>0.0)								//make sure that the roots are ordered
+				{
+					inter[0] = (-B - det)/A;
+					inter[1] = (-B + det)/A;
+				}
+				else
+				{
+					inter[0] = (-B + det)/A;
+					inter[1] = (-B - det)/A;
+				}
+
+				if(inter[1] - inter[0] > local_small_epsilon)
+				{
+					Ninter = 2;
+				}
+				else
+				{
+					Ninter = 1;
+				}
+			}
+			else if(det>=0.0)					//single root
+			{
+				inter[0] = -B/A;
+				Ninter = 1;
+			}
+		}
+	}
+	//---------------------------find possible intervals---------------------------------------
+	double left = std::max(zero->start(), one->start());		//define left and right boundaries of the intersection of the intervals
+	double right = std::min(zero->stop(), one->stop());
+
+	double good_start[4];										//points of intersection within the (left, right) limits +"left" + "right"
+	good_start[0] = left;
+	char Ngood_start=1;										//number of the points of the intersection
+
+	for(char i=0; i<Ninter; ++i)							//for all points of intersection
+	{
+		double x = inter[i];
+		if(x > left + local_epsilon && x < right - local_epsilon)
+		{
+			good_start[Ngood_start++] = x;
+		}
+	}
+	good_start[Ngood_start++] = right;
+
+	MapType mid_map[3];	//owner of each sub-interval between consecutive good_start entries
+	for(char i=0; i<Ngood_start-1; ++i)
+	{
+		double mid = (good_start[i] + good_start[i+1])*0.5;
+		mid_map[i] = zero->signal(mid) <= one->signal(mid) ? OLD : NEW;
+	}
+
+	//-----------------------------------output----------------------------------
+	N = 0;
+	if(zero->start() < left - local_epsilon)						//additional "zero" interval
+	{
+		if(mid_map[0] == OLD)				//first interval in the map is already the old one
+		{
+			good_start[0] = zero->start();
+		}
+		else
+		{
+			map[N] = OLD;					//"zero" interval
+			start[N++] = zero->start();
+		}
+	}
+
+	for(long i=0;i<Ngood_start-1;++i)							//for all intervals
+	{
+		MapType current_map = mid_map[i];
+		if(N==0 || map[N-1] != current_map)	//neighbouring pieces with the same owner are merged
+		{
+			map[N] = current_map;
+			start[N++] = good_start[i];
+		}
+	}
+
+	if(zero->stop() > one->stop() + local_epsilon)
+	{
+		if(N==0 || map[N-1] == NEW)
+		{
+			map[N] = OLD;							//"zero" interval
+			start[N++] = one->stop();
+		}
+	}
+
+	start[0] = zero->start();		// just to make sure that epsilons do not damage anything
+	//start[N] = zero->stop();
+
+	return N;
+}
+
+//Resets the propagation state and seeds it: for every source, each edge listed by
+//list_edges_visible_from_source receives one candidate interval spanning the whole edge,
+//marked Interval::FROM_SOURCE, which is merged into the edge list and the queue.
+inline void GeodesicAlgorithmExact::initialize_propagation_data()
+{
+	clear();
+
+	IntervalWithStop seed;								//reused for every (source, edge) pair
+	std::vector<edge_pointer> visible_edges;
+
+	for(unsigned source_index=0; source_index<m_sources.size(); ++source_index)
+	{
+		SurfacePoint* source = &m_sources[source_index];
+
+		visible_edges.clear();
+		list_edges_visible_from_source(source->base_element(), visible_edges);
+
+		for(unsigned k=0; k<visible_edges.size(); ++k)
+		{
+			edge_pointer edge = visible_edges[k];
+
+			seed.initialize(edge, source, source_index);
+			seed.stop() = edge->length();					//cover the edge up to its end
+			seed.compute_min_distance(seed.stop());
+			seed.direction() = Interval::FROM_SOURCE;
+
+			update_list_and_queue(interval_list(edge), &seed, 1);
+		}
+	}
+}
+
+//Runs the propagation from the given sources.
+//  sources                  - source points, handed to set_sources
+//  max_propagation_distance - distance limit, handed to set_stop_conditions
+//  stop_points              - optional points to cover, handed to set_stop_conditions
+//The loop repeatedly takes the first interval of the queue and propagates it across the
+//faces adjacent to its edge, first onto the edge that shares v0 and, when the candidates
+//on that edge reach its end (or there are none), onto the edge that shares v1.
+//Stop conditions are tested every 10th iteration.
+//On return m_propagation_distance_stopped holds min() of the first interval left in the queue
+//(GEODESIC_INF if the queue is empty), m_time_consumed the elapsed clock() time in seconds,
+//and m_iterations / m_queue_max_size the statistics of this run.
+inline void GeodesicAlgorithmExact::propagate(std::vector<SurfacePoint>& sources,
+   									   double max_propagation_distance,			//propagation algorithm stops after reaching the certain distance from the source
+									   std::vector<SurfacePoint>* stop_points)
+{
+	set_stop_conditions(stop_points, max_propagation_distance);
+	set_sources(sources);
+	initialize_propagation_data();
+
+	clock_t time_started = clock();		//named so that it does not shadow the member array start[]
+
+	unsigned satisfied_index = 0;		//number of stop vertices already known to be covered
+
+	m_iterations = 0;		//for statistics
+	m_queue_max_size = 0;
+
+	IntervalWithStop candidates[2];
+
+	while(!m_queue.empty())
+	{
+		m_queue_max_size = std::max(static_cast<unsigned int>(m_queue.size()), m_queue_max_size);
+
+		unsigned const check_period = 10;
+		if(++m_iterations % check_period == 0)		//check if we covered all required vertices
+		{
+			if (check_stop_conditions(satisfied_index))
+			{
+				break;
+			}
+		}
+
+		interval_pointer min_interval = *m_queue.begin();
+		m_queue.erase(m_queue.begin());
+		edge_pointer edge = min_interval->edge();
+
+		assert(min_interval->d() < GEODESIC_INF);
+
+		bool const first_interval = min_interval->start() == 0.0;
+		//bool const last_interval = min_interval->stop() == edge->length();
+		bool const last_interval = min_interval->next() == NULL;
+
+		bool const turn_left = edge->v0()->saddle_or_boundary();
+		bool const turn_right = edge->v1()->saddle_or_boundary();
+
+		for(unsigned i=0; i<edge->adjacent_faces().size(); ++i)		//two possible faces to propagate
+		{
+			if(!edge->is_boundary())		//just in case, always propagate boundary edges
+			{
+				//do not propagate back into the face the interval came from
+				if((i == 0 && min_interval->direction() == Interval::FROM_FACE_0) ||
+					(i == 1 && min_interval->direction() == Interval::FROM_FACE_1))
+				{
+					continue;
+				}
+			}
+
+			face_pointer face = edge->adjacent_faces()[i];			//if we come from 1, go to 2
+			edge_pointer next_edge = face->next_edge(edge,edge->v0());
+
+			unsigned num_propagated = compute_propagated_parameters(min_interval->pseudo_x(),
+																	 min_interval->pseudo_y(),
+																	 min_interval->d(),		//parameters of the interval
+																	 min_interval->start(),
+																	 min_interval->stop(),		//start/end of the interval
+																	 face->vertex_angle(edge->v0()),	//corner angle
+																	 next_edge->length(),		//length of the new edge
+																	 first_interval,		//if it is the first interval on the edge
+																	 last_interval,		//if it is the last interval on the edge
+																	 turn_left,
+																	 turn_right,
+																	 candidates);
+			bool propagate_to_right = true;
+
+			if(num_propagated)
+			{
+				//the right edge is skipped when the last candidate stops short of the end of the left edge
+				if(candidates[num_propagated-1].stop() != next_edge->length())
+				{
+					propagate_to_right = false;
+				}
+
+				bool const invert = next_edge->v0()->id() != edge->v0()->id(); //if the origins coincide, do not invert intervals
+
+				construct_propagated_intervals(invert,
+											 next_edge,
+											 face,
+											 candidates,
+											 num_propagated,
+											 min_interval);
+
+				update_list_and_queue(interval_list(next_edge),
+									  candidates,
+									  num_propagated);
+			}
+
+			if(propagate_to_right)
+			{
+									//propagation to the right edge: same computation, mirrored along the edge
+				double length = edge->length();
+				next_edge = face->next_edge(edge,edge->v1());
+
+				num_propagated = compute_propagated_parameters(length - min_interval->pseudo_x(),
+															 min_interval->pseudo_y(),
+															 min_interval->d(),		//parameters of the interval
+															 length - min_interval->stop(),
+															 length - min_interval->start(),		//start/end of the interval
+															 face->vertex_angle(edge->v1()),	//corner angle
+															 next_edge->length(),		//length of the new edge
+															 last_interval,		//mirrored: the last interval plays the role of the first one
+															 first_interval,
+															 turn_right,
+															 turn_left,
+															 candidates);
+
+				if(num_propagated)
+				{
+					bool const invert = next_edge->v0()->id() != edge->v1()->id();		//if the origins coincide, do not invert intervals
+
+					construct_propagated_intervals(invert,
+												 next_edge,
+												 face,
+												 candidates,
+												 num_propagated,
+												 min_interval);
+
+					update_list_and_queue(interval_list(next_edge),
+									      candidates,
+										  num_propagated);
+				}
+			}
+		}
+	}
+
+	m_propagation_distance_stopped = m_queue.empty() ? GEODESIC_INF : (*m_queue.begin())->min();
+	clock_t time_stopped = clock();
+	m_time_consumed = (static_cast<double>(time_stopped)-static_cast<double>(time_started))/CLOCKS_PER_SEC;
+}
+
+
+//Tells whether propagation may stop. The queue must not be empty.
+//Returns false while min() of the first queued interval is below stop_distance(), or while
+//some stop vertex is not yet settled, i.e. that min() is below the signal at the vertex plus
+//the distance stored for it. index counts the stop vertices already accepted and is advanced
+//in place, so accepted vertices are not re-examined on later calls.
+inline bool GeodesicAlgorithmExact::check_stop_conditions(unsigned& index)
+{
+	double const front_distance = (*m_queue.begin())->min();
+	if(front_distance < stop_distance())
+	{
+		return false;
+	}
+
+	for(; index < m_stop_vertices.size(); ++index)
+	{
+		vertex_pointer vertex = m_stop_vertices[index].first;
+		edge_pointer edge = vertex->adjacent_edges()[0];				//take any edge
+
+		//signal at the end of the edge where the vertex sits
+		double vertex_distance;
+		if(edge->v0()->id() == vertex->id())
+		{
+			vertex_distance = interval_list(edge)->signal(0.0);
+		}
+		else
+		{
+			vertex_distance = interval_list(edge)->signal(edge->length());
+		}
+
+		if(front_distance < vertex_distance + m_stop_vertices[index].second)
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
+
+
+inline void GeodesicAlgorithmExact::update_list_and_queue(list_pointer list,
+												IntervalWithStop* candidates,	//up to two candidates
+												unsigned num_candidates)
+{	/* Merges the candidate intervals into the interval list of one edge and keeps m_queue
+	 * consistent with the list.
+	 * Empty list: the list is built directly from the candidates, padded with intervals
+	 * initialized from the edge alone in front of / behind them when they do not reach the
+	 * edge ends.
+	 * Non-empty list: every candidate q is intersected (intersect_intervals) with each old
+	 * interval p it overlaps; p is split, replaced or kept according to map[] / start[], and
+	 * intervals that change are re-inserted into the queue. "previous" is a NEW piece that is
+	 * still open at its right end and may be extended over the following old interval.
+	 * Candidates are copied with memcpy(..., sizeof(Interval)), i.e. only the Interval-sized
+	 * prefix of an IntervalWithStop is stored (presumably the Interval base part - TODO confirm).
+	 */
+	assert(num_candidates <= 2);
+	//assert(list->first() != NULL);
+	edge_pointer edge = list->edge();
+	double const local_epsilon = SMALLEST_INTERVAL_RATIO * edge->length();
+
+	if(list->first() == NULL)	//nothing on this edge yet: build the list from scratch
+	{
+		interval_pointer* p = &list->first();	//slot where the next list node has to be linked
+		IntervalWithStop* first;
+		IntervalWithStop* second;
+
+		if(num_candidates == 1)
+		{
+			first = candidates;
+			second = candidates;
+			first->compute_min_distance(first->stop());
+		}
+		else
+		{
+			if(candidates->start() <= (candidates+1)->start())	//order the two candidates by start
+			{
+				first = candidates;
+				second = candidates+1;
+			}
+			else
+			{
+				first = candidates+1;
+				second = candidates;
+			}
+			assert(first->stop() == second->start());
+
+			first->compute_min_distance(first->stop());
+			second->compute_min_distance(second->stop());
+		}
+
+		if(first->start() > 0.0)	//leading filler interval, initialized from the edge only
+		{
+			*p = m_memory_allocator.allocate();
+			(*p)->initialize(edge);
+			p = &(*p)->next();
+		}
+
+		*p = m_memory_allocator.allocate();
+		memcpy(*p,first,sizeof(Interval));
+		m_queue.insert(*p);
+
+		if(num_candidates == 2)
+		{
+			p = &(*p)->next();
+			*p = m_memory_allocator.allocate();
+			memcpy(*p,second,sizeof(Interval));
+			m_queue.insert(*p);
+		}
+
+		if(second->stop() < edge->length())	//trailing filler interval; NOTE(review): its next() is whatever initialize(edge) leaves - confirm it is NULL
+		{
+			p = &(*p)->next();
+			*p = m_memory_allocator.allocate();
+			(*p)->initialize(edge);
+			(*p)->start() = second->stop();
+		}
+		else
+		{
+			(*p)->next() = NULL;
+		}
+		return;
+	}
+
+	bool propagate_flag;	//whether the old interval being split was in the queue
+
+	for(unsigned i=0; i<num_candidates; ++i)				//for all new intervals
+	{
+		IntervalWithStop* q = &candidates[i];
+
+		interval_pointer previous = NULL;
+
+		interval_pointer p = list->first();
+		assert(p->start() == 0.0);
+
+		while(p != NULL && p->stop() - local_epsilon < q->start())	//skip old intervals that end before q starts
+		{
+			p = p->next();
+		}
+
+		while(p != NULL && p->start() < q->stop() - local_epsilon)			//go through all old intervals
+		{
+			unsigned const N = intersect_intervals(p, q);								//interset two intervals
+
+			if(N == 1)	//a single piece: either p stays as it is or q takes over all of p
+			{
+				if(map[0]==OLD)	//if "p" is always better, we do not need to update anything)
+				{
+					if(previous)		//close previous interval and put in into the queue
+					{
+						previous->next() = p;
+						previous->compute_min_distance(p->start());
+						m_queue.insert(previous);
+						previous = NULL;
+					}
+
+					p = p->next();
+
+				}
+				else if(previous)	//extend previous interval to cover everything; remove p
+				{
+					previous->next() = p->next();
+					erase_from_queue(p);
+					m_memory_allocator.deallocate(p);
+
+					p = previous->next();
+				}
+				else				//p becomes "previous"
+				{
+					previous = p;
+					interval_pointer next = p->next();
+					erase_from_queue(p);
+
+					memcpy(previous,q,sizeof(Interval));
+
+					previous->start() = start[0];
+					previous->next() = next;
+
+					p = next;
+				}
+				continue;
+			}
+
+			//update_flag = true;
+
+			Interval swap(*p);							//used for swapping information
+			propagate_flag = erase_from_queue(p);
+
+			for(unsigned j=1; j<N; ++j)				//no memory is needed for the first one
+			{
+				i_new[j] = m_memory_allocator.allocate();	//create new intervals
+			}
+
+			if(map[0]==OLD)	//finish previous, if any
+			{
+				if(previous)
+				{
+					previous->next() = p;
+					previous->compute_min_distance(previous->stop());
+					m_queue.insert(previous);
+					previous = NULL;
+				}
+				i_new[0] = p;
+				p->next() = i_new[1];
+				p->start() = start[0];
+			}
+			else if(previous)	//extend previous interval to cover everything; remove p
+			{
+				i_new[0] = previous;
+				previous->next() = i_new[1];
+				m_memory_allocator.deallocate(p);
+				previous = NULL;
+			}
+			else				//p becomes "previous"
+			{
+				i_new[0] = p;
+				memcpy(p,q,sizeof(Interval));
+
+				p->next() = i_new[1];
+				p->start() = start[0];
+			}
+
+			assert(!previous);
+
+			for(unsigned j=1; j<N; ++j)	//fill the remaining pieces from the saved old interval or from q
+			{
+				interval_pointer current_interval = i_new[j];
+
+				if(map[j] == OLD)
+				{
+					memcpy(current_interval,&swap,sizeof(Interval));
+				}
+				else
+				{
+					memcpy(current_interval,q,sizeof(Interval));
+				}
+
+				if(j == N-1)	//the last piece links to whatever followed the old interval
+				{
+					current_interval->next() = swap.next();
+				}
+				else
+				{
+					current_interval->next() = i_new[j+1];
+				}
+
+				current_interval->start() = start[j];
+			}
+
+			for(unsigned j=0; j<N; ++j)								//find "min" and add the intervals to the queue
+			{
+				if(j==N-1 && map[j]==NEW)	//a trailing NEW piece stays open as "previous"
+				{
+					previous = i_new[j];
+				}
+				else
+				{
+					interval_pointer current_interval = i_new[j];
+
+					current_interval->compute_min_distance(current_interval->stop());					//compute minimal distance
+
+					if(map[j]==NEW || (map[j]==OLD && propagate_flag))	//OLD pieces go back only if the old interval was queued
+					{
+						m_queue.insert(current_interval);
+					}
+				}
+			}
+
+			p = swap.next();
+		}
+
+		if(previous)		//close previous interval and put in into the queue
+		{
+			previous->compute_min_distance(previous->stop());
+			m_queue.insert(previous);
+			previous = NULL;
+		}
+	}
+}
+
+//Computes the intervals obtained on a neighbouring edge of length L (meeting the current edge
+//at corner angle alpha) when the interval [begin, end] with parameters (pseudo_x, pseudo_y, d)
+//is propagated across the face. Writes start/stop/d/pseudo_x/pseudo_y of up to two entries of
+//candidates and returns how many entries were written (0, 1 or 2).
+//Requires pseudo_y <= 0, d below GEODESIC_INF/10, begin <= end, and begin == 0 for a first interval.
+inline unsigned GeodesicAlgorithmExact::compute_propagated_parameters(double pseudo_x,
+																		double pseudo_y,
+																		double d,		//parameters of the interval
+																		double begin,
+																		double end,		//start/end of the interval
+																		double alpha,	//corner angle
+																		double L,		//length of the new edge
+																		bool first_interval,		//if it is the first interval on the edge
+																		bool last_interval,		//if it is the last interval on the edge
+																		bool turn_left,
+																		bool turn_right,
+																		IntervalWithStop* candidates)		//output, room for two entries
+{
+	assert(pseudo_y<=0.0);
+	assert(d<GEODESIC_INF/10.0);
+	assert(begin<=end);
+	assert(!first_interval || begin == 0.0);
+
+	IntervalWithStop* out = candidates;
+
+	if(std::abs(pseudo_y) <= 1e-30)				//pseudo-source is on the edge
+	{
+		//every accepted case below covers the whole new edge [0, L]
+		if(first_interval && pseudo_x <= 0.0)
+		{
+			out->start() = 0.0;
+			out->stop() = L;
+			out->d() = d - pseudo_x;
+			out->pseudo_x() = 0.0;
+			out->pseudo_y() = 0.0;
+			return 1;
+		}
+		if(last_interval && pseudo_x >= end)
+		{
+			out->start() = 0.0;
+			out->stop() = L;
+			out->d() = d + pseudo_x-end;
+			out->pseudo_x() = end*cos(alpha);
+			out->pseudo_y() = -end*sin(alpha);
+			return 1;
+		}
+		if(pseudo_x >= begin && pseudo_x <= end)
+		{
+			out->start() = 0.0;
+			out->stop() = L;
+			out->d() = d;
+			out->pseudo_x() = pseudo_x*cos(alpha);
+			out->pseudo_y() = -pseudo_x*sin(alpha);
+			return 1;
+		}
+		return 0;
+	}
+
+	double const sin_alpha = sin(alpha);
+	double const cos_alpha = cos(alpha);
+
+	//important: for the first_interval, this function returns zero only if the new edge is "visible" from the source
+	//if the new edge can be covered only after turn_over, the value is negative (-1.0)
+	double const L1 = compute_positive_intersection(begin,
+													pseudo_x,
+													pseudo_y,
+													sin_alpha,
+													cos_alpha);
+
+	if(L1 < 0 || L1 >= L)
+	{
+		if(!(first_interval && turn_left))
+		{
+			return 0;
+		}
+
+		//turning around the left vertex: the pseudo-source moves to the origin of the new edge
+		out->start() = 0.0;
+		out->stop() = L;
+		out->d() = d + sqrt(pseudo_x*pseudo_x + pseudo_y*pseudo_y);
+		out->pseudo_y() = 0.0;
+		out->pseudo_x() = 0.0;
+		return 1;
+	}
+
+	double const L2 = compute_positive_intersection(end,
+													pseudo_x,
+													pseudo_y,
+													sin_alpha,
+													cos_alpha);
+
+	bool const runs_to_edge_end = (L2 < 0 || L2 >= L);
+
+	//the pseudo-source expressed in the frame of the new edge
+	out->start() = L1;
+	out->stop() = runs_to_edge_end ? L : L2;
+	out->d() = d;
+	out->pseudo_x() = cos_alpha*pseudo_x + sin_alpha*pseudo_y;
+	out->pseudo_y() = -sin_alpha*pseudo_x + cos_alpha*pseudo_y;
+
+	if(runs_to_edge_end)
+	{
+		return 1;
+	}
+
+	assert(out->pseudo_y() <= 0.0);
+
+	if(!(last_interval && turn_right))
+	{
+		return 1;
+	}
+
+	//turning around the right vertex: a second interval covers [L2, L]
+	out = candidates + 1;
+
+	out->start() = L2;
+	out->stop() = L;
+	double const dx = pseudo_x - end;
+	out->d() = d + sqrt(dx*dx + pseudo_y*pseudo_y);
+	out->pseudo_x() = end*cos_alpha;
+	out->pseudo_y() = -end*sin_alpha;
+
+	return 2;
+}
+
+//Finishes the candidates produced by compute_propagated_parameters for the given edge:
+//merges away a candidate shorter than SMALLEST_INTERVAL_RATIO*edge length (num_candidates may
+//drop from 2 to 1), snaps the outer ends to 0 / edge length when they are within that
+//tolerance, and fills next/edge/direction/source_index/min. With invert set, positions and
+//pseudo_x are mirrored along the edge and the candidates are chained in reverse order.
+inline void GeodesicAlgorithmExact::construct_propagated_intervals(bool invert,
+																	edge_pointer edge,
+																	face_pointer face,		//constructs iNew from the rest of the data
+																	IntervalWithStop* candidates,
+																	unsigned& num_candidates,
+																	interval_pointer source_interval)	//up to two candidates
+{
+	double const edge_length = edge->length();
+	double const local_epsilon = SMALLEST_INTERVAL_RATIO * edge_length;
+
+		//kill very small intervals in order to avoid precision problems
+	if(num_candidates == 2)
+	{
+		IntervalWithStop& c0 = candidates[0];
+		IntervalWithStop& c1 = candidates[1];
+
+		double const merged_start = std::min(c0.start(), c1.start());
+		double const merged_stop = std::max(c0.stop(), c1.stop());
+
+		bool const drop_first = c0.stop()-c0.start() < local_epsilon;
+		bool const drop_second = !drop_first && (c1.stop() - c1.start() < local_epsilon);
+
+		if(drop_first)			//the survivor moves into slot 0
+		{
+			c0 = c1;
+		}
+		if(drop_first || drop_second)
+		{
+			num_candidates = 1;
+			c0.start() = merged_start;
+			c0.stop() = merged_stop;
+		}
+	}
+
+		//leftmost and rightmost candidate (the same one when there is a single candidate)
+	IntervalWithStop* first = candidates;
+	IntervalWithStop* second = candidates;
+	if(num_candidates != 1)
+	{
+		if(candidates->start() <= (candidates+1)->start())
+		{
+			second = candidates+1;
+		}
+		else
+		{
+			first = candidates+1;
+		}
+		assert(first->stop() == second->start());
+	}
+
+	if(first->start() < local_epsilon)
+	{
+		first->start() = 0.0;
+	}
+	if(edge_length - second->stop() < local_epsilon)
+	{
+		second->stop() = edge_length;
+	}
+
+		//invert intervals if necessary; fill missing data and set pointers correctly
+	Interval::DirectionType direction = Interval::FROM_FACE_1;
+	if(edge->adjacent_faces()[0]->id() == face->id())
+	{
+		direction = Interval::FROM_FACE_0;
+	}
+
+	for(unsigned i=0; i<num_candidates; ++i)
+	{
+		IntervalWithStop* p = candidates + i;
+
+		p->edge() = edge;
+		p->direction() = direction;
+		p->source_index() = source_interval->source_index();
+
+		if(!invert)					//straightforward case: keep the order and the positions
+		{
+			if(i == num_candidates - 1)
+			{
+				p->next() = NULL;
+			}
+			else
+			{
+				p->next() = candidates + i + 1;
+			}
+
+			p->min() = 0.0;					//it will be changed later on
+
+			assert(p->start() < p->stop());
+		}
+		else						//mirror the interval along the edge and reverse the chain
+		{
+			if(i == 0)
+			{
+				p->next() = NULL;
+			}
+			else
+			{
+				p->next() = candidates + i - 1;
+			}
+
+			p->pseudo_x() = edge_length - p->pseudo_x();
+
+			double const mirrored_start = edge_length - p->stop();
+			p->stop() = edge_length - p->start();
+			p->start() = mirrored_start;
+
+			p->min() = 0;
+
+			assert(p->start() < p->stop());
+			assert(p->start() >= 0.0);
+			assert(p->stop() <= edge->length());
+		}
+	}
+}
+
+
+//Reports which source the point belongs to and the distance to that source.
+//  point                - point to query
+//  best_source_distance - output, the distance found by best_first_interval
+//Returns the index of the source. When best_first_interval does not assign an index, the
+//result is std::numeric_limits<unsigned>::max(), the same "no source" value trace_back
+//starts from, instead of an uninitialized value.
+inline unsigned GeodesicAlgorithmExact::best_source(SurfacePoint& point,			//quickly find what source this point belongs to and what is the distance to this source
+													   double& best_source_distance)
+{
+	double best_interval_position = 0.0;
+	unsigned best_source_index = std::numeric_limits<unsigned>::max();
+
+	best_first_interval(point,
+						best_source_distance,
+						best_interval_position,
+						best_source_index);
+
+	return best_source_index;
+}
+
+//Finds the interval that gives the smallest distance value at the point.
+//  EDGE point   - the interval covering the point position on its edge;
+//  FACE point   - the closest point over the three edge lists of the face, compared with the
+//                 direct distance to every source returned by m_sources.sources(face); when a
+//                 source wins, the returned interval is NULL and the position is 0.0;
+//  VERTEX point - the best covering interval over all edges adjacent to the vertex.
+//best_total_distance starts at GEODESIC_INF; best_interval_position and best_source_index
+//are written only when something is found. If the distance found exceeds
+//m_propagation_distance_stopped, the distance is reset to GEODESIC_INF and NULL is returned.
+inline interval_pointer GeodesicAlgorithmExact::best_first_interval(SurfacePoint& point,
+															 double& best_total_distance,
+															 double& best_interval_position,
+															 unsigned& best_source_index)
+{
+	assert(point.type() != UNDEFINED_POINT);
+
+	interval_pointer winner = NULL;
+	best_total_distance = GEODESIC_INF;
+
+	if(point.type() == EDGE)
+	{
+		edge_pointer edge = static_cast<edge_pointer>(point.base_element());
+		list_pointer intervals = interval_list(edge);
+
+		best_interval_position = point.distance(edge->v0());
+		winner = intervals->covering_interval(best_interval_position);
+		if(winner)
+		{
+			//assert(best_interval && best_interval->d() < GEODESIC_INF);
+			best_total_distance = winner->signal(best_interval_position);
+			best_source_index = winner->source_index();
+		}
+	}
+	else if(point.type() == FACE)
+	{
+		face_pointer face = static_cast<face_pointer>(point.base_element());
+		for(unsigned k=0; k<3; ++k)
+		{
+			list_pointer intervals = interval_list(face->adjacent_edges()[k]);
+
+			double found_offset;
+			double found_distance;
+			interval_pointer found_interval;
+
+			intervals->find_closest_point(&point,
+										  found_offset,
+										  found_distance,
+										  found_interval);
+
+			if(found_interval && found_distance < best_total_distance)
+			{
+				winner = found_interval;
+				best_total_distance = found_distance;
+				best_interval_position = found_offset;
+				best_source_index = found_interval->source_index();
+			}
+		}
+
+			//check for all sources that might be located inside this face
+		SortedSources::sorted_iterator_pair face_sources = m_sources.sources(face);
+		SortedSources::sorted_iterator it = face_sources.first;
+		while(it != face_sources.second)
+		{
+			SurfacePointWithIndex* source = *it;
+			double direct_distance = point.distance(source);
+			if(direct_distance < best_total_distance)
+			{
+				winner = NULL;					//reached directly, no interval involved
+				best_total_distance = direct_distance;
+				best_interval_position = 0.0;
+				best_source_index = source->index();
+			}
+			++it;
+		}
+	}
+	else if(point.type() == VERTEX)
+	{
+		vertex_pointer vertex = static_cast<vertex_pointer>(point.base_element());
+		for(unsigned k=0; k<vertex->adjacent_edges().size(); ++k)
+		{
+			edge_pointer edge = vertex->adjacent_edges()[k];
+			list_pointer intervals = interval_list(edge);
+
+			//the vertex is either the origin (0.0) or the far end (length) of the edge
+			double position = edge->v0()->id() == vertex->id() ? 0.0 : edge->length();
+			interval_pointer covering = intervals->covering_interval(position);
+			if(!covering)
+			{
+				continue;
+			}
+
+			double covering_distance = covering->signal(position);
+			if(covering_distance < best_total_distance)
+			{
+				winner = covering;
+				best_total_distance = covering_distance;
+				best_interval_position = position;
+				best_source_index = covering->source_index();
+			}
+		}
+	}
+
+	if(best_total_distance > m_propagation_distance_stopped)		//result is unreliable
+	{
+		best_total_distance = GEODESIC_INF;
+		return NULL;
+	}
+
+	return winner;
+}
+
+//Trace back the piecewise-linear path from `destination` towards its source.
+//`path` is cleared first; it stays empty when no reliable distance is known
+//for the destination, otherwise it starts with `destination`.
+inline void GeodesicAlgorithmExact::trace_back(SurfacePoint& destination,
+										std::vector<SurfacePoint>& path)
+{
+	path.clear();
+
+	double start_distance;
+	double start_position;
+	unsigned source_index = std::numeric_limits<unsigned>::max();
+	interval_pointer first_interval = best_first_interval(destination,
+														  start_distance,
+														  start_position,
+														  source_index);
+
+	//unable to find the right path
+	if(start_distance >= GEODESIC_INF/2.0)
+	{
+		return;
+	}
+
+	path.push_back(destination);
+
+	//a NULL interval at this point means a source inside the face won immediately
+	if(first_interval)
+	{
+		std::vector<edge_pointer> candidate_edges;
+		candidate_edges.reserve(10);
+
+		//keep stepping while the last path point is not directly visible from a source
+		while(visible_from_source(path.back()) < 0)
+		{
+			SurfacePoint& current = path.back();
+
+			possible_traceback_edges(current, candidate_edges);
+
+			interval_pointer next_interval;
+			double next_distance;
+			double next_position;
+
+			best_point_on_the_edge_set(current,
+									   candidate_edges,
+									   next_interval,
+									   next_distance,
+									   next_position);
+
+			assert(next_distance<GEODESIC_INF);
+			source_index = next_interval->source_index();
+
+			//snap to an end vertex when the hit lies within a small ratio of the edge length
+			edge_pointer edge = next_interval->edge();
+			double snap_tolerance = SMALLEST_INTERVAL_RATIO*edge->length();
+			if(next_position < snap_tolerance)
+			{
+				path.push_back(SurfacePoint(edge->v0()));
+			}
+			else if(next_position > edge->length()-snap_tolerance)
+			{
+				path.push_back(SurfacePoint(edge->v1()));
+			}
+			else
+			{
+				path.push_back(SurfacePoint(edge, next_position/edge->length()));
+			}
+		}
+	}
+
+	//finish at the source itself unless the path already ends exactly there
+	SurfacePoint& source = static_cast<SurfacePoint&>(m_sources[source_index]);
+	if(path.back().distance(&source) > 0)
+	{
+		path.push_back(source);
+	}
+}
+
+//Print the base-class statistics followed by interval counts, an estimate of
+//the memory taken by interval lists and intervals, and the queue/iteration counters.
+inline void GeodesicAlgorithmExact::print_statistics()
+{
+	GeodesicAlgorithmBase::print_statistics();
+
+	//total number of intervals over all edge lists
+	unsigned total_intervals = 0;
+	for(unsigned edge_id=0; edge_id<m_edge_interval_lists.size(); ++edge_id)
+	{
+		total_intervals += m_edge_interval_lists[edge_id].number_of_intervals();
+	}
+
+	double memory_bytes = m_edge_interval_lists.size()*sizeof(IntervalList) +
+						  total_intervals*sizeof(Interval);
+	double average_per_edge = static_cast<double>(total_intervals) /
+							  static_cast<double>(m_edge_interval_lists.size());
+
+	std::cout << "uses about " << memory_bytes/1e6 << "Mb of memory" <<std::endl;
+	std::cout << total_intervals << " total intervals, or "
+			  << average_per_edge << " intervals per edge"
+			  << std::endl;
+	std::cout << "maximum interval queue size is " << m_queue_max_size << std::endl;
+	std::cout << "number of interval propagations is " << m_iterations << std::endl;
+}
+
+}		//geodesic
+}
+
+// Exact geodesic distances on a triangle mesh.
+//
+// Inputs:
+//   V   #V by 3 vertex positions
+//   F   #F by 3 triangle indices into V
+//   VS  column of source vertex indices (into V)
+//   FS  column of source face indices (into F)
+//   VT  column of target vertex indices (into V)
+//   FT  column of target face indices (into F)
+// Output:
+//   D   (#VT + #FT) by 1 path lengths; rows follow the order "vertex targets,
+//       then face targets". Each entry is igl::geodesic::length() of the path
+//       returned by trace_back(), which is empty for a target without a
+//       reliable distance.
+//
+// Face sources/targets are stored *after* the vertex ones. Writing them at
+// index i (as before) overwrote the vertex entries and left the tail of the
+// arrays default-constructed whenever both kinds were supplied.
+template <
+  typename DerivedV,
+  typename DerivedF,
+  typename DerivedVS,
+  typename DerivedFS,
+  typename DerivedVT,
+  typename DerivedFT,
+  typename DerivedD>
+IGL_INLINE void igl::exact_geodesic(
+  const Eigen::MatrixBase<DerivedV> &V,
+  const Eigen::MatrixBase<DerivedF> &F,
+  const Eigen::MatrixBase<DerivedVS> &VS,
+  const Eigen::MatrixBase<DerivedFS> &FS,
+  const Eigen::MatrixBase<DerivedVT> &VT,
+  const Eigen::MatrixBase<DerivedFT> &FT,
+  Eigen::PlainObjectBase<DerivedD> &D)
+{
+  assert(V.cols() == 3 && F.cols() == 3 && "Only support 3D triangle mesh");
+  assert(VS.cols() <=1 && FS.cols() <= 1 && VT.cols() <= 1 && FT.cols() <=1 && "Only support one dimensional inputs");
+
+  // Flatten V and F row by row for Mesh::initialize_mesh_data.
+  std::vector<typename DerivedV::Scalar> points(V.rows() * V.cols());
+  std::vector<typename DerivedF::Scalar> faces(F.rows() * F.cols());
+  const int num_coordinates = static_cast<int>(points.size());
+  for (int i = 0; i < num_coordinates; i++)
+  {
+    points[i] = V(i / 3, i % 3);
+  }
+  const int num_corners = static_cast<int>(faces.size());
+  for (int i = 0; i < num_corners; i++)
+  {
+    faces[i] = F(i / 3, i % 3);
+  }
+
+  igl::geodesic::Mesh mesh;
+  mesh.initialize_mesh_data(points, faces);
+  igl::geodesic::GeodesicAlgorithmExact exact_algorithm(&mesh);
+
+  // Vertex entries occupy [0, #VS) / [0, #VT); face entries follow them.
+  std::vector<igl::geodesic::SurfacePoint> source(VS.rows() + FS.rows());
+  std::vector<igl::geodesic::SurfacePoint> target(VT.rows() + FT.rows());
+  for (int i = 0; i < VS.rows(); i++)
+  {
+    source[i] = (igl::geodesic::SurfacePoint(&mesh.vertices()[VS(i)]));
+  }
+  for (int i = 0; i < FS.rows(); i++)
+  {
+    source[VS.rows() + i] = (igl::geodesic::SurfacePoint(&mesh.faces()[FS(i)]));
+  }
+
+  for (int i = 0; i < VT.rows(); i++)
+  {
+    target[i] = (igl::geodesic::SurfacePoint(&mesh.vertices()[VT(i)]));
+  }
+  for (int i = 0; i < FT.rows(); i++)
+  {
+    target[VT.rows() + i] = (igl::geodesic::SurfacePoint(&mesh.faces()[FT(i)]));
+  }
+
+  exact_algorithm.propagate(source);
+
+  // trace_back() clears `path` itself, so one vector can be reused.
+  std::vector<igl::geodesic::SurfacePoint> path;
+  const int num_targets = static_cast<int>(target.size());
+  D.resize(num_targets, 1);
+  for (int i = 0; i < num_targets; i++)
+  {
+    exact_algorithm.trace_back(target[i], path);
+    D(i) = igl::geodesic::length(path);
+  }
+}
+
+#ifdef IGL_STATIC_LIBRARY
+template void igl::exact_geodesic<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, 1, 0, -1, 1>, Eigen::Matrix<int, -1, 1, 0, -1, 1>, Eigen::Matrix<int, -1, 1, 0, -1, 1>, Eigen::Matrix<int, -1, 1, 0, -1, 1>, Eigen::Matrix<double, -1, 1, 0, -1, 1>>(Eigen::MatrixBase<Eigen::Matrix<double, -1, -1, 0, -1, -1>> const &, Eigen::MatrixBase<Eigen::Matrix<int, -1, -1, 0, -1, -1>> const &, Eigen::MatrixBase<Eigen::Matrix<int, -1, 1, 0, -1, 1>> const &, Eigen::MatrixBase<Eigen::Matrix<int, -1, 1, 0, -1, 1>> const &, Eigen::MatrixBase<Eigen::Matrix<int, -1, 1, 0, -1, 1>> const &, Eigen::MatrixBase<Eigen::Matrix<int, -1, 1, 0, -1, 1>> const &, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, 1, 0, -1, 1>> &);
+template void igl::exact_geodesic<Eigen::Matrix<double, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, Eigen::Matrix<int, -1, -1, 0, -1, -1>, Eigen::Matrix<double, -1, -1, 0, -1, -1> >(Eigen::MatrixBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> > const&, Eigen::MatrixBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::MatrixBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::MatrixBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::MatrixBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::MatrixBase<Eigen::Matrix<int, -1, -1, 0, -1, -1> > const&, Eigen::PlainObjectBase<Eigen::Matrix<double, -1, -1, 0, -1, -1> >&);
+#endif

+ 0 - 1
include/igl/exact_geodesic.cpp.REMOVED.git-id

@@ -1 +0,0 @@
-d962df24e92081e5b1fa2cd25f3123a7b0e03235

+ 3168 - 0
include/igl/ply.h

@@ -0,0 +1,3168 @@
+#ifndef IGL_PLY_H
+#define IGL_PLY_H
+/*
+
+Header for PLY polygon files.
+
+- Greg Turk, March 1994
+
+A PLY file contains a single polygonal _object_.
+
+An object is composed of lists of _elements_.  Typical elements are
+vertices, faces, edges and materials.
+
+Each type of element for a given object has one or more _properties_
+associated with the element type.  For instance, a vertex element may
+have as properties three floating-point values x,y,z and three unsigned
+chars for red, green and blue.
+
+---------------------------------------------------------------
+
+Copyright (c) 1994 The Board of Trustees of The Leland Stanford
+Junior University.  All rights reserved.   
+  
+Permission to use, copy, modify and distribute this software and its   
+documentation for any purpose is hereby granted without fee, provided   
+that the above copyright notice and this permission notice appear in   
+all copies of this software and that you do not sell the software.   
+  
+THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,   
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY   
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.   
+
+*/
+
+/*
+--------------------------------------------------------------------------------
+Joao Fradinho Oliveira, July 2005
+Copyright (c) 2005 University College London 
+copyright conditions as above
+
+update for ply reading of multi OS ply files, in any OS (Unix, Macintosh, PC)
+--------------------------------------------------------------------------------
+
+ply_open_for_reading
+
+* was changed to always open files in binary mode, files written in ascii can also be
+read with this binary mode.
+
+* allows opening of filenames that are alias files in macintosh
+
+* code tested on pc and mac
+
+
+get_words
+
+* was changed to handle line breaks in UNIX, MACINTOSH, PC, it resets the file pointer
+accordingly for the next read.
+
+
+NOTES:
+A ply file always has an ascii part for the header, and a binary or ascii
+part for the data.
+Because the header is ascii, it relies on line breaks, which makes models
+operating system dependent: a line break is indicated with the escape character \n on unix,
+with \r on a macintosh, and with \r\n on a pc  <-- 2 unsigned chars, 2 bytes, instead of 1 byte.
+
+get_words allows reading files from any OS; text editors such as BBEdit do not save the linebreaks
+properly to target OSs with binary files.
+
+*/
+
+#ifndef __PLY_H__
+#define __PLY_H__
+
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+
+namespace igl {
+    namespace ply {
+    
+#define PLY_ASCII         1      /* ascii PLY file */
+#define PLY_BINARY_BE     2      /* binary PLY file, big endian */
+#define PLY_BINARY_LE     3      /* binary PLY file, little endian */
+#define PLY_BINARY_NATIVE 4      /* binary PLY file, same endianness as
+				    current architecture */
+    
+#define PLY_OKAY    0           /* ply routine worked okay */
+#define PLY_ERROR  -1           /* error in ply routine */
+
+/* scalar data types supported by PLY format */
+
+#define PLY_START_TYPE 0
+#define PLY_CHAR       1
+#define PLY_SHORT      2
+#define PLY_INT        3
+#define PLY_UCHAR      4
+#define PLY_USHORT     5
+#define PLY_UINT       6
+#define PLY_FLOAT      7
+#define PLY_DOUBLE     8
+#define PLY_END_TYPE   9
+
+#define  PLY_SCALAR  0
+#define  PLY_LIST    1
+
+
+
+
+/* One property of an element.  The *_type fields presumably hold the PLY_*
+   scalar type codes defined above (PLY_CHAR .. PLY_DOUBLE) -- TODO confirm;
+   is_list uses the PLY_SCALAR / PLY_LIST values. */
+typedef struct PlyProperty {    /* description of a property */
+
+  const char *name;                           /* property name */
+  int external_type;                    /* file's data type */
+  int internal_type;                    /* program's data type */
+  int offset;                           /* offset bytes of prop in a struct */
+
+  int is_list;                          /* 1 = list, 0 = scalar */
+  int count_external;                   /* file's count type */
+  int count_internal;                   /* program's count type */
+  int count_offset;                     /* offset byte for list count */
+
+} PlyProperty;
+
+/* One element type (e.g. vertices or faces) together with the list of its
+   properties.  props and store_prop are parallel arrays of nprops entries. */
+typedef struct PlyElement {     /* description of an element */
+  const char *name;                   /* element name */
+  int num;                      /* number of elements in this object */
+  int size;                     /* size of element (bytes) or -1 if variable */
+  int nprops;                   /* number of properties for this element */
+  PlyProperty **props;          /* list of properties in the file */
+  char *store_prop;             /* flags: property wanted by user? */
+  int other_offset;             /* offset to un-asked-for props, or -1 if none*/
+  int other_size;               /* size of other_props structure */
+} PlyElement;
+
+/* Description of the properties of one element that the user did not ask
+   for explicitly; `name` identifies the element they belong to. */
+typedef struct PlyOtherProp {   /* describes other properties in an element */
+  const char *name;                   /* element name */
+  int size;                     /* size of other_props */
+  int nprops;                   /* number of properties in other_props */
+  PlyProperty **props;          /* list of properties in other_props */
+} PlyOtherProp;
+
+/* Wrapper around an untyped pointer to one instance's "other" property data. */
+typedef struct OtherData { /* for storing other_props for an other element */
+  void *other_props;       /* untyped pointer to the stored property data */
+} OtherData;
+
+/* All data kept for one element type that the user does not interpret. */
+typedef struct OtherElem {     /* data for one "other" element */
+  char *elem_name;             /* names of other elements */
+  int elem_count;              /* count of instances of each element */
+  OtherData **other_data;      /* actual property data for the elements */
+  PlyOtherProp *other_props;   /* description of the property data */
+} OtherElem;
+
+/* Collection of OtherElem records: other_list holds num_elems entries. */
+typedef struct PlyOtherElems {  /* "other" elements, not interpreted by user */
+  int num_elems;                /* number of other elements */
+  OtherElem *other_list;        /* list of data for other elements */
+} PlyOtherElems;
+
+/* State of one open PLY file.  file_type holds one of the PLY_ASCII /
+   PLY_BINARY_* constants; elems has nelems entries. */
+typedef struct PlyFile {        /* description of PLY file */
+  FILE *fp;                     /* file pointer */
+  int file_type;                /* ascii or binary */
+  float version;                /* version number of file */
+  int nelems;                   /* number of elements of object */
+  PlyElement **elems;           /* list of elements */
+  int num_comments;             /* number of comments */
+  char **comments;              /* list of comments */
+  int num_obj_info;             /* number of items of object information */
+  char **obj_info;              /* list of object info items */
+  PlyElement *which_elem;       /* which element we're currently writing */
+  PlyOtherElems *other_elems;   /* "other" elements from a PLY file */
+} PlyFile;
+
+/* memory allocation */
+extern char *my_alloc();
+#define myalloc(mem_size) my_alloc((mem_size), __LINE__, __FILE__)
+
+/* Allocation helper macros; the whole group is skipped when ALLOCN is
+   already defined elsewhere. */
+#ifndef ALLOCN
+/* REALLOCN: resize PTR from OLD_N to NEW_N objects of TYPE.  Falls back to
+   ALLOCN when OLD_N is 0, zero-fills any newly added tail, and prints a
+   message and exits on failure.  The arguments are evaluated more than once,
+   and OLD_N / NEW_N are printed with %d. */
+#define REALLOCN(PTR,TYPE,OLD_N,NEW_N)							\
+        {										\
+	    if ((OLD_N) == 0)                                           		\
+	    {   ALLOCN((PTR),TYPE,(NEW_N));}                            		\
+	    else									\
+	    {								    		\
+	       (PTR) = (TYPE *)realloc((PTR),(NEW_N)*sizeof(TYPE));			\
+	       if (((PTR) == NULL) && ((NEW_N) != 0))					\
+	       {									\
+		   fprintf(stderr, "Memory reallocation failed on line %d in %s\n", 	\
+		           __LINE__, __FILE__);                             		\
+		   fprintf(stderr, "  tried to reallocate %d->%d\n",       		\
+			   (OLD_N), (NEW_N));                              		\
+		   exit(-1);								\
+	       }									\
+	       if ((NEW_N)>(OLD_N))							\
+		   memset((char *)(PTR)+(OLD_N)*sizeof(TYPE), 0,			\
+		          ((NEW_N)-(OLD_N))*sizeof(TYPE));				\
+	    }										\
+	}
+
+/* ALLOCN: allocate N zero-initialised objects of TYPE with calloc; prints a
+   message and exits when calloc returns NULL. */
+#define  ALLOCN(PTR,TYPE,N) 					\
+	{ (PTR) = (TYPE *) calloc(((unsigned)(N)),sizeof(TYPE));\
+	  if ((PTR) == NULL) {    				\
+	  fprintf(stderr, "Memory allocation failed on line %d in %s\n", \
+		 __LINE__, __FILE__);                           \
+	  exit(-1);                                             \
+	  }							\
+	}
+
+
+/* FREE: release PTR and reset it to NULL. */
+#define FREE(PTR)  { free((PTR)); (PTR) = NULL; }
+#endif
+    
+
+/*** declaration of routines ***/
+
+/* Public PLY routines.  Each prototype must match its definition exactly:
+   inside a C++ namespace a differing parameter type declares a separate
+   overload, and an inline function that is declared but never defined cannot
+   be used.  ply_open_for_writing is defined with a `const char *` filename,
+   so it is declared that way here as well. */
+
+inline int get_native_binary_type2();
+
+inline PlyFile *ply_write(FILE *, int,const char **, int);
+inline PlyFile *ply_open_for_writing(const char *, int,const char **, int, float *);
+inline void ply_describe_element(PlyFile *, const char *, int, int, PlyProperty *);
+inline void ply_describe_property(PlyFile *, const char *, PlyProperty *);
+inline void ply_element_count(PlyFile *, const char *, int);
+inline void ply_header_complete(PlyFile *);
+inline void ply_put_element_setup(PlyFile *, const char *);
+inline void ply_put_element(PlyFile *, void *, int*);
+inline void ply_put_comment(PlyFile *, char *);
+inline void ply_put_obj_info(PlyFile *, char *);
+inline PlyFile *ply_read(FILE *, int *, char ***);
+inline PlyFile *ply_open_for_reading( const char *, int *, char ***, int *, float *);
+inline PlyProperty **ply_get_element_description(PlyFile *, const char *, int*, int*);
+inline void ply_get_element_setup( PlyFile *, const char *, int, PlyProperty *);
+inline void ply_get_property(PlyFile *, const char *, PlyProperty *);
+inline PlyOtherProp *ply_get_other_properties(PlyFile *, const char *, int);
+inline void ply_get_element(PlyFile *, void *, int *);
+inline char **ply_get_comments(PlyFile *, int *);
+inline char **ply_get_obj_info(PlyFile *, int *);
+inline void ply_close(PlyFile *);
+inline void ply_get_info(PlyFile *, float *, int *);
+inline PlyOtherElems *ply_get_other_element (PlyFile *, const char *, int);
+inline void ply_describe_other_elements ( PlyFile *, PlyOtherElems *);
+inline void ply_put_other_elements (PlyFile *);
+inline void ply_free_other_elements (PlyOtherElems *);
+inline void ply_describe_other_properties(PlyFile *, PlyOtherProp *, int);
+
+inline int equal_strings(const char *, const char *);
+
+
+}
+}
+#endif /* !__PLY_H__ */
+/*
+
+The interface routines for reading and writing PLY polygon files.
+
+Greg Turk, February 1994
+
+---------------------------------------------------------------
+
+A PLY file contains a single polygonal _object_.
+
+An object is composed of lists of _elements_.  Typical elements are
+vertices, faces, edges and materials.
+
+Each type of element for a given object has one or more _properties_
+associated with the element type.  For instance, a vertex element may
+have as properties the floating-point values x,y,z and the three unsigned
+chars representing red, green and blue.
+
+---------------------------------------------------------------
+
+Copyright (c) 1994 The Board of Trustees of The Leland Stanford
+Junior University.  All rights reserved.   
+  
+Permission to use, copy, modify and distribute this software and its   
+documentation for any purpose is hereby granted without fee, provided   
+that the above copyright notice and this permission notice appear in   
+all copies of this software and that you do not sell the software.   
+  
+THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTY OF ANY KIND,   
+EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY   
+WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.   
+
+*/
+
+/*
+--------------------------------------------------------------------------------
+Joao Fradinho Oliveira, July 2005
+University College London 
+
+update for ply reading of multi OS ply files, in any OS (Unix, Macintosh, PC)
+--------------------------------------------------------------------------------
+
+ply_open_for_reading
+
+* was changed to always open files in binary mode, files written in ascii can also be
+read with this binary mode.
+
+* allows opening of filenames that are alias files in macintosh
+
+* code tested on pc and mac
+
+
+get_words
+
+* was changed to handle line breaks in UNIX, MACINTOSH, PC, it resets the file pointer
+accordingly for the next read.
+
+
+NOTES:
+A ply file always has an ascii part for the header, and a binary or ascii
+part for the data.
+Because the header is ascii, it relies on line breaks, which makes models
+operating system dependent: a line break is indicated with the escape character \n on unix,
+with \r on a macintosh, and with \r\n on a pc  <-- 2 unsigned chars, 2 bytes, instead of 1 byte.
+
+get_words allows reading files from any OS; text editors such as BBEdit do not save the linebreaks
+properly to target OSs with binary files.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+//#include "ply.h"
+
+
+namespace igl {
+    namespace ply {
+
+
+// Use unnamed namespace to avoid duplicate symbols
+/*
+namespace
+{
+const char *type_names[] = {
+"invalid",
+"char", "short", "int",
+"uchar", "ushort", "uint",
+"float", "double",
+};
+
+//  names of scalar types 
+const char *alt_type_names[] = { 
+"invalid",
+"int8", "int16", "int32", "uint8", "uint16", "uint32", "float32", "float64",
+};
+
+int ply_type_size[] = {
+  0, 1, 2, 4, 1, 2, 4, 4, 8
+};
+}
+
+typedef union
+{
+      int  int_value;
+      char byte_values[sizeof(int)];
+} endian_test_type;
+
+
+namespace
+{
+static int native_binary_type = -1;
+static int types_checked = 0;
+}
+*/
+
+/* presumably the "none" value for PlyElement::other_offset, which is
+   documented as -1 when there are no un-asked-for properties -- TODO confirm */
+#define NO_OTHER_PROPS  -1
+
+/* presumably values for PlyElement::store_prop[] on the reading side -- TODO confirm */
+#define DONT_STORE_PROP  0
+#define STORE_PROP       1
+
+/* values written to PlyElement::store_prop[] by the ply_describe_* routines:
+   NAMED_PROP for properties described individually, OTHER_PROP for those
+   added through ply_describe_other_properties */
+#define OTHER_PROP       0
+#define NAMED_PROP       1
+
+/* Prototypes of the helper routines used by the public ply_* functions. */
+
+/* returns 1 if strings are equal, 0 if not */
+inline int equal_strings(const char *, const char *);
+
+/* find an element in a plyfile's list */
+inline PlyElement *find_element(PlyFile *, const char *);
+
+/* find a property in an element's list */
+inline PlyProperty *find_property(PlyElement *, const char *, int *);
+
+/* write to a file the word describing a PLY file data type */
+inline void write_scalar_type (FILE *, int);
+
+/* read a line from a file and break it up into separate words */
+inline char **get_words(FILE *, int *, char **);
+inline char **old_get_words(FILE *, int *);
+
+/* write an item to a file */
+inline void write_binary_item(FILE *, int, int, unsigned int, double, int, int*);
+inline void write_ascii_item(FILE *, int, unsigned int, double, int);
+inline double old_write_ascii_item(FILE *, char *, int);
+
+/* add information to a PLY file descriptor */
+inline void add_element(PlyFile *, char **);
+inline void add_property(PlyFile *, char **);
+inline void add_comment(PlyFile *, char *);
+inline void add_obj_info(PlyFile *, char *);
+
+/* copy a property (called as copy_property(destination, source)) */
+inline void copy_property(PlyProperty *, PlyProperty *);
+
+/* store a value into where a pointer and a type specify */
+inline void store_item(char *, int, int, unsigned int, double);
+
+/* return the value of a stored item */
+inline void get_stored_item( void *, int, int *, unsigned int *, double *);
+
+/* return the value stored in an item, given ptr to it and its type */
+inline double get_item_value(char *, int);
+
+/* get binary or ascii item and store it according to ptr and type */
+inline void get_ascii_item(char *, int, int *, unsigned int *, double *);
+inline void get_binary_item(FILE *, int, int, int *, unsigned int *, double *, int*);
+
+/* get a bunch of elements from a file */
+inline void ascii_get_element(PlyFile *, char *);
+inline void binary_get_element(PlyFile *, char *, int*);
+
+/* memory allocation (the function behind the myalloc() macro) */
+inline char *my_alloc(int, int, const char *);
+
+/* byte ordering */
+inline void get_native_binary_type(int*);
+inline void swap_bytes(char *, int);
+
+inline int check_types();
+
+
+/*************/
+/*  Writing  */
+/*************/
+
+
+/******************************************************************************
+Given a file pointer, get ready to write PLY data to the file.
+
+Entry:
+  fp         - the given file pointer
+  nelems     - number of elements in object
+  elem_names - list of element names
+  file_type  - file type, either ascii or binary
+
+Exit:
+  returns a pointer to a PlyFile, used to refer to this file, or NULL if error
+******************************************************************************/
+
+inline PlyFile *ply_write(
+  FILE *fp,
+  int nelems,
+  const char **elem_names,
+  int file_type
+)
+{
+  int i;
+  PlyFile *plyfile;
+  PlyElement *elem;
+
+  /* check for NULL file pointer */
+  if (fp == NULL)
+    return (NULL);
+
+  /* The former "if (native_binary_type == -1)" / "if (!types_checked)" guards
+     tested freshly initialised locals and were therefore always taken; call
+     both routines unconditionally, in the same order. */
+  int native_binary_type = get_native_binary_type2();
+  (void) check_types();
+
+  /* create a record for this object */
+
+  plyfile = (PlyFile *) myalloc (sizeof (PlyFile));
+  if (file_type == PLY_BINARY_NATIVE)
+     plyfile->file_type = native_binary_type;
+  else
+     plyfile->file_type = file_type;
+  plyfile->num_comments = 0;
+  plyfile->comments = NULL;       /* nothing stored yet */
+  plyfile->num_obj_info = 0;
+  plyfile->obj_info = NULL;       /* nothing stored yet */
+  plyfile->nelems = nelems;
+  plyfile->version = 1.0;
+  plyfile->fp = fp;
+  plyfile->which_elem = NULL;     /* no element is being written yet */
+  plyfile->other_elems = NULL;
+
+  /* tuck aside the names of the elements; every element starts out with no
+     properties, and its pointer fields are cleared so that they never hold
+     indeterminate values */
+
+  plyfile->elems = (PlyElement **) myalloc (sizeof (PlyElement *) * nelems);
+  for (i = 0; i < nelems; i++) {
+    elem = (PlyElement *) myalloc (sizeof (PlyElement));
+    plyfile->elems[i] = elem;
+    elem->name = strdup (elem_names[i]);   /* private copy of the name */
+    elem->num = 0;
+    elem->nprops = 0;
+    elem->props = NULL;
+    elem->store_prop = NULL;
+    elem->other_offset = NO_OTHER_PROPS;
+    elem->other_size = 0;
+  }
+
+  /* return pointer to the file descriptor */
+  return (plyfile);
+}
+
+
+/******************************************************************************
+Open a polygon file for writing.
+
+Entry:
+  filename   - name of file to write to
+  nelems     - number of elements in object
+  elem_names - list of element names
+  file_type  - file type, either ascii or binary
+
+Exit:
+  version - version number of PLY file
+  returns a file identifier, used to refer to this file, or NULL if error
+******************************************************************************/
+
+inline PlyFile *ply_open_for_writing(
+  const char *filename,
+  int nelems,
+  const char **elem_names,
+  int file_type,
+  float *version
+)
+{
+  PlyFile *plyfile;
+  char *name;
+  FILE *fp;
+
+  /* tack on the extension .ply, if necessary (+5 = ".ply" and the NUL) */
+
+  name = (char *) myalloc (sizeof (char) * (strlen (filename) + 5));
+  strcpy (name, filename);
+  if (strlen (name) < 4 ||
+      strcmp (name + strlen (name) - 4, ".ply") != 0)
+      strcat (name, ".ply");
+
+  /* open the file for writing; binary PLY data must not pass through a
+     text-mode stream, which may translate newline bytes on some platforms,
+     so only ascii files keep the original "w" mode */
+
+  fp = fopen (name, file_type == PLY_ASCII ? "w" : "wb");
+
+  /* the name was only needed for fopen; it used to be leaked.  Memory from
+     myalloc is passed to realloc elsewhere in this file, so it is released
+     with free. */
+  free (name);
+
+  if (fp == NULL) {
+    return (NULL);
+  }
+
+  /* create the actual PlyFile structure */
+
+  plyfile = ply_write (fp, nelems, elem_names, file_type);
+  if (plyfile == NULL) {
+    fclose (fp);                /* do not leak the stream on failure */
+    return (NULL);
+  }
+
+  /* say what PLY file version number we're writing */
+  *version = plyfile->version;
+
+  /* return pointer to the file descriptor */
+  return (plyfile);
+}
+
+
+/******************************************************************************
+Describe an element, including its properties and how many will be written
+to the file.
+
+Entry:
+  plyfile   - file identifier
+  elem_name - name of element that information is being specified about
+  nelems    - number of elements of this type to be written
+  nprops    - number of properties contained in the element
+  prop_list - list of properties
+******************************************************************************/
+
+inline void ply_describe_element(
+  PlyFile *plyfile,
+  const char *elem_name,
+  int nelems,
+  int nprops,
+  PlyProperty *prop_list
+)
+{
+  /* the element must already be known to the file; give up otherwise */
+  PlyElement *target = find_element (plyfile, elem_name);
+  if (target == NULL) {
+    fprintf(stderr,"ply_describe_element: can't find element '%s'\n",elem_name);
+    exit (-1);
+  }
+
+  target->num = nelems;
+
+  /* make room for nprops property pointers and their flags */
+  target->nprops = nprops;
+  target->props = (PlyProperty **) myalloc (sizeof (PlyProperty *) * nprops);
+  target->store_prop = (char *) myalloc (sizeof (char) * nprops);
+
+  /* store a private copy of every property, flagged as a named property */
+  for (int k = 0; k < nprops; k++) {
+    PlyProperty *copy = (PlyProperty *) myalloc (sizeof (PlyProperty));
+    target->props[k] = copy;
+    target->store_prop[k] = NAMED_PROP;
+    copy_property (copy, prop_list + k);
+  }
+}
+
+
+/******************************************************************************
+Describe a property of an element.
+
+Entry:
+  plyfile   - file identifier
+  elem_name - name of element that information is being specified about
+  prop      - the new property
+******************************************************************************/
+
+inline void ply_describe_property(
+  PlyFile *plyfile,
+  const char *elem_name,
+  PlyProperty *prop
+)
+{
+  PlyElement *elem;
+  PlyProperty *elem_prop;
+
+  /* look for appropriate element */
+  elem = find_element (plyfile, elem_name);
+  if (elem == NULL) {
+    fprintf(stderr, "ply_describe_property: can't find element '%s'\n",
+            elem_name);
+    return;
+  }
+
+  /* create room for new property */
+
+  if (elem->nprops == 0) {
+    elem->props = (PlyProperty **) myalloc (sizeof (PlyProperty *));
+    elem->store_prop = (char *) myalloc (sizeof (char));
+    elem->nprops = 1;
+  }
+  else {
+    /* grow both arrays through temporaries: on failure realloc returns NULL
+       and leaves the old block untouched, so the result must be checked
+       before it replaces the only pointer to that block */
+    int new_count = elem->nprops + 1;
+    PlyProperty **grown_props = (PlyProperty **)
+                  realloc (elem->props, sizeof (PlyProperty *) * new_count);
+    char *grown_flags = (char *)
+                  realloc (elem->store_prop, sizeof (char) * new_count);
+    if (grown_props != NULL)
+      elem->props = grown_props;
+    if (grown_flags != NULL)
+      elem->store_prop = grown_flags;
+    if (grown_props == NULL || grown_flags == NULL) {
+      /* same policy as the allocation macros in this file: report and exit */
+      fprintf(stderr, "ply_describe_property: memory reallocation failed\n");
+      exit (-1);
+    }
+    elem->nprops = new_count;
+  }
+
+  /* copy the new property into the last slot */
+
+  elem_prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
+  elem->props[elem->nprops - 1] = elem_prop;
+  elem->store_prop[elem->nprops - 1] = NAMED_PROP;
+  copy_property (elem_prop, prop);
+}
+
+
+/******************************************************************************
+Describe what the "other" properties are that are to be stored, and where
+they are in an element.
+******************************************************************************/
+
+inline void ply_describe_other_properties(
+  PlyFile *plyfile,
+  PlyOtherProp *other,
+  int offset
+)
+{
+  int i;
+  PlyElement *elem;
+  PlyProperty *prop;
+
+  /* look for appropriate element (other->name is the element's name) */
+  elem = find_element (plyfile, other->name);
+  if (elem == NULL) {
+    fprintf(stderr, "ply_describe_other_properties: can't find element '%s'\n",
+            other->name);
+    return;
+  }
+
+  /* create room for other properties */
+
+  if (elem->nprops == 0) {
+    elem->props = (PlyProperty **)
+                  myalloc (sizeof (PlyProperty *) * other->nprops);
+    elem->store_prop = (char *) myalloc (sizeof (char) * other->nprops);
+  }
+  else {
+    /* grow both arrays through temporaries: on failure realloc returns NULL
+       and leaves the old block untouched, so the result must be checked
+       before it replaces the only pointer to that block */
+    int newsize = elem->nprops + other->nprops;
+    PlyProperty **grown_props = (PlyProperty **)
+                  realloc (elem->props, sizeof (PlyProperty *) * newsize);
+    char *grown_flags = (char *)
+                  realloc (elem->store_prop, sizeof (char) * newsize);
+    if (grown_props != NULL)
+      elem->props = grown_props;
+    if (grown_flags != NULL)
+      elem->store_prop = grown_flags;
+    if (grown_props == NULL || grown_flags == NULL) {
+      /* same policy as the allocation macros in this file: report and exit */
+      fprintf(stderr,
+              "ply_describe_other_properties: memory reallocation failed\n");
+      exit (-1);
+    }
+  }
+
+  /* append copies of the other properties, flagged as OTHER_PROP */
+
+  for (i = 0; i < other->nprops; i++) {
+    prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
+    copy_property (prop, other->props[i]);
+    elem->props[elem->nprops] = prop;
+    elem->store_prop[elem->nprops] = OTHER_PROP;
+    elem->nprops++;
+  }
+
+  /* save other info about other properties */
+  elem->other_size = other->size;
+  elem->other_offset = offset;
+}
+
+
+/******************************************************************************
+State how many of a given element will be written.
+
+Entry:
+  plyfile   - file identifier
+  elem_name - name of element that information is being specified about
+  nelems    - number of elements of this type to be written
+******************************************************************************/
+
+inline void ply_element_count(
+  PlyFile *plyfile,
+  const char *elem_name,
+  int nelems
+)
+{
+  /* the element must already exist in the file description */
+  PlyElement *target = find_element (plyfile, elem_name);
+
+  if (target != NULL) {
+    /* record how many instances of it will be written */
+    target->num = nelems;
+    return;
+  }
+
+  fprintf(stderr,"ply_element_count: can't find element '%s'\n",elem_name);
+  exit (-1);
+}
+
+
+/******************************************************************************
+Signal that we've described everything in a PLY file's header and that the
+header should be written to the file.
+
+Entry:
+  plyfile - file identifier
+******************************************************************************/
+
+inline void ply_header_complete(PlyFile *plyfile)
+{
+  FILE *out = plyfile->fp;
+  const char *format_line = NULL;
+  PlyElement *cur_elem;
+  PlyProperty *cur_prop;
+  int n, e, p;
+
+  /* the header always starts with the line "ply" */
+  fprintf (out, "ply\n");
+
+  /* choose the format line that matches the file's storage type; */
+  /* an unrecognised type is reported on stderr and is fatal */
+  switch (plyfile->file_type) {
+    case PLY_ASCII:
+      format_line = "format ascii 1.0\n";
+      break;
+    case PLY_BINARY_BE:
+      format_line = "format binary_big_endian 1.0\n";
+      break;
+    case PLY_BINARY_LE:
+      format_line = "format binary_little_endian 1.0\n";
+      break;
+    default:
+      fprintf (stderr, "ply_header_complete: bad file type = %d\n",
+               plyfile->file_type);
+      exit (-1);
+  }
+  fputs (format_line, out);
+
+  /* one "comment" line per stored comment */
+  for (n = 0; n < plyfile->num_comments; n++)
+    fprintf (out, "comment %s\n", plyfile->comments[n]);
+
+  /* one "obj_info" line per stored piece of object information */
+  for (n = 0; n < plyfile->num_obj_info; n++)
+    fprintf (out, "obj_info %s\n", plyfile->obj_info[n]);
+
+  /* each element: its name and count, followed by its property lines */
+  for (e = 0; e < plyfile->nelems; e++) {
+    cur_elem = plyfile->elems[e];
+    fprintf (out, "element %s %d\n", cur_elem->name, cur_elem->num);
+
+    for (p = 0; p < cur_elem->nprops; p++) {
+      cur_prop = cur_elem->props[p];
+
+      /* a list property also names the type of its count, ahead of */
+      /* the type of its items */
+      if (cur_prop->is_list) {
+        fprintf (out, "property list ");
+        write_scalar_type (out, cur_prop->count_external);
+        fprintf (out, " ");
+      }
+      else
+        fprintf (out, "property ");
+
+      write_scalar_type (out, cur_prop->external_type);
+      fprintf (out, " %s\n", cur_prop->name);
+    }
+  }
+
+  fprintf (out, "end_header\n");
+}
+
+
+/******************************************************************************
+Specify which elements are going to be written.  This should be called
+before a call to the routine ply_put_element().
+
+Entry:
+  plyfile   - file identifier
+  elem_name - name of element we're talking about
+******************************************************************************/
+
+inline void ply_put_element_setup(PlyFile *plyfile, const char *elem_name)
+{
+  PlyElement *elem;
+
+  /* look up the element that subsequent ply_put_element() calls will write; */
+  /* an unknown element name is fatal */
+  elem = find_element (plyfile, elem_name);
+  if (elem == NULL) {
+    /* report under this routine's real name so the message can be traced */
+    fprintf(stderr, "ply_put_element_setup: can't find element '%s'\n", elem_name);
+    exit (-1);
+  }
+
+  /* remember it as the "current" element of the file */
+  plyfile->which_elem = elem;
+}
+
+
+/******************************************************************************
+Write an element to the file.  This routine assumes that we're
+writing the type of element specified in the last call to the routine
+ply_put_element_setup().
+
+Entry:
+  plyfile  - file identifier
+  elem_ptr - pointer to the element
+******************************************************************************/
+
+/* Write one scalar value to fp in the encoding selected by file_type: */
+/* as text when file_type is PLY_ASCII, otherwise through write_binary_item(). */
+inline void ply_put_item_(
+  FILE *fp,
+  int file_type,
+  int int_val,
+  unsigned int uint_val,
+  double double_val,
+  int type,
+  int *native_binary_type
+)
+{
+  if (file_type == PLY_ASCII)
+    write_ascii_item (fp, int_val, uint_val, double_val, type);
+  else
+    write_binary_item (fp, file_type, int_val, uint_val, double_val, type,
+                       native_binary_type);
+}
+
+/* Write one instance of the current element (plyfile->which_elem) to the */
+/* file.  elem_ptr points at the caller's structure holding the element; */
+/* native_binary_type is handed through to write_binary_item() for binary */
+/* files.  Ascii and binary output share the same walk over the properties */
+/* and differ only in how each item is emitted; an ascii element is ended */
+/* with a newline. */
+inline void ply_put_element(PlyFile *plyfile, void *elem_ptr, int *native_binary_type)
+{
+  int j,k;
+  FILE *fp = plyfile->fp;
+  int file_type = plyfile->file_type;
+  PlyElement *elem = plyfile->which_elem;
+  PlyProperty *prop;
+  char *elem_data,*item;
+  char **item_ptr;
+  char **other_ptr;
+  int list_count;
+  int item_size;
+  int int_val;
+  unsigned int uint_val;
+  double double_val;
+
+  /* byte size of a stored item, indexed by the property's internal type */
+  const int ply_type_size[] = {
+    0, 1, 2, 4, 1, 2, 4, 4, 8
+  };
+
+  /* location, inside the caller's structure, of the pointer to the block */
+  /* of "other" properties; only dereferenced for OTHER_PROP properties */
+  other_ptr = (char **) (((char *) elem_ptr) + elem->other_offset);
+
+  /* write out each property of the element */
+  for (j = 0; j < elem->nprops; j++) {
+    prop = elem->props[j];
+
+    /* "other" properties live in a separate block, not in the user's struct */
+    if (elem->store_prop[j] == OTHER_PROP)
+      elem_data = *other_ptr;
+    else
+      elem_data = (char *)elem_ptr;
+
+    if (prop->is_list) {
+      /* first the list count ... */
+      item = elem_data + prop->count_offset;
+      get_stored_item ((void *) item, prop->count_internal,
+                       &int_val, &uint_val, &double_val);
+      ply_put_item_ (fp, file_type, int_val, uint_val, double_val,
+                     prop->count_external, native_binary_type);
+      list_count = uint_val;
+
+      /* ... then every item of the list, which is reached through a */
+      /* pointer stored at the property's offset */
+      item_ptr = (char **) (elem_data + prop->offset);
+      item = item_ptr[0];
+      item_size = ply_type_size[prop->internal_type];
+      for (k = 0; k < list_count; k++) {
+        get_stored_item ((void *) item, prop->internal_type,
+                         &int_val, &uint_val, &double_val);
+        ply_put_item_ (fp, file_type, int_val, uint_val, double_val,
+                       prop->external_type, native_binary_type);
+        item += item_size;
+      }
+    }
+    else {
+      /* scalar property stored directly at its offset */
+      item = elem_data + prop->offset;
+      get_stored_item ((void *) item, prop->internal_type,
+                       &int_val, &uint_val, &double_val);
+      ply_put_item_ (fp, file_type, int_val, uint_val, double_val,
+                     prop->external_type, native_binary_type);
+    }
+  }
+
+  /* each ascii element occupies one line of the file */
+  if (file_type == PLY_ASCII)
+    fprintf (fp, "\n");
+}
+
+
+/******************************************************************************
+Specify a comment that will be written in the header.
+
+Entry:
+  plyfile - file identifier
+  comment - the comment to be written
+******************************************************************************/
+
+inline void ply_put_comment(PlyFile *plyfile, char *comment)
+{
+  char **grown;
+  char *copy;
+
+  /* (re)allocate space for the new comment; the realloc() result goes into */
+  /* a temporary so the current list is not overwritten by a failed call */
+  if (plyfile->num_comments == 0)
+    grown = (char **) myalloc (sizeof (char *));
+  else
+    grown = (char **) realloc (plyfile->comments,
+                       sizeof (char *) * (plyfile->num_comments + 1));
+
+  /* the list keeps its own copy of the text */
+  copy = strdup (comment);
+
+  /* running out of memory is fatal, as are the other errors in this file */
+  if (grown == NULL || copy == NULL) {
+    fprintf (stderr, "ply_put_comment: out of memory\n");
+    exit (-1);
+  }
+
+  /* add comment to list */
+  plyfile->comments = grown;
+  plyfile->comments[plyfile->num_comments] = copy;
+  plyfile->num_comments++;
+}
+
+
+/******************************************************************************
+Specify a piece of object information (arbitrary text) that will be written
+in the header.
+
+Entry:
+  plyfile  - file identifier
+  obj_info - the text information to be written
+******************************************************************************/
+
+inline void ply_put_obj_info(PlyFile *plyfile, char *obj_info)
+{
+  char **grown;
+  char *copy;
+
+  /* (re)allocate space for the new info line; the realloc() result goes */
+  /* into a temporary so the current list is not overwritten by a failed call */
+  if (plyfile->num_obj_info == 0)
+    grown = (char **) myalloc (sizeof (char *));
+  else
+    grown = (char **) realloc (plyfile->obj_info,
+                       sizeof (char *) * (plyfile->num_obj_info + 1));
+
+  /* the list keeps its own copy of the text */
+  copy = strdup (obj_info);
+
+  /* running out of memory is fatal, as are the other errors in this file */
+  if (grown == NULL || copy == NULL) {
+    fprintf (stderr, "ply_put_obj_info: out of memory\n");
+    exit (-1);
+  }
+
+  /* add info to list */
+  plyfile->obj_info = grown;
+  plyfile->obj_info[plyfile->num_obj_info] = copy;
+  plyfile->num_obj_info++;
+}
+
+
+
+
+
+
+
+/*************/
+/*  Reading  */
+/*************/
+
+
+
+/******************************************************************************
+Given a file pointer, get ready to read PLY data from the file.
+
+Entry:
+  fp - the given file pointer
+
+Exit:
+  nelems     - number of elements in object
+  elem_names - list of element names
+  returns a pointer to a PlyFile, used to refer to this file, or NULL if error
+******************************************************************************/
+
+inline PlyFile *ply_read(FILE *fp, int *nelems, char ***elem_names)
+{
+  int i,j;
+  PlyFile *plyfile;
+  int nwords;
+  char **words;
+  char **elist;
+  PlyElement *elem;
+  char *orig_line;
+
+  /* check for NULL file pointer */
+  if (fp == NULL)
+    return (NULL);
+
+  /* The results of these two calls are not used in this routine; the calls */
+  /* themselves are kept so whatever checks they perform still run here. */
+  (void) get_native_binary_type2();
+  (void) check_types();
+
+  /* create record for this object */
+
+  plyfile = (PlyFile *) myalloc (sizeof (PlyFile));
+  plyfile->nelems = 0;
+  plyfile->comments = NULL;
+  plyfile->num_comments = 0;
+  plyfile->obj_info = NULL;
+  plyfile->num_obj_info = 0;
+  plyfile->fp = fp;
+  plyfile->other_elems = NULL;
+
+  /* read and parse the file's header; the first line must be "ply" */
+
+  words = get_words (plyfile->fp, &nwords, &orig_line);
+  if (nwords == 0 || !words || !equal_strings (words[0], "ply")) {
+    /* release the record too, otherwise it leaks on a bad header */
+    free (words);
+    free (plyfile);
+    return (NULL);
+  }
+
+  while (words) {
+
+    /* parse words */
+
+    if (nwords == 0) {
+      /* The check on the first line above suggests get_words() can return */
+      /* a list with no words in it; words[0] must not be read in that */
+      /* case, so such a line is skipped. */
+    }
+    else if (equal_strings (words[0], "format")) {
+      if (nwords != 3) {
+        free (words);
+        free (plyfile);
+        return (NULL);
+      }
+      if (equal_strings (words[1], "ascii"))
+        plyfile->file_type = PLY_ASCII;
+      else if (equal_strings (words[1], "binary_big_endian"))
+        plyfile->file_type = PLY_BINARY_BE;
+      else if (equal_strings (words[1], "binary_little_endian"))
+        plyfile->file_type = PLY_BINARY_LE;
+      else {
+        free (words);
+        free (plyfile);
+        return (NULL);
+      }
+      plyfile->version = atof (words[2]);
+    }
+    else if (equal_strings (words[0], "element"))
+      add_element (plyfile, words);
+    else if (equal_strings (words[0], "property"))
+      add_property (plyfile, words);
+    else if (equal_strings (words[0], "comment"))
+      add_comment (plyfile, orig_line);
+    else if (equal_strings (words[0], "obj_info"))
+      add_obj_info (plyfile, orig_line);
+    else if (equal_strings (words[0], "end_header")) {
+      free (words);
+      break;
+    }
+
+    /* free up words space */
+    free (words);
+
+    words = get_words (plyfile->fp, &nwords, &orig_line);
+  }
+
+  /* create tags for each property of each element, to be used */
+  /* later to say whether or not to store each property for the user */
+
+  for (i = 0; i < plyfile->nelems; i++) {
+    elem = plyfile->elems[i];
+    elem->store_prop = (char *) myalloc (sizeof (char) * elem->nprops);
+    for (j = 0; j < elem->nprops; j++)
+      elem->store_prop[j] = DONT_STORE_PROP;
+    elem->other_offset = NO_OTHER_PROPS; /* no "other" props by default */
+  }
+
+  /* set return values about the elements; the caller receives its own */
+  /* copies of the element names */
+
+  elist = (char **) myalloc (sizeof (char *) * plyfile->nelems);
+  for (i = 0; i < plyfile->nelems; i++)
+    elist[i] = strdup (plyfile->elems[i]->name);
+
+  *elem_names = elist;
+  *nelems = plyfile->nelems;
+
+  /* return a pointer to the file's information */
+
+  return (plyfile);
+}
+
+
+/******************************************************************************
+Open a polygon file for reading.
+
+Entry:
+  filename - name of file to read from
+
+Exit:
+  nelems     - number of elements in object
+  elem_names - list of element names
+  file_type  - file type, either ascii or binary
+  version    - version number of PLY file
+  returns a file identifier, used to refer to this file, or NULL if error
+******************************************************************************/
+
+inline PlyFile *ply_open_for_reading(
+  char *filename,
+  int *nelems,
+  char ***elem_names,
+  int *file_type,
+  float *version
+)
+{
+  FILE *fp;
+  PlyFile *plyfile;
+
+  /* The name is used exactly as given: no ".ply" extension is tacked on, */
+  /* so that macintosh alias filenames can be handled as well. */
+
+  /* open the file in binary mode; ascii data can be read in binary */
+  /* with get_words */
+  fp = fopen (filename, "rb");
+
+  if (fp == NULL)
+    return (NULL);
+
+  /* create the PlyFile data structure */
+
+  plyfile = ply_read (fp, nelems, elem_names);
+
+  /* ply_read() returns NULL when the header is unusable and leaves the */
+  /* stream open, so close it here and report the failure to the caller */
+  if (plyfile == NULL) {
+    fclose (fp);
+    return (NULL);
+  }
+
+  /* determine the file type and version */
+
+  *file_type = plyfile->file_type;
+  *version = plyfile->version;
+
+  /* return a pointer to the file's information */
+
+  return (plyfile);
+}
+
+
+/******************************************************************************
+Get information about a particular element.
+
+Entry:
+  plyfile   - file identifier
+  elem_name - name of element to get information about
+
+Exit:
+  nelems   - number of elements of this type in the file
+  nprops   - number of properties
+  returns a list of properties, or NULL if the file doesn't contain that elem
+******************************************************************************/
+
+inline PlyProperty **ply_get_element_description(
+  PlyFile *plyfile,
+  const char *elem_name,
+  int *nelems,
+  int *nprops
+)
+{
+  PlyElement *found = find_element (plyfile, elem_name);
+  PlyProperty **copies;
+  int k;
+
+  /* the file has no element of that name */
+  if (found == NULL)
+    return (NULL);
+
+  /* report the instance count and the property count */
+  *nelems = found->num;
+  *nprops = found->nprops;
+
+  /* hand back freshly allocated duplicates of the property descriptions */
+  /* rather than the element's own records */
+  copies = (PlyProperty **) myalloc (sizeof (PlyProperty *) * found->nprops);
+  for (k = 0; k < found->nprops; k++) {
+    copies[k] = (PlyProperty *) myalloc (sizeof (PlyProperty));
+    copy_property (copies[k], found->props[k]);
+  }
+
+  return (copies);
+}
+
+
+/******************************************************************************
+Specify which properties of an element are to be returned.  This should be
+called before a call to the routine ply_get_element().
+
+Entry:
+  plyfile   - file identifier
+  elem_name - which element we're talking about
+  nprops    - number of properties
+  prop_list - list of properties
+******************************************************************************/
+
+inline void ply_get_element_setup(
+  PlyFile *plyfile,
+  const char *elem_name,
+  int nprops,
+  PlyProperty *prop_list
+)
+{
+  int i;
+  PlyElement *elem;
+  PlyProperty *prop;
+  int index;
+
+  /* find information about the element and make it the current one */
+  elem = find_element (plyfile, elem_name);
+  plyfile->which_elem = elem;
+
+  /* find_property() reads through the element, so an unknown element */
+  /* name has to be caught before the loop below */
+  if (elem == NULL) {
+    fprintf (stderr, "ply_get_element_setup: can't find element '%s'\n",
+             elem_name);
+    return;
+  }
+
+  /* deposit the property information into the element's description */
+  for (i = 0; i < nprops; i++) {
+
+    /* look for actual property; a missing one is only a warning */
+    prop = find_property (elem, prop_list[i].name, &index);
+    if (prop == NULL) {
+      fprintf (stderr, "Warning:  Can't find property '%s' in element '%s'\n",
+               prop_list[i].name, elem_name);
+      continue;
+    }
+
+    /* store the caller's in-memory layout for this property */
+    prop->internal_type = prop_list[i].internal_type;
+    prop->offset = prop_list[i].offset;
+    prop->count_internal = prop_list[i].count_internal;
+    prop->count_offset = prop_list[i].count_offset;
+
+    /* specify that the user wants this property */
+    elem->store_prop[index] = STORE_PROP;
+  }
+}
+
+
+/******************************************************************************
+Specify a property of an element that is to be returned.  This should be
+called (usually multiple times) before a call to the routine ply_get_element().
+This routine should be used in preference to the less flexible old routine
+called ply_get_element_setup().
+
+Entry:
+  plyfile   - file identifier
+  elem_name - which element we're talking about
+  prop      - property to add to those that will be returned
+******************************************************************************/
+
+inline void ply_get_property(
+  PlyFile *plyfile,
+  const char *elem_name,
+  PlyProperty *prop
+)
+{
+  PlyElement *elem;
+  PlyProperty *prop_ptr;
+  int index;
+
+  /* find information about the element and make it the current one */
+  elem = find_element (plyfile, elem_name);
+  plyfile->which_elem = elem;
+
+  /* find_property() reads through the element, so an unknown element */
+  /* name has to be caught before it is called */
+  if (elem == NULL) {
+    fprintf (stderr, "ply_get_property: can't find element '%s'\n",
+             elem_name);
+    return;
+  }
+
+  /* deposit the property information into the element's description; */
+  /* a missing property is only a warning */
+
+  prop_ptr = find_property (elem, prop->name, &index);
+  if (prop_ptr == NULL) {
+    fprintf (stderr, "Warning:  Can't find property '%s' in element '%s'\n",
+             prop->name, elem_name);
+    return;
+  }
+  prop_ptr->internal_type  = prop->internal_type;
+  prop_ptr->offset         = prop->offset;
+  prop_ptr->count_internal = prop->count_internal;
+  prop_ptr->count_offset   = prop->count_offset;
+
+  /* specify that the user wants this property */
+  elem->store_prop[index] = STORE_PROP;
+}
+
+
+/******************************************************************************
+Read one element from the file.  This routine assumes that we're reading
+the type of element specified in the last call to the routine
+ply_get_element_setup().
+
+Entry:
+  plyfile  - file identifier
+  elem_ptr - pointer to location where the element information should be put
+******************************************************************************/
+
+inline void ply_get_element(PlyFile *plyfile, void *elem_ptr, int *native_binary_type)
+{
+  char *dest = (char *) elem_ptr;
+
+  /* ascii and binary files are handled by separate readers; only the */
+  /* binary reader is given native_binary_type */
+  if (plyfile->file_type != PLY_ASCII) {
+    binary_get_element (plyfile, dest, native_binary_type);
+    return;
+  }
+
+  ascii_get_element (plyfile, dest);
+}
+
+
+/******************************************************************************
+Extract the comments from the header information of a PLY file.
+
+Entry:
+  plyfile - file identifier
+
+Exit:
+  num_comments - number of comments returned
+  returns a pointer to a list of comments
+******************************************************************************/
+
+inline char **ply_get_comments(PlyFile *plyfile, int *num_comments)
+{
+  /* the count goes out through the pointer argument ... */
+  num_comments[0] = plyfile->num_comments;
+
+  /* ... and the list itself (not a copy) is the return value */
+  return plyfile->comments;
+}
+
+
+/******************************************************************************
+Extract the object information (arbitrary text) from the header information
+of a PLY file.
+
+Entry:
+  plyfile - file identifier
+
+Exit:
+  num_obj_info - number of lines of text information returned
+  returns a pointer to a list of object info lines
+******************************************************************************/
+
+inline char **ply_get_obj_info(PlyFile *plyfile, int *num_obj_info)
+{
+  /* the count goes out through the pointer argument ... */
+  num_obj_info[0] = plyfile->num_obj_info;
+
+  /* ... and the list itself (not a copy) is the return value */
+  return plyfile->obj_info;
+}
+
+
+/******************************************************************************
+Make ready for "other" properties of an element-- those properties that
+the user has not explicitly asked for, but that are to be stashed away
+in a special structure to be carried along with the element's other
+information.
+
+Entry:
+  elem - element for which we want to save away other properties
+******************************************************************************/
+
+inline void setup_other_props(PlyElement *elem)
+{
+  /* byte size of each scalar type, indexed by a property's type code */
+  int ply_type_size[] = {
+    0, 1, 2, 4, 1, 2, 4, 4, 8
+  };
+  PlyProperty *cur;
+  int total = 0;
+  int pass_size;
+  int idx;
+
+  /* Lay the properties out in passes of decreasing item size (8, 4, 2, 1) */
+  /* so that all data types will be aligned by word, half-word, or */
+  /* whatever within the structure. */
+  for (pass_size = 8; pass_size > 0; pass_size /= 2) {
+
+    /* every pass walks all properties and places the fields whose size */
+    /* matches the size handled by this pass */
+    for (idx = 0; idx < elem->nprops; idx++) {
+
+      /* properties the user asked for explicitly are stored elsewhere */
+      if (elem->store_prop[idx])
+        continue;
+
+      cur = elem->props[idx];
+
+      /* "other" properties are kept in their external representation */
+      cur->internal_type = cur->external_type;
+      cur->count_internal = cur->count_external;
+
+      /* scalar property: a single field of its own type size */
+      if (!cur->is_list) {
+        if (pass_size == ply_type_size[cur->external_type]) {
+          cur->offset = total;
+          total += ply_type_size[cur->external_type];
+        }
+        continue;
+      }
+
+      /* list property: a pointer to the list data ... */
+      if (pass_size == sizeof (void *)) {
+        cur->offset = total;
+        total += sizeof (void *);    /* always the size of a pointer here */
+      }
+
+      /* ... plus the count of list entries */
+      if (pass_size == ply_type_size[cur->count_external]) {
+        cur->count_offset = total;
+        total += ply_type_size[cur->count_external];
+      }
+    }
+  }
+
+  /* save the size for the other_props structure */
+  elem->other_size = total;
+}
+
+
+/******************************************************************************
+Specify that we want the "other" properties of an element to be tucked
+away within the user's structure.  The user needn't be concerned for how
+these properties are stored.
+
+Entry:
+  plyfile   - file identifier
+  elem_name - name of element that we want to store other_props in
+  offset    - offset to where other_props will be stored inside user's structure
+
+Exit:
+  returns pointer to structure containing description of other_props
+******************************************************************************/
+
+inline PlyOtherProp *ply_get_other_properties(
+  PlyFile *plyfile,
+  const char *elem_name,
+  int offset
+)
+{
+  int i;
+  PlyElement *elem;
+  PlyOtherProp *other;
+  PlyProperty *prop;
+  int nprops;
+
+  /* find information about the element */
+  elem = find_element (plyfile, elem_name);
+  if (elem == NULL) {
+    fprintf (stderr, "ply_get_other_properties: Can't find element '%s'\n",
+             elem_name);
+    return (NULL);
+  }
+
+  /* remember that this is the "current" element */
+  plyfile->which_elem = elem;
+
+  /* save the offset to where to store the other_props */
+  elem->other_offset = offset;
+
+  /* place the appropriate pointers, etc. in the element's property list */
+  setup_other_props (elem);
+
+  /* create structure for describing other_props */
+  other = (PlyOtherProp *) myalloc (sizeof (PlyOtherProp));
+  other->name = strdup (elem_name);
+  other->size = elem->other_size;
+
+  /* props is an array of pointers, so it is sized by the pointer type; */
+  /* room for every property of the element is enough for the subset */
+  /* copied below */
+  other->props = (PlyProperty **) myalloc (sizeof (PlyProperty *) * elem->nprops);
+
+  /* save descriptions of each "other" property, i.e. each property that */
+  /* has not been marked for explicit storage */
+  nprops = 0;
+  for (i = 0; i < elem->nprops; i++) {
+    if (elem->store_prop[i])
+      continue;
+    prop = (PlyProperty *) myalloc (sizeof (PlyProperty));
+    copy_property (prop, elem->props[i]);
+    other->props[nprops] = prop;
+    nprops++;
+  }
+  other->nprops = nprops;
+
+  /* set other_offset pointer appropriately if there are NO other properties */
+  if (other->nprops == 0) {
+    elem->other_offset = NO_OTHER_PROPS;
+  }
+
+  /* return structure */
+  return (other);
+}
+
+
+
+
+/*************************/
+/*  Other Element Stuff  */
+/*************************/
+
+
+
+
+/******************************************************************************
+Grab all the data for an element that a user does not want to explicitly
+read in.
+
+Entry:
+  plyfile    - pointer to file
+  elem_name  - name of element whose data is to be read in
+  elem_count - number of instances of this element stored in the file
+
+Exit:
+  returns pointer to ALL the "other" element data for this PLY file
+******************************************************************************/
+
+inline PlyOtherElems *ply_get_other_element (
+  PlyFile *plyfile,
+  char *elem_name,
+  int elem_count
+)
+{
+  int i;
+  PlyElement *elem;
+  PlyOtherElems *other_elems;
+  OtherElem *other;
+  OtherElem *grown;
+
+  /* look for appropriate element; an unknown name is fatal */
+  elem = find_element (plyfile, elem_name);
+  if (elem == NULL) {
+    fprintf (stderr,
+             "ply_get_other_element: can't find element '%s'\n", elem_name);
+    exit (-1);
+  }
+
+  /* create room for the new "other" element, initializing the */
+  /* other data structure if necessary */
+
+  if (plyfile->other_elems == NULL) {
+    plyfile->other_elems = (PlyOtherElems *) myalloc (sizeof (PlyOtherElems));
+    other_elems = plyfile->other_elems;
+    other_elems->other_list = (OtherElem *) myalloc (sizeof (OtherElem));
+    other = &(other_elems->other_list[0]);
+    other_elems->num_elems = 1;
+  }
+  else {
+    other_elems = plyfile->other_elems;
+    /* grow through a temporary so a failed realloc() is noticed instead */
+    /* of being written through */
+    grown = (OtherElem *) realloc (other_elems->other_list,
+                          sizeof (OtherElem) * (other_elems->num_elems + 1));
+    if (grown == NULL) {
+      fprintf (stderr, "ply_get_other_element: out of memory\n");
+      exit (-1);
+    }
+    other_elems->other_list = grown;
+    other = &(other_elems->other_list[other_elems->num_elems]);
+    other_elems->num_elems++;
+  }
+
+  /* count of element instances in file */
+  other->elem_count = elem_count;
+
+  /* save name of element */
+  other->elem_name = strdup (elem_name);
+
+  /* create a list to hold all the current elements; a NULL result only */
+  /* counts as a failure when there is something to store */
+  other->other_data = (OtherData **)
+                  malloc (sizeof (OtherData *) * other->elem_count);
+  if (other->other_data == NULL && other->elem_count > 0) {
+    fprintf (stderr, "ply_get_other_element: out of memory\n");
+    exit (-1);
+  }
+
+  /* set up for getting elements */
+  other->other_props = ply_get_other_properties (plyfile, elem_name,
+                         offsetof(OtherData,other_props));
+
+  /* grab all these elements */
+  int native_binary_type = get_native_binary_type2();
+  for (i = 0; i < other->elem_count; i++) {
+    /* grab an element from the file */
+    other->other_data[i] = (OtherData *) malloc (sizeof (OtherData));
+    if (other->other_data[i] == NULL) {
+      fprintf (stderr, "ply_get_other_element: out of memory\n");
+      exit (-1);
+    }
+    ply_get_element (plyfile, (void *) other->other_data[i], &native_binary_type);
+  }
+
+  /* return pointer to the other elements data */
+  return (other_elems);
+}
+
+
+/******************************************************************************
+Pass along a pointer to "other" elements that we want to save in a given
+PLY file.  These other elements were presumably read from another PLY file.
+
+Entry:
+  plyfile     - file pointer in which to store this other element info
+  other_elems - info about other elements that we want to store
+******************************************************************************/
+
+inline void ply_describe_other_elements (
+  PlyFile *plyfile,
+  PlyOtherElems *other_elems
+)
+{
+  OtherElem *src;
+  PlyElement *fresh;
+  int n;
+
+  /* nothing to describe when no other elements were handed in */
+  if (!other_elems)
+    return;
+
+  /* the file keeps the caller's pointer to this information */
+  plyfile->other_elems = other_elems;
+
+  /* make room in the main element list for one entry per other element */
+  REALLOCN(plyfile->elems, PlyElement *,
+	   plyfile->nelems, plyfile->nelems + other_elems->num_elems);
+
+  /* append each other element as a new element whose properties are all */
+  /* "other" properties */
+  for (n = 0; n < other_elems->num_elems; n++) {
+    src = other_elems->other_list + n;
+
+    fresh = (PlyElement *) myalloc (sizeof (PlyElement));
+    plyfile->elems[plyfile->nelems] = fresh;
+    plyfile->nelems++;
+
+    fresh->name = strdup (src->elem_name);
+    fresh->num = src->elem_count;
+    fresh->nprops = 0;
+
+    ply_describe_other_properties (plyfile, src->other_props,
+                                   offsetof(OtherData,other_props));
+  }
+}
+
+
+/******************************************************************************
+Write out the "other" elements specified for this PLY file.
+
+Entry:
+  plyfile - pointer to PLY file to write out other elements for
+******************************************************************************/
+
+inline void ply_put_other_elements (PlyFile *plyfile, int *native_binary_type)
+{
+  PlyOtherElems *bundle = plyfile->other_elems;
+  OtherElem *cur;
+  int e, inst;
+
+  /* nothing to do when the file carries no "other" elements */
+  if (bundle == NULL)
+    return;
+
+  /* for each "other" element: select it as the current element, then */
+  /* write every stored instance of it */
+  for (e = 0; e < bundle->num_elems; e++) {
+    cur = bundle->other_list + e;
+    ply_put_element_setup (plyfile, cur->elem_name);
+
+    for (inst = 0; inst < cur->elem_count; inst++)
+      ply_put_element (plyfile, (void *) cur->other_data[inst], native_binary_type);
+  }
+}
+
+
+/******************************************************************************
+Free up storage used by an "other" elements data structure.
+
+Entry:
+  other_elems - data structure to free up
+******************************************************************************/
+
+inline void ply_free_other_elements (PlyOtherElems *other_elems)
+{
+  /* This file releases memory obtained through myalloc() with free() */
+  /* (see ply_close()), so the structure must not go through delete. */
+  /* Only the top-level structure is released; the element list and the */
+  /* per-element data it points to are left untouched, as before. */
+  free (other_elems);
+}
+
+
+
+/*******************/
+/*  Miscellaneous  */
+/*******************/
+
+
+
+/******************************************************************************
+Close a PLY file.
+
+Entry:
+  plyfile - identifier of file to close
+******************************************************************************/
+
+inline void ply_close(PlyFile *plyfile)
+{
+  /* nothing to close or release for a NULL handle */
+  if (plyfile == NULL)
+    return;
+
+  /* fclose() must not be handed a NULL stream */
+  if (plyfile->fp != NULL) {
+    fclose (plyfile->fp);
+    plyfile->fp = NULL;
+  }
+
+  /* free the PlyFile record itself; this routine does not release the */
+  /* element, comment or obj_info lists that hang off it */
+  free (plyfile);
+}
+
+
+/******************************************************************************
+Get version number and file type of a PlyFile.
+
+Entry:
+  ply - pointer to PLY file
+
+Exit:
+  version - version of the file
+  file_type - PLY_ASCII, PLY_BINARY_BE, or PLY_BINARY_LE
+******************************************************************************/
+
+inline void ply_get_info(PlyFile *ply, float *version, int *file_type)
+{
+  /* the outputs are left untouched when no file is given */
+  if (ply != NULL) {
+    *version = ply->version;
+    *file_type = ply->file_type;
+  }
+}
+
+
+/******************************************************************************
+Compare two strings.  Returns 1 if they are the same, 0 if not.
+******************************************************************************/
+
+inline int equal_strings(const char *s1, const char *s2)
+{
+  /* walk both strings in step: the first mismatching position means */
+  /* "different", reaching the terminator of both together means "same" */
+  for (;; ++s1, ++s2) {
+    if (*s1 != *s2)
+      return (0);
+    if (*s1 == '\0')
+      return (1);
+  }
+}
+
+
+/******************************************************************************
+Find an element from the element list of a given PLY object.
+
+Entry:
+  plyfile - file id for PLY file
+  element - name of element we're looking for
+
+Exit:
+  returns the element, or NULL if not found
+******************************************************************************/
+
+inline PlyElement *find_element(PlyFile *plyfile, const char *element)
+{
+  PlyElement *match = NULL;
+  int idx = 0;
+
+  /* scan the element list in order and stop at the first name match */
+  while (idx < plyfile->nelems && match == NULL) {
+    if (equal_strings (element, plyfile->elems[idx]->name))
+      match = plyfile->elems[idx];
+    idx++;
+  }
+
+  /* still NULL when no element carries that name */
+  return (match);
+}
+
+
+/******************************************************************************
+Find a property in the list of properties of a given element.
+
+Entry:
+  elem      - pointer to element in which we want to find the property
+  prop_name - name of property to find
+
+Exit:
+  index - index to position in list
+  returns a pointer to the property, or NULL if not found
+******************************************************************************/
+
+inline PlyProperty *find_property(PlyElement *elem, const char *prop_name, int *index)
+{
+  int pos;
+
+  /* Scan the element's property table for the first matching name and
+     report both its position and the property itself. */
+  for (pos = 0; pos < elem->nprops; pos++) {
+    PlyProperty *candidate = elem->props[pos];
+
+    if (!equal_strings (prop_name, candidate->name))
+      continue;
+
+    *index = pos;
+    return candidate;
+  }
+
+  /* not found: position is reported as -1 */
+  *index = -1;
+  return NULL;
+}
+
+
+/******************************************************************************
+Read an element from an ascii file.
+
+Entry:
+  plyfile  - file identifier
+  elem_ptr - pointer to element
+******************************************************************************/
+
+inline void ascii_get_element(PlyFile *plyfile, char *elem_ptr)
+{
+  /* Reads one text line from plyfile->fp, splits it into words and stores
+     the values of the current element (plyfile->which_elem) at the offsets
+     recorded in each property, relative to elem_ptr.  The program exits if
+     the line is missing or holds fewer words than the properties need. */
+
+  /* byte size of every PLY scalar type, indexed by type code */
+  int ply_type_size[] = {
+    0, 1, 2, 4, 1, 2, 4, 4, 8
+  };
+
+  int j,k;
+  PlyElement *elem;
+  PlyProperty *prop;
+  char **words;
+  int nwords;
+  int which_word;
+  char *elem_data,*item=NULL;
+  char *item_ptr;
+  int item_size;
+  int int_val;
+  unsigned int uint_val;
+  double double_val;
+  int list_count;
+  int store_it;
+  char **store_array;
+  char *orig_line;
+  char *other_data=NULL;
+  int other_flag;
+
+  /* the kind of element we're reading currently */
+  elem = plyfile->which_elem;
+
+  /* do we need to setup for other_props? */
+
+  if (elem->other_offset != NO_OTHER_PROPS) {
+    char **ptr;
+    other_flag = 1;
+    /* make room for other_props */
+    other_data = (char *) myalloc (elem->other_size);
+    /* store pointer in user's structure to the other_props */
+    ptr = (char **) (elem_ptr + elem->other_offset);
+    *ptr = other_data;
+  }
+  else
+    other_flag = 0;
+
+  /* read in the element */
+
+  words = get_words (plyfile->fp, &nwords, &orig_line);
+  if (words == NULL) {
+    fprintf (stderr, "ply_get_element: unexpected end of file\n");
+    exit (-1);
+  }
+
+  which_word = 0;
+
+  for (j = 0; j < elem->nprops; j++) {
+
+    prop = elem->props[j];
+    store_it = (elem->store_prop[j] | other_flag);
+
+    /* every stored value is written relative to the caller's structure;
+       nothing in this routine writes into the other_props buffer */
+    elem_data = elem_ptr;
+
+    if (prop->is_list) {       /* a list */
+
+      /* never index past the words that were actually on the line */
+      if (which_word >= nwords) {
+        fprintf (stderr, "ply_get_element: too few values on line\n");
+        exit (-1);
+      }
+
+      /* get and store the number of items in the list */
+      get_ascii_item (words[which_word++], prop->count_external,
+                      &int_val, &uint_val, &double_val);
+      if (store_it) {
+        item = elem_data + prop->count_offset;
+        store_item(item, prop->count_internal, int_val, uint_val, double_val);
+      }
+
+      /* allocate space for an array of items and store a ptr to the array */
+      list_count = int_val;
+      item_size = ply_type_size[prop->internal_type];
+      store_array = (char **) (elem_data + prop->offset);
+
+      if (list_count == 0) {
+        if (store_it)
+          *store_array = NULL;
+      }
+      else {
+        if (store_it) {
+          item_ptr = (char *) myalloc (sizeof (char) * item_size * list_count);
+
+          item = item_ptr;
+          *store_array = item_ptr;
+        }
+
+        /* read items and store them into the array */
+        for (k = 0; k < list_count; k++) {
+          if (which_word >= nwords) {
+            fprintf (stderr, "ply_get_element: too few values on line\n");
+            exit (-1);
+          }
+          get_ascii_item (words[which_word++], prop->external_type,
+                          &int_val, &uint_val, &double_val);
+          if (store_it) {
+            store_item (item, prop->internal_type,
+                        int_val, uint_val, double_val);
+            item += item_size;
+          }
+        }
+      }
+
+    }
+    else {                     /* not a list */
+      if (which_word >= nwords) {
+        fprintf (stderr, "ply_get_element: too few values on line\n");
+        exit (-1);
+      }
+      get_ascii_item (words[which_word++], prop->external_type,
+                      &int_val, &uint_val, &double_val);
+      if (store_it) {
+        item = elem_data + prop->offset;
+        store_item (item, prop->internal_type, int_val, uint_val, double_val);
+      }
+    }
+
+  }
+
+  free (words);
+}
+
+
+/******************************************************************************
+Read an element from a binary file.
+
+Entry:
+  plyfile  - file identifier
+  elem_ptr - pointer to an element
+******************************************************************************/
+
+inline void binary_get_element(PlyFile *plyfile, char *elem_ptr, int *native_binary_type)
+{
+  /* Reads the properties of the current element (plyfile->which_elem) one
+     after another from the binary stream plyfile->fp and stores them at the
+     offsets recorded in each property, relative to elem_ptr.
+     native_binary_type is passed through unchanged to get_binary_item. */
+  int j,k;
+  PlyElement *elem;
+  PlyProperty *prop;
+  FILE *fp = plyfile->fp;
+  char *elem_data,*item=NULL;
+  char *item_ptr;
+  int item_size;
+  int int_val;
+  unsigned int uint_val;
+  double double_val;
+  int list_count;
+  int store_it;
+  char **store_array;
+  char *other_data=NULL;
+  int other_flag;
+	
+	/* byte size of every PLY scalar type, indexed by type code */
+	int ply_type_size[] = {
+	  0, 1, 2, 4, 1, 2, 4, 4, 8
+	};
+
+
+  /* the kind of element we're reading currently */
+  elem = plyfile->which_elem;
+
+  /* do we need to setup for other_props? */
+
+  if (elem->other_offset != NO_OTHER_PROPS) {
+    char **ptr;
+    other_flag = 1;
+    /* make room for other_props */
+    other_data = (char *) myalloc (elem->other_size);
+    /* store pointer in user's structure to the other_props */
+    ptr = (char **) (elem_ptr + elem->other_offset);
+    *ptr = other_data;
+  }
+  else
+    other_flag = 0;
+
+  /* read in a number of elements */
+
+  for (j = 0; j < elem->nprops; j++) {
+
+    prop = elem->props[j];
+    /* a value is stored when store_prop[j] is set or other_props are in use */
+    store_it = (elem->store_prop[j] | other_flag);
+
+    /* store either in the user's structure or in other_props */
+    /* NOTE: the selection below is commented out, so every value is written
+       relative to elem_ptr and other_data is never written by this routine */
+//    if (elem->store_prop[j])
+      elem_data = elem_ptr;
+//    else
+//      elem_data = other_data;
+
+    if (prop->is_list) {       /* a list */
+
+      /* get and store the number of items in the list */
+      get_binary_item (fp, plyfile->file_type, prop->count_external,
+                      &int_val, &uint_val, &double_val, native_binary_type);
+      if (store_it) {
+        item = elem_data + prop->count_offset;
+        store_item(item, prop->count_internal, int_val, uint_val, double_val);
+      }
+
+      /* allocate space for an array of items and store a ptr to the array */
+      list_count = int_val;
+       
+      item_size = ply_type_size[prop->internal_type];
+      store_array = (char **) (elem_data + prop->offset);
+      if (list_count == 0) {
+        /* empty list: the caller's array pointer is set to NULL */
+        if (store_it)
+          *store_array = NULL;
+      }
+      else {
+        if (store_it) {
+          item_ptr = (char *) myalloc (sizeof (char) * item_size * list_count);
+           
+          item = item_ptr;
+          *store_array = item_ptr;
+        }
+
+        // read items and store them into the array  
+        // (items are consumed from the stream even when they are not stored)
+        for (k = 0; k < list_count; k++) {
+          get_binary_item (fp, plyfile->file_type, prop->external_type,
+                          &int_val, &uint_val, &double_val, native_binary_type);
+          if (store_it) {
+             store_item (item, prop->internal_type,
+                       int_val, uint_val, double_val);
+            item += item_size;
+          }
+        }
+        
+         
+        
+      }
+
+    }
+    else {                     /* not a list */
+      get_binary_item (fp, plyfile->file_type, prop->external_type,
+                      &int_val, &uint_val, &double_val, native_binary_type);
+      if (store_it) {
+        item = elem_data + prop->offset;
+        store_item (item, prop->internal_type, int_val, uint_val, double_val);
+      }
+    }
+
+  }
+}
+
+
+/******************************************************************************
+Write to a file the word that represents a PLY data type.
+
+Entry:
+  fp   - file pointer
+  code - code for type
+******************************************************************************/
+
+inline void write_scalar_type (FILE *fp, int code)
+{
+  /* printable names of the scalar types, indexed by type code */
+  const char *type_names[] = {
+    "invalid",
+    "char", "short", "int",
+    "uchar", "ushort", "uint",
+    "float", "double",
+  };
+
+  /* only codes strictly between PLY_START_TYPE and PLY_END_TYPE are valid */
+  if (!(code > PLY_START_TYPE && code < PLY_END_TYPE)) {
+    fprintf (stderr, "write_scalar_type: bad data code = %d\n", code);
+    exit (-1);
+  }
+
+  /* emit the word for this code */
+  fprintf (fp, "%s", type_names[code]);
+}
+
+/******************************************************************************
+  Reverse the order in an array of bytes.  This is the conversion from big
+  endian to little endian and vice versa
+
+Entry:
+  bytes     - array of bytes to reverse (in place)
+  num_bytes - number of bytes in array
+******************************************************************************/
+
+inline void swap_bytes(char *bytes, int num_bytes)
+{
+    /* Exchange bytes pairwise from both ends, moving toward the middle;
+       a middle byte (odd length) stays where it is. */
+    int lo = 0;
+    int hi = num_bytes - 1;
+
+    while (lo < hi)
+    {
+        char held = bytes[lo];
+
+        bytes[lo] = bytes[hi];
+        bytes[hi] = held;
+        lo++;
+        hi--;
+    }
+}
+
+/******************************************************************************
+  Find out if this machine is big endian or little endian
+
+  Exit:
+    stores through the native_binary_type pointer
+                              either PLY_BINARY_BE or PLY_BINARY_LE
+
+******************************************************************************/
+
+inline void get_native_binary_type(int *native_binary_type)
+{
+    /* Overlay an int holding 1 with its bytes and see at which end the
+       non-zero byte lands. */
+    union
+    {
+        int  int_value;
+        char byte_values[sizeof(int)];
+    } probe;
+
+    probe.int_value = 1;
+
+    if (probe.byte_values[0] == 1)
+    {
+        /* low-order byte comes first */
+        *native_binary_type = PLY_BINARY_LE;
+        return;
+    }
+
+    if (probe.byte_values[sizeof(int)-1] == 1)
+    {
+        /* low-order byte comes last */
+        *native_binary_type = PLY_BINARY_BE;
+        return;
+    }
+
+    fprintf(stderr, "ply: Couldn't determine machine endianness.\n");
+    fprintf(stderr, "ply: Exiting...\n");
+    exit(1);
+}
+
+inline int get_native_binary_type2()
+{
+    /* Same probe as get_native_binary_type, but the result is returned
+       instead of being stored through a pointer. */
+    union
+    {
+        int  int_value;
+        char byte_values[sizeof(int)];
+    } probe;
+
+    probe.int_value = 1;
+
+    /* low-order byte first */
+    if (probe.byte_values[0] == 1)
+        return PLY_BINARY_LE;
+
+    /* low-order byte last */
+    if (probe.byte_values[sizeof(int)-1] == 1)
+        return PLY_BINARY_BE;
+
+    fprintf(stderr, "ply: Couldn't determine machine endianness.\n");
+    fprintf(stderr, "ply: Exiting...\n");
+    exit(1);
+}
+
+/******************************************************************************
+  Verify that all the native types are the sizes we need
+
+
+******************************************************************************/
+
+inline int check_types()
+{
+    /* byte sizes the PLY code assumes, indexed by type code */
+    int ply_type_size[] = {
+      0, 1, 2, 4, 1, 2, 4, 4, 8
+    };
+
+    /* every native type must have exactly the size listed in the table */
+    int sizes_match =
+        (ply_type_size[PLY_CHAR] == sizeof(char)) &&
+        (ply_type_size[PLY_SHORT] == sizeof(short)) &&
+        (ply_type_size[PLY_INT] == sizeof(int)) &&
+        (ply_type_size[PLY_UCHAR] == sizeof(unsigned char)) &&
+        (ply_type_size[PLY_USHORT] == sizeof(unsigned short)) &&
+        (ply_type_size[PLY_UINT] == sizeof(unsigned int)) &&
+        (ply_type_size[PLY_FLOAT] == sizeof(float)) &&
+        (ply_type_size[PLY_DOUBLE] == sizeof(double));
+
+    if (!sizes_match)
+    {
+        fprintf(stderr, "ply: Type sizes do not match built-in types\n");
+        fprintf(stderr, "ply: Exiting...\n");
+        exit(1);
+    }
+
+    return 1;
+}
+
+/******************************************************************************
+Get a text line from a file and break it up into words.
+
+IMPORTANT: The calling routine must call "free" on the returned pointer once
+it has finished with it.
+
+Entry:
+  fp - file to read from
+
+Exit:
+  nwords    - number of words returned
+  orig_line - the original line of characters
+  returns a list of words from the line, or NULL if end-of-file
+******************************************************************************/
+
+inline char **get_words(FILE *fp, int *nwords, char **orig_line)
+{
+  /* Reads one line from fp and splits it into blank-separated words.
+
+     The returned array, the word strings it points to and the copy of the
+     line handed back through orig_line all live in ONE heap block.  A
+     single free() of the returned pointer releases everything, and both
+     the words and *orig_line stay valid until that free().
+
+     On end-of-file NULL is returned, *nwords is 0 and *orig_line is NULL.
+     A line without any word returns a non-NULL array with *nwords == 0. */
+  #define BIG_STRING 4096
+  char str[BIG_STRING];      /* scratch copy of the line, normalized in place */
+  char **words;
+  char *word_text;           /* heap copy of str that gets cut into words */
+  char *copy_text;           /* heap copy of the line without its line break */
+  char *ptr;
+  int num_words = 0;
+  int slots;                 /* pointer slots allocated (at least one) */
+  int str_len;               /* length of the normalized line */
+  int copy_len = -1;         /* length of the line up to its line break */
+  int in_word = 0;
+  int i;
+
+  fpos_t pos; //keep track of file pointer
+  int nbytes = 0;
+  int nonUNIX = 0;
+
+  fgetpos(fp, &pos);
+
+  /* read in a line; nothing has been allocated yet, so EOF leaks nothing */
+  if (fgets (str, BIG_STRING, fp) == NULL) {
+    *nwords = 0;
+    *orig_line = NULL;
+    return (NULL);
+  }
+
+  /* force a blank and a terminator at the very end of the buffer, so a
+     line that fills the whole buffer still ends in a separator */
+  str[BIG_STRING-2] = ' ';
+  str[BIG_STRING-1] = '\0';
+
+  /* convert tabs and the line break into spaces, counting consumed bytes */
+  for (ptr = str; *ptr != '\0'; ptr++) {
+    nbytes++;
+    if (*ptr == '\t') {
+      *ptr = ' ';
+    }
+    else if (*ptr == '\n') {
+      copy_len = (int) (ptr - str);
+      *ptr = ' '; //has to have a space, to be caught later when grouping words
+      break;
+    }
+    else if (*ptr == '\r') {
+      //MAC line break
+      nonUNIX = 1;
+      if (*(ptr+1) == '\n')     //actually a PC line break
+        nbytes++;
+      copy_len = (int) (ptr - str);
+      *ptr = ' ';
+      *(ptr+1) = '\0';          //when reading mac, best end string here
+      break;
+    }
+  }
+
+  if (copy_len < 0) {
+    /* no line break found: the whole string is the line */
+    copy_len = (int) (ptr - str);
+    str_len = copy_len;
+  }
+  else {
+    /* the line break was turned into one trailing blank */
+    str_len = copy_len + 1;
+  }
+
+  /*check to see if a PC or MAC header was detected instead of UNIX*/
+  if (nonUNIX == 1) {
+    /* reposition the stream just past this line's break */
+    fsetpos(fp, &pos);
+    fseek(fp, nbytes, SEEK_CUR);
+  }
+
+  /* first pass: count the words so the block can be sized exactly */
+  for (i = 0; i < str_len; i++) {
+    if (str[i] == ' ')
+      in_word = 0;
+    else if (!in_word) {
+      in_word = 1;
+      num_words++;
+    }
+  }
+
+  /* one block: pointer array, then the word text, then the line copy */
+  slots = (num_words > 0) ? num_words : 1;
+  words = (char **) myalloc (sizeof (char *) * slots
+                             + (str_len + 1) + (copy_len + 1));
+  if (words == NULL) {
+    fprintf (stderr, "get_words: out of memory\n");
+    exit (-1);
+  }
+  word_text = (char *) (words + slots);
+  copy_text = word_text + str_len + 1;
+
+  for (i = 0; i < str_len; i++)
+    word_text[i] = str[i];
+  word_text[str_len] = '\0';
+
+  /* the copy is always terminated, with or without a line break */
+  for (i = 0; i < copy_len; i++)
+    copy_text[i] = str[i];
+  copy_text[copy_len] = '\0';
+
+  /* second pass: cut the heap copy into words */
+  num_words = 0;
+  ptr = word_text;
+  while (*ptr != '\0') {
+
+    /* jump over leading spaces */
+    while (*ptr == ' ')
+      ptr++;
+
+    /* break if we reach the end */
+    if (*ptr == '\0')
+      break;
+
+    /* save pointer to beginning of word */
+    words[num_words++] = ptr;
+
+    /* jump over non-spaces, never running past the terminator */
+    while (*ptr != ' ' && *ptr != '\0')
+      ptr++;
+
+    /* a word that ends the string is already terminated */
+    if (*ptr == '\0')
+      break;
+
+    /* place a null character here to mark the end of the word */
+    *ptr++ = '\0';
+  }
+
+  /* return the list of words */
+  *nwords = num_words;
+  *orig_line = copy_text;
+  return (words);
+}
+
+/*
+char **get_words(FILE *fp, int *nwords, char **orig_line)
+{
+#define BIG_STRING 4096
+  static char str[BIG_STRING];
+  static char str_copy[BIG_STRING];
+  char **words;
+  int max_words = 10;
+  int num_words = 0;
+  char *ptr,*ptr2;
+  char *result;
+
+  words = (char **) myalloc (sizeof (char *) * max_words);
+
+  // read in a line  
+  result = fgets (str, BIG_STRING, fp);
+  if (result == NULL) {
+    *nwords = 0;
+    *orig_line = NULL;
+    return (NULL);
+  }
+
+  // convert line-feed and tabs into spaces  
+  // (this guarantees that there will be a space before the  
+  //  null character at the end of the string)  
+
+  str[BIG_STRING-2] = ' ';
+  str[BIG_STRING-1] = '\0';
+
+  for (ptr = str, ptr2 = str_copy; *ptr != '\0'; ptr++, ptr2++) {
+    *ptr2 = *ptr;
+    if (*ptr == '\t') {
+      *ptr = ' ';
+      *ptr2 = ' ';
+    }
+    else if (*ptr == '\n') {
+      *ptr = ' ';
+      *ptr2 = '\0';
+      break;
+    }
+    else if (*ptr == '\r') {
+      *ptr = '\0';
+      *ptr2 = '\0'; //note don't break yet, on a pc \r is followed by \n
+    }
+  }
+
+  // find the words in the line  
+
+  ptr = str;
+  while (*ptr != '\0') {
+
+    // jump over leading spaces  
+    while (*ptr == ' ')
+      ptr++;
+
+    // break if we reach the end  
+    if (*ptr == '\0')
+      break;
+
+    // save pointer to beginning of word  
+    if (num_words >= max_words) {
+      max_words += 10;
+      words = (char **) realloc (words, sizeof (char *) * max_words);
+    }
+    words[num_words++] = ptr;
+
+    // jump over non-spaces  
+    while (*ptr != ' ')
+      ptr++;
+
+    // place a null character here to mark the end of the word  
+    *ptr++ = '\0';
+  }
+
+  // return the list of words  
+  *nwords = num_words;
+  *orig_line = str_copy;
+  return (words);
+}*/
+
+/******************************************************************************
+Return the value of an item, given a pointer to it and its type.
+
+Entry:
+  item - pointer to item
+  type - data type that "item" points to
+
+Exit:
+  returns a double-precision float that contains the value of the item
+******************************************************************************/
+
+inline double get_item_value(char *item, int type)
+{
+  /* Reinterpret the bytes at item as the given PLY type and widen the
+     value to double.  Integer types narrower than int go through int. */
+  int as_int;
+
+  switch (type) {
+    case PLY_CHAR:
+      as_int = *((char *) item);
+      break;
+    case PLY_UCHAR:
+      as_int = *((unsigned char *) item);
+      break;
+    case PLY_SHORT:
+      as_int = *((short int *) item);
+      break;
+    case PLY_USHORT:
+      as_int = *((unsigned short int *) item);
+      break;
+    case PLY_INT:
+      as_int = *((int *) item);
+      break;
+    case PLY_UINT: {
+      unsigned int as_uint = *((unsigned int *) item);
+      return ((double) as_uint);
+    }
+    case PLY_FLOAT: {
+      double widened = *((float *) item);
+      return (widened);
+    }
+    case PLY_DOUBLE:
+      return (*((double *) item));
+    default:
+      fprintf (stderr, "get_item_value: bad type = %d\n", type);
+      exit (-1);
+  }
+
+  /* all the int-sized and smaller signed/unsigned cases end up here */
+  return ((double) as_int);
+}
+
+
+/******************************************************************************
+Write out an item to a file as raw binary bytes.
+
+Entry:
+  fp         - file to write to
+  int_val    - integer version of item
+  uint_val   - unsigned integer version of item
+  double_val - double-precision float version of item
+  type       - data type to write out
+******************************************************************************/
+
+inline void write_binary_item(
+  FILE *fp,
+  int file_type,
+  int int_val,
+  unsigned int uint_val,
+  double double_val,
+  int type,
+  int *native_binary_type
+)
+{
+  /* byte size of every PLY scalar type, indexed by type code */
+  int ply_type_size[] = {
+    0, 1, 2, 4, 1, 2, 4, 4, 8
+  };
+
+  /* narrowed copies for the types smaller than the incoming arguments */
+  char as_char;
+  unsigned char as_uchar;
+  short as_short;
+  unsigned short as_ushort;
+  float as_float;
+
+  void *bytes;   /* address of the representation that gets written */
+  int nbytes;
+
+  /* pick the argument matching the type and narrow it where needed */
+  switch (type) {
+    case PLY_CHAR:
+      as_char = int_val;
+      bytes = &as_char;
+      break;
+    case PLY_UCHAR:
+      as_uchar = uint_val;
+      bytes = &as_uchar;
+      break;
+    case PLY_SHORT:
+      as_short = int_val;
+      bytes = &as_short;
+      break;
+    case PLY_USHORT:
+      as_ushort = uint_val;
+      bytes = &as_ushort;
+      break;
+    case PLY_INT:
+      bytes = &int_val;
+      break;
+    case PLY_UINT:
+      bytes = &uint_val;
+      break;
+    case PLY_FLOAT:
+      as_float = double_val;
+      bytes = &as_float;
+      break;
+    case PLY_DOUBLE:
+      bytes = &double_val;
+      break;
+    default:
+      fprintf (stderr, "write_binary_item: bad type = %d\n", type);
+      exit (-1);
+  }
+
+  nbytes = ply_type_size[type];
+
+  /* multi-byte values are reversed when the file's byte order differs
+     from the one passed in through native_binary_type */
+  if ((file_type != *native_binary_type) && (nbytes > 1))
+     swap_bytes((char *) bytes, nbytes);
+
+  if (fwrite (bytes, nbytes, 1, fp) != 1)
+  {
+      fprintf(stderr, "PLY ERROR: fwrite() failed -- aborting.\n");
+      exit(1);
+  }
+}
+
+
+/******************************************************************************
+Write out an item to a file as ascii characters.
+
+Entry:
+  fp         - file to write to
+  int_val    - integer version of item
+  uint_val   - unsigned integer version of item
+  double_val - double-precision float version of item
+  type       - data type to write out
+******************************************************************************/
+
+inline void write_ascii_item(
+  FILE *fp,
+  int int_val,
+  unsigned int uint_val,
+  double double_val,
+  int type
+)
+{
+  int written;   /* result of the fprintf that emitted the value */
+
+  /* signed integers print as %d, unsigned as %u, floating point as %g;
+     each value is followed by one blank */
+  switch (type) {
+    case PLY_CHAR:
+    case PLY_SHORT:
+    case PLY_INT:
+      written = fprintf (fp, "%d ", int_val);
+      break;
+    case PLY_UCHAR:
+    case PLY_USHORT:
+    case PLY_UINT:
+      written = fprintf (fp, "%u ", uint_val);
+      break;
+    case PLY_FLOAT:
+    case PLY_DOUBLE:
+      written = fprintf (fp, "%g ", double_val);
+      break;
+    default:
+      fprintf (stderr, "write_ascii_item: bad type = %d\n", type);
+      exit (-1);
+  }
+
+  /* a non-positive result means nothing was written */
+  if (written <= 0)
+  {
+      fprintf(stderr, "PLY ERROR: fprintf() failed -- aborting.\n");
+      exit(1);
+  }
+}
+
+
+/******************************************************************************
+Write out an item to a file as ascii characters.
+
+Entry:
+  fp   - file to write to
+  item - pointer to item to write
+  type - data type that "item" points to
+
+Exit:
+  returns a double-precision float that contains the value of the written item
+******************************************************************************/
+
+inline double old_write_ascii_item(FILE *fp, char *item, int type)
+{
+  /* Reinterpret the bytes at item as the given PLY type, print the value
+     followed by a blank, and hand the value back as a double. */
+  int as_int;
+
+  switch (type) {
+    case PLY_CHAR:
+      as_int = *((char *) item);
+      break;
+    case PLY_UCHAR:
+      as_int = *((unsigned char *) item);
+      break;
+    case PLY_SHORT:
+      as_int = *((short int *) item);
+      break;
+    case PLY_USHORT:
+      as_int = *((unsigned short int *) item);
+      break;
+    case PLY_INT:
+      as_int = *((int *) item);
+      break;
+    case PLY_UINT: {
+      unsigned int as_uint = *((unsigned int *) item);
+      fprintf (fp, "%u ", as_uint);
+      return ((double) as_uint);
+    }
+    case PLY_FLOAT: {
+      double widened = *((float *) item);
+      fprintf (fp, "%g ", widened);
+      return (widened);
+    }
+    case PLY_DOUBLE: {
+      double value = *((double *) item);
+      fprintf (fp, "%g ", value);
+      return (value);
+    }
+    default:
+      fprintf (stderr, "old_write_ascii_item: bad type = %d\n", type);
+      exit (-1);
+  }
+
+  /* every type that fits in an int is printed with %d */
+  fprintf (fp, "%d ", as_int);
+  return ((double) as_int);
+}
+
+
+/******************************************************************************
+Get the value of an item that is in memory, and place the result
+into an integer, an unsigned integer and a double.
+
+Entry:
+  ptr  - pointer to the item
+  type - data type supposedly in the item
+
+Exit:
+  int_val    - integer value
+  uint_val   - unsigned integer value
+  double_val - double-precision floating point value
+******************************************************************************/
+
+inline void get_stored_item(
+  void *ptr,
+  int type,
+  int *int_val,
+  unsigned int *uint_val,
+  double *double_val
+)
+{
+  /* The value at ptr is read as the given type and then written out in
+     all three representations; the one matching the type's family is
+     stored first and the other two are converted from it. */
+  switch (type) {
+    case PLY_CHAR:
+    case PLY_SHORT:
+    case PLY_INT: {
+      /* signed integer family */
+      int v;
+      if (type == PLY_CHAR)
+        v = *((char *) ptr);
+      else if (type == PLY_SHORT)
+        v = *((short int *) ptr);
+      else
+        v = *((int *) ptr);
+      *int_val = v;
+      *uint_val = v;
+      *double_val = v;
+      break;
+    }
+    case PLY_UCHAR:
+    case PLY_USHORT:
+    case PLY_UINT: {
+      /* unsigned integer family */
+      unsigned int u;
+      if (type == PLY_UCHAR)
+        u = *((unsigned char *) ptr);
+      else if (type == PLY_USHORT)
+        u = *((unsigned short int *) ptr);
+      else
+        u = *((unsigned int *) ptr);
+      *uint_val = u;
+      *int_val = u;
+      *double_val = u;
+      break;
+    }
+    case PLY_FLOAT:
+    case PLY_DOUBLE: {
+      /* floating point family */
+      double d;
+      if (type == PLY_FLOAT)
+        d = *((float *) ptr);
+      else
+        d = *((double *) ptr);
+      *double_val = d;
+      *int_val = (int) d;
+      *uint_val = (unsigned int) d;
+      break;
+    }
+    default:
+      fprintf (stderr, "get_stored_item: bad type = %d\n", type);
+      exit (-1);
+  }
+}
+
+
+/******************************************************************************
+Get the value of an item from a binary file, and place the result
+into an integer, an unsigned integer and a double.
+
+Entry:
+  fp   - file to get item from
+  type - data type supposedly in the word
+
+Exit:
+  int_val    - integer value
+  uint_val   - unsigned integer value
+  double_val - double-precision floating point value
+******************************************************************************/
+
+inline void get_binary_item(
+  FILE *fp,
+  int file_type,
+  int type,
+  int *int_val,
+  unsigned int *uint_val,
+  double *double_val,
+  int *native_binary_type
+)
+{
+  /* Reads one scalar of the given PLY type from fp, reverses its bytes
+     when file_type differs from *native_binary_type, and stores the value
+     as int, unsigned int and double.  Exits on a bad type code or a short
+     read. */
+
+  /* byte size of every PLY scalar type, indexed by type code */
+  int ply_type_size[] = {
+    0, 1, 2, 4, 1, 2, 4, 4, 8
+  };
+
+  /* The raw bytes share storage with every scalar type, so the buffer is
+     suitably aligned for whichever type is read back out of it. */
+  union {
+    char bytes[8];
+    unsigned char as_uchar;
+    short int as_short;
+    unsigned short int as_ushort;
+    int as_int;
+    unsigned int as_uint;
+    float as_float;
+    double as_double;
+  } raw;
+  int nbytes;
+
+  /* reject unknown type codes BEFORE the code is used as a table index */
+  switch (type) {
+    case PLY_CHAR:
+    case PLY_UCHAR:
+    case PLY_SHORT:
+    case PLY_USHORT:
+    case PLY_INT:
+    case PLY_UINT:
+    case PLY_FLOAT:
+    case PLY_DOUBLE:
+      nbytes = ply_type_size[type];
+      break;
+    default:
+      fprintf (stderr, "get_binary_item: bad type = %d\n", type);
+      exit (-1);
+  }
+
+  if (fread (raw.bytes, nbytes, 1, fp) != 1)
+  {
+      fprintf(stderr, "PLY ERROR: fread() failed -- aborting.\n");
+      exit(1);
+  }
+
+  /* multi-byte values are reversed when the byte orders differ */
+  if ((file_type != *native_binary_type) && (nbytes > 1))
+     swap_bytes(raw.bytes, nbytes);
+
+  switch (type) {
+    case PLY_CHAR:
+      *int_val = raw.bytes[0];
+      *uint_val = *int_val;
+      *double_val = *int_val;
+      break;
+    case PLY_UCHAR:
+      *uint_val = raw.as_uchar;
+      *int_val = *uint_val;
+      *double_val = *uint_val;
+      break;
+    case PLY_SHORT:
+      *int_val = raw.as_short;
+      *uint_val = *int_val;
+      *double_val = *int_val;
+      break;
+    case PLY_USHORT:
+      *uint_val = raw.as_ushort;
+      *int_val = *uint_val;
+      *double_val = *uint_val;
+      break;
+    case PLY_INT:
+      *int_val = raw.as_int;
+      *uint_val = *int_val;
+      *double_val = *int_val;
+      break;
+    case PLY_UINT:
+      *uint_val = raw.as_uint;
+      *int_val = *uint_val;
+      *double_val = *uint_val;
+      break;
+    case PLY_FLOAT:
+      *double_val = raw.as_float;
+      *int_val = (int) *double_val;
+      *uint_val = (unsigned int) *double_val;
+      break;
+    case PLY_DOUBLE:
+      *double_val = raw.as_double;
+      *int_val = (int) *double_val;
+      *uint_val = (unsigned int) *double_val;
+      break;
+    default:
+      /* not reachable: the type was validated above */
+      fprintf (stderr, "get_binary_item: bad type = %d\n", type);
+      exit (-1);
+  }
+}
+
+
+/******************************************************************************
+Extract the value of an item from an ascii word, and place the result
+into an integer, an unsigned integer and a double.
+
+Entry:
+  word - word to extract value from
+  type - data type supposedly in the word
+
+Exit:
+  int_val    - integer value
+  uint_val   - unsigned integer value
+  double_val - double-precision floating point value
+******************************************************************************/
+
+inline void get_ascii_item(
+  char *word,
+  int type,
+  int *int_val,
+  unsigned int *uint_val,
+  double *double_val
+)
+{
+  /* Parses word according to the PLY type and stores the value as int,
+     unsigned int and double.  Exits on an unknown type code. */
+  switch (type) {
+    case PLY_CHAR:
+    case PLY_UCHAR:
+    case PLY_SHORT:
+    case PLY_USHORT:
+    case PLY_INT:
+      *int_val = atoi (word);
+      *uint_val = (unsigned int) *int_val;
+      *double_val = (double) *int_val;
+      break;
+
+    case PLY_UINT:
+      /* parse as unsigned: a signed parse clamps values of 2^31 and above
+         on platforms where long is 32 bits wide */
+      *uint_val = (unsigned int) strtoul (word, (char **) NULL, 10);
+      *int_val = (int) *uint_val;
+      *double_val = (double) *uint_val;
+      break;
+
+    case PLY_FLOAT:
+    case PLY_DOUBLE:
+      *double_val = atof (word);
+      *int_val = (int) *double_val;
+      *uint_val = (unsigned int) *double_val;
+      break;
+
+    default:
+      fprintf (stderr, "get_ascii_item: bad type = %d\n", type);
+      exit (-1);
+  }
+}
+
+
+/******************************************************************************
+Store a value into a place being pointed to, guided by a data type.
+
+Entry:
+  item       - place to store value
+  type       - data type
+  int_val    - integer version of value
+  uint_val   - unsigned integer version of value
+  double_val - double version of value
+
+Exit:
+  item - pointer to stored value
+******************************************************************************/
+
+inline void store_item (
+  char *item,
+  int type,
+  int int_val,
+  unsigned int uint_val,
+  double double_val
+)
+{
+  /* Write the representation that matches the type into the memory at
+     item: signed types take int_val, unsigned types take uint_val and
+     floating point types take double_val. */
+  switch (type) {
+    case PLY_CHAR:
+      *item = int_val;
+      break;
+    case PLY_UCHAR:
+      *((unsigned char *) item) = uint_val;
+      break;
+    case PLY_SHORT:
+      *((short *) item) = int_val;
+      break;
+    case PLY_USHORT:
+      *((unsigned short *) item) = uint_val;
+      break;
+    case PLY_INT:
+      *((int *) item) = int_val;
+      break;
+    case PLY_UINT:
+      *((unsigned int *) item) = uint_val;
+      break;
+    case PLY_FLOAT:
+      *((float *) item) = double_val;
+      break;
+    case PLY_DOUBLE:
+      *((double *) item) = double_val;
+      break;
+    default:
+      fprintf (stderr, "store_item: bad type = %d\n", type);
+      exit (-1);
+  }
+}
+
+
+/******************************************************************************
+Add an element to a PLY file descriptor.
+
+Entry:
+  plyfile - PLY file descriptor
+  words   - list of words describing the element
+  (no word count is passed in; words[1] and words[2] are read unconditionally)
+******************************************************************************/
+
+inline void add_element (PlyFile *plyfile, char **words)
+{
+  /* Builds a new element from words[1] (name) and words[2] (count) and
+     appends it to the file's element list, growing the list by one. */
+  PlyElement *fresh = (PlyElement *) myalloc (sizeof (PlyElement));
+
+  fresh->name = strdup (words[1]);
+  fresh->num = atoi (words[2]);
+  fresh->nprops = 0;
+
+  /* the first element allocates the list, later ones extend it */
+  if (plyfile->nelems != 0)
+    plyfile->elems = (PlyElement **) realloc (plyfile->elems,
+                     sizeof (PlyElement *) * (plyfile->nelems + 1));
+  else
+    plyfile->elems = (PlyElement **) myalloc (sizeof (PlyElement *));
+
+  /* put the new element in the last slot */
+  plyfile->elems[plyfile->nelems] = fresh;
+  plyfile->nelems++;
+}
+
+
+/******************************************************************************
+Return the type of a property, given the name of the property.
+
+Entry:
+  name - name of property type
+
+Exit:
+  returns integer code for property, or 0 if not found
+******************************************************************************/
+
+inline int get_prop_type(char *type_name)
+{
+  /* primary spellings of the scalar types, indexed by type code */
+  const char *type_names[] = {
+    "invalid",
+    "char", "short", "int",
+    "uchar", "ushort", "uint",
+    "float", "double",
+  };
+
+  /* alternative, size-explicit spellings in the same order */
+  const char *alt_type_names[] = {
+    "invalid",
+    "int8", "int16", "int32", "uint8", "uint16", "uint32", "float32", "float64",
+  };
+
+  /* the primary table is searched completely before the alternative one */
+  const char **tables[2];
+  int t;
+  int code;
+
+  tables[0] = type_names;
+  tables[1] = alt_type_names;
+
+  for (t = 0; t < 2; t++) {
+    for (code = PLY_START_TYPE + 1; code < PLY_END_TYPE; code++) {
+      if (equal_strings (type_name, tables[t][code]))
+        return (code);
+    }
+  }
+
+  /* if we get here, we didn't find the type */
+  return (0);
+}
+
+
+/******************************************************************************
+Add a property to a PLY file descriptor.
+
+Entry:
+  plyfile - PLY file descriptor
+  words   - list of words describing the property
+  (no word count is passed in; words[1..4] are read for a list property, words[1..2] otherwise)
+******************************************************************************/
+
+inline void add_property (PlyFile *plyfile, char **words)
+{
+  /* Builds a property from the header words and appends it to the
+     element most recently added to the file. */
+  PlyProperty *fresh;
+  PlyElement *owner;
+
+  fresh = (PlyProperty *) myalloc (sizeof (PlyProperty));
+
+  if (!equal_strings (words[1], "list")) {
+    /* scalar property: <type> <name> */
+    fresh->external_type = get_prop_type (words[1]);
+    fresh->name = strdup (words[2]);
+    fresh->is_list = 0;
+  }
+  else {
+    /* list property: list <count type> <item type> <name> */
+    fresh->count_external = get_prop_type (words[2]);
+    fresh->external_type = get_prop_type (words[3]);
+    fresh->name = strdup (words[4]);
+    fresh->is_list = 1;
+  }
+
+  /* the property belongs to the last element in the file's list */
+  owner = plyfile->elems[plyfile->nelems - 1];
+
+  /* the first property allocates the table, later ones extend it */
+  if (owner->nprops != 0)
+    owner->props = (PlyProperty **) realloc (owner->props,
+                   sizeof (PlyProperty *) * (owner->nprops + 1));
+  else
+    owner->props = (PlyProperty **) myalloc (sizeof (PlyProperty *));
+
+  owner->props[owner->nprops] = fresh;
+  owner->nprops++;
+}
+
+
+/******************************************************************************
+Add a comment to a PLY file descriptor.
+
+Entry:
+  plyfile - PLY file descriptor
+  line    - line containing comment
+******************************************************************************/
+
+inline void add_comment (PlyFile *plyfile, char *line)
+{
+  /* start 7 characters in (the length of "comment"), then step over any
+     blanks and tabs that follow */
+  char *text = line + 7;
+
+  while (*text == ' ' || *text == '\t')
+    text++;
+
+  ply_put_comment (plyfile, text);
+}
+
+
+/******************************************************************************
+Add some object information to a PLY file descriptor.
+
+Entry:
+  plyfile - PLY file descriptor
+  line    - line containing text info
+******************************************************************************/
+
+inline void add_obj_info (PlyFile *plyfile, char *line)
+{
+  /* start 8 characters in (the length of "obj_info"), then step over any
+     blanks and tabs that follow */
+  char *text = line + 8;
+
+  while (*text == ' ' || *text == '\t')
+    text++;
+
+  ply_put_obj_info (plyfile, text);
+}
+
+
+/******************************************************************************
+Copy a property.
+******************************************************************************/
+
+inline void copy_property(PlyProperty *dest, PlyProperty *src)
+{
+  /* the name is duplicated, so dest gets its own string */
+  dest->name = strdup (src->name);
+
+  /* description of the value itself */
+  dest->external_type = src->external_type;
+  dest->internal_type = src->internal_type;
+  dest->offset = src->offset;
+
+  /* list flag and the description of the list's count field */
+  dest->is_list = src->is_list;
+  dest->count_external = src->count_external;
+  dest->count_internal = src->count_internal;
+  dest->count_offset = src->count_offset;
+}
+
+
+/******************************************************************************
+Allocate some memory.
+
+Entry:
+  size  - amount of memory requested (in bytes)
+  lnum  - line number from which memory was requested
+  fe    - file name from which memory was requested
+******************************************************************************/
+
+inline char *my_alloc(int size, int lnum, const char *fe)
+{
+  char *block = (char *) malloc (size);
+
+  /* A failed allocation is only reported; the NULL pointer is still
+     returned to the caller. */
+  if (block == NULL)
+    fprintf(stderr, "Memory allocation bombed on line %d in %s\n", lnum, fe);
+
+  return block;
+}
+
+}
+}
+#endif

+ 0 - 1
include/igl/ply.h.REMOVED.git-id

@@ -1 +0,0 @@
-60f2b29de2e29d034e068eb5ae4bf9747a63232e