12 роки тому · 2dc872211e
--- a/documentation/active-set.tex
+++ b/documentation/active-set.tex
@@ -1,4 +1,4 @@
 
				-\documentclass{diary}
			
 
				+\documentclass[12pt]{diary}
			
 
				 \title{Active set solver for quadratic programming}
			
 
				 \author{Alec Jacobson}
			
 
				 \date{18 September 2013}
			
@@ -8,8 +8,8 @@
 
				 \newcommand{\RR}{\mat{R}}
			
 
				 \newcommand{\Aeq}{\mat{A}_\textrm{eq}}
			
 
				 \newcommand{\Aieq}{\mat{A}_\textrm{ieq}}
			
 
				-\newcommand{\Beq}{\vc{B}_\textrm{eq}}
			
 
				-\newcommand{\Bieq}{\vc{B}_\textrm{ieq}}
			
 
				+\newcommand{\beq}{\vc{b}_\textrm{eq}}
			
 
				+\newcommand{\bieq}{\vc{b}_\textrm{ieq}}
			
 
				 \newcommand{\lx}{\Bell}
			
 
				 \newcommand{\ux}{\vc{u}}
			
 
				 \newcommand{\lameq} {\lambda_\textrm{eq}}
			
@@ -17,17 +17,28 @@
 
				 \newcommand{\lamlu}  {\lambda_\textrm{lu}}
			
 
				 
			
 
				 \begin{document}
			
 
				+
			
 
				+\begin{pullout}
			
 
				+\footnotesize
			
 
				+\emph{Disclaimer: This document rewrites and paraphrases the ideas in
			
 
				+\url{http://www.math.uh.edu/~rohop/fall_06/Chapter3.pdf},
			
 
				+\url{http://www.math.uh.edu/~rohop/fall_06/Chapter2.pdf}, and
			
 
				+\url{http://www.cs.cornell.edu/courses/cs322/2007sp/notes/qr.pdf}. Mostly this
			
 
				+is to put everything in the same place and use notation compatible with the
			
 
				+libigl implementation. The ideas and descriptions, however, are not novel.}
			
 
				+\end{pullout}
			
 
				+
			
 
				 Quadratic programming problems (QPs) can be written in general as:
			
 
				 \begin{align}
			
 
				 \argmin \limits_\z &
			
 
				-  \z^\transpose \A \z + \z^\transpose \B + \text{ constant}\\
			
 
				-\text{subject to } & \Aieq \z ≤ \Bieq,
			
 
				+  \z^\transpose \A \z + \z^\transpose \b + \text{ constant}\\
			
 
				+\text{subject to } & \Aieq \z ≤ \bieq,
			
 
				 \end{align}
			
 
				 where $\z \in \R^n$ is a vector of unknowns,  $\A \in \R^{n \times n}$ is a (in
			
 
				-our case sparse) matrix of quadratic coefficients, $\B \in \R^n$ is a vector of
			
 
				+our case sparse) matrix of quadratic coefficients, $\b \in \R^n$ is a vector of
			
 
				 linear coefficients, $\Aieq \in \R^{m_\text{ieq} \times n}$ is a matrix (also
			
 
				-sparse) linear inequality coefficients and $\Bieq \in \R^{m_\text{ieq}}$ is a
			
 
				-vector of corresponding right-hand sides. Each row in $\Aieq \z ≤ \Bieq$
			
 
				+sparse) linear inequality coefficients and $\bieq \in \R^{m_\text{ieq}}$ is a
			
 
				+vector of corresponding right-hand sides. Each row in $\Aieq \z ≤ \bieq$
			
 
				 corresponds to a single linear inequality constraint.
			
 
				 
			
 
				 Though representable by the linear inequality constraints above---linear
			
@@ -35,16 +46,16 @@ Though representable by the linear inequality constraints above---linear
 
				 so often that we can write a more practical form
			
 
				 \begin{align}
			
 
				 \argmin \limits_\z &
			
 
				-  \z^\transpose \A \z + \z^\transpose \B + \text{ constant}\\
			
 
				-\text{subject to } & \z_\text{known} = \Y,\\
			
 
				-                   & \Aeq \z = \Beq,\\
			
 
				-                   & \Aieq \z ≤ \Bieq,\\
			
 
				+  \z^\transpose \A \z + \z^\transpose \b + \text{ constant}\\
			
 
				+\text{subject to } & \z_\text{known} = \y,\\
			
 
				+                   & \Aeq \z = \beq,\\
			
 
				+                   & \Aieq \z ≤ \bieq,\\
			
 
				                    & \z ≥ \lx,\\
			
 
				                    & \z ≤ \ux,
			
 
				 \end{align}
			
 
				 where $\z_\text{known} \in \R^{n_\text{known}}$ is a subvector of our unknowns $\z$ which
			
 
				-are known or fixed to obtain corresponding values $\Y \in \R^{n_\text{known}}$,
			
 
				-$\Aeq \in \R^{m_\text{eq} \times n}$ and $\Beq \in \R^{m_\text{eq} \times n}$
			
 
				+are known or fixed to obtain corresponding values $\y \in \R^{n_\text{known}}$,
			
 
				+$\Aeq \in \R^{m_\text{eq} \times n}$ and $\beq \in \R^{m_\text{eq} \times n}$
			
 
				 are linear \emph{equality} coefficients and right-hand sides respectively, and
			
 
				 $\lx, \ux \in \R^n$ are vectors of constant lower and upper bound constraints.
			
 
				 
			
@@ -59,13 +70,13 @@ set'' of constraints. So at any given iterations $i$ we might have a new
 
				 problem:
			
 
				 \begin{align}
			
 
				 \argmin \limits_\z &
			
 
				-  \z^\transpose \A \z + \z^\transpose \B + \text{ constant}\\
			
 
				-\text{subject to } & \z_\text{known}^i = \Y^i,\\
			
 
				-                   & \Aeq^i \z = \Beq^i,
			
 
				+  \z^\transpose \A \z + \z^\transpose \b + \text{ constant}\\
			
 
				+\text{subject to } & \z_\text{known}^i = \y^i,\\
			
 
				+                   & \Aeq^i \z = \beq^i,
			
 
				 \end{align}
			
 
				 where the active rows from 
			
 
				-$\lx ≤ \z ≤ \ux$ and $\Aieq \z ≤ \Bieq$  have been appended into
			
 
				-$\z_\text{known}^i = \Y^i$ and $\Aeq^i \z = \Beq^i$ respectively.
			
 
				+$\lx ≤ \z ≤ \ux$ and $\Aieq \z ≤ \bieq$  have been appended into
			
 
				+$\z_\text{known}^i = \y^i$ and $\Aeq^i \z = \beq^i$ respectively.
			
 
				 
			
 
				 This may be optimized by solving a sparse linear system, resulting in the
			
 
				 current solution $\z^i$. For equality constraint we can also find a
			
@@ -81,20 +92,20 @@ follows:
 
				 
			
 
				 \begin{lstlisting}[keywordstyle=,mathescape]
			
 
				 while not converged
			
 
				-  add to active set all rows where $\Aieq \z > \Bieq$, $\z < \lx$ or $\z > \ux$
			
 
				-    $\Aeq^i,\Beq^i \leftarrow \Aeq,\Beq + \text{active rows of} \Aieq,\Bieq$
			
 
				-    $\z_\text{known}^i,\Y^i \leftarrow \z_\text{known},\Y + \text{active indices and values of} \lx,\ux$
			
 
				+  add to active set all rows where $\Aieq \z > \bieq$, $\z < \lx$ or $\z > \ux$
			
 
				+    $\Aeq^i,\beq^i \leftarrow \Aeq,\beq + \text{active rows of} \Aieq,\bieq$
			
 
				+    $\z_\text{known}^i,\y^i \leftarrow \z_\text{known},\y + \text{active indices and values of} \lx,\ux$
			
 
				   solve problem treating active constraints as equality constraints $\rightarrow \z,\lamieq,\lamlu$
			
 
				   remove from active set all rows with $\lamieq < 0$ or $\lamlu < 0$
			
 
				 end
			
 
				 \end{lstlisting}
			
 
				 
			
 
				-The fixed values constraints of $\z_\text{known}^i = \Y^i$ may be enforced by
			
 
				-substituting $\z_\text{known}^i$ for $\Y^i$ in the energy directly during the
			
 
				+The fixed values constraints of $\z_\text{known}^i = \y^i$ may be enforced by
			
 
				+substituting $\z_\text{known}^i$ for $\y^i$ in the energy directly during the
			
 
				 solve.  Corresponding Lagrange multiplier values $\lambda_\text{known}^i$ can
			
 
				 be recovered after we've found the rest of $\z$.
			
 
				 
			
 
				-The linear equality constraints $\Aeq^i \z = \Beq^i$ are a little trickier. If
			
 
				+The linear equality constraints $\Aeq^i \z = \beq^i$ are a little trickier. If
			
 
				 the rows of 
			
 
				 $\Aeq^i \in \R^{(<m_\text{eq}+ m_\text{ieq}) \times n}$ are linearly
			
 
				 independent then it is straightforward how to build a Lagrangian which enforces
			
@@ -114,8 +125,8 @@ each constraint. This results in solving a system roughly of the form:
 
				 =
			
 
				 \left(
			
 
				 \begin{array}{l}
			
 
				--2\B\\
			
 
				--\Beq^i
			
 
				+-\onehalf\b\\
			
 
				+-\beq^i
			
 
				 \end{array}
			
 
				 \right)
			
 
				 \end{equation}
			
@@ -133,11 +144,11 @@ Let's also assume we have no fixed or known values. Then the linear system
 
				 above corresponds to the following optimization problem:
			
 
				 \begin{align}
			
 
				 \argmin \limits_\z &
			
 
				-  \z^\transpose \A \z + \z^\transpose \B + \text{ constant}\\
			
 
				-\text{subject to } & \Aeq \z = \Beq.
			
 
				+  \z^\transpose \A \z + \z^\transpose \b + \text{ constant}\\
			
 
				+\text{subject to } & \Aeq \z = \beq.
			
 
				 \end{align}
			
 
				 For the sake of cleaner notation, let $m = m_\text{eq}$ so that $\Aeq \in \R^{m
			
 
				-\times n}$ and $\Beq \in \R^m$.
			
 
				+\times n}$ and $\beq \in \R^m$.
			
 
				 
			
 
				 We can construct the null space of the constraints by computing a QR
			
 
				 decomposition of $\Aeq^\transpose$:
			
@@ -160,7 +171,7 @@ n-r}$, and $\R_1 \in \RR^{r \times m}$.
 
				 
			
 
				 Notice that 
			
 
				 \begin{align}
			
 
				-\Aeq \z &= \Beq \\
			
 
				+\Aeq \z &= \beq \\
			
 
				 \P \P^\transpose \Aeq \z &= \\
			
 
				 \P \left(\Aeq^\transpose \P\right)^\transpose \z &= \\
			
 
				 \P\left(\Q\RR\right)^\transpose \z &= \\
			
@@ -175,16 +186,25 @@ Notice that
 
				 \end{align}
			
 
				 Here we see that $\Q_1^\transpose \z$ affects this equality but
			
 
				 $\Q_2^\transpose \z$ does not. Thus
			
 
				-we say that $\Q_2$ forms a basis that spans the null space of $\Aeq$. That is, $\Aeq \Q_2 \w =
			
 
				+we say that $\Q_2$ forms a basis that spans the null space of $\Aeq$. That is,
			
 
				+$\Aeq \Q_2 \w_2 =
			
 
				 \vc{0},
			
 
				-\forall \w \in \R^{n-r}$. Let's write 
			
 
				+\forall \w_2 \in \R^{n-r}$. Let's write 
			
 
				 \begin{equation}
			
 
				 \z = \Q_1 \w_1 + \Q_2 \w_2.
			
 
				 \end{equation}
			
 
				 We're only interested in solutions $\z$ which satisfy our equality constraints
			
 
				-$\Aeq \z = \Beq$. If we plug in the above then we get
			
 
				-
			
 
				-
			
 
				+$\Aeq \z = \beq$. If we plug in the above then we get
			
 
				+\begin{align}
			
 
				+\P \Aeq \left(\Q_1 \w_1 + \Q_2 \w_2\right) &= \beq \\
			
 
				+\P \Aeq \Q_1 \w_1 &= \beq \\
			
 
				+\P \left(\RR^\transpose \quad
			
 
				+\mat{0}\right)\left(\begin{array}{c}\Q_1^\transpose\\
			
 
				+\Q_2^\transpose\end{array}\right) \Q_1 \w_1 &= \beq \\
			
 
				+\P \RR^\transpose \Q_1^\transpose \Q_1 \w_1 &= \beq \\
			
 
				+\P \RR^\transpose \w_1 &= \beq \\
			
 
				+\w_1 &= {\RR^\transpose}^{-1} \P^\transpose \beq.
			
 
				+\end{align}
			
 
				 % P RT Q1T z = Beq
			
 
				 % RT Q1T z = PT Beq
			
 
				 % Q1T z = RT \ PT Beq
			
@@ -192,36 +212,108 @@ $\Aeq \z = \Beq$. If we plug in the above then we get
 
				 % z = Q2 w2 + Q1 * w1 
			
 
				 % z = Q2 w2 + Q1 * RT \ P * Beq
			
 
				 % z = Q2 w2 + lambda0
			
 
				+So then
			
 
				+\begin{align}
			
 
				+z &= \Q_1 {\RR^\transpose}^{-1} \P^\transpose \beq  + \Q_2 \w_2 \\
			
 
				+z &= \Q_2 \w_2 + \overline{\w}_1\\
			
 
				+\end{align}
			
 
				+where $\overline{\w}_1$ is just collecting the known terms that do not depend on the
			
 
				+unknowns $\w_2$.
			
 
				+
			
 
				+Now, by construction, $\Q_2 \w_2 + \overline{\w}_1$ spans (all) possible solutions
			
 
				+for $\z$  that satisfy $\Aeq \z = \beq$
			
 
				+% Let:
			
 
				+% z = Q2 w2 + Q1 * w1 
			
 
				+% z = Q2 w2 + Q1 * RT \ P * Beq
			
 
				+% z = Q2 w2 + lambda0
			
 
				+% Q2 w2 + lambda0 spans all z such that Aeq z = Beq, we have essentially
			
 
				+% "factored out" the constraints. We can then simply make this substitution
			
 
				+% into our original optimization problem, leaving an \emph{unconstrained}
			
 
				+% quadratic energy optimization
			
 
				+\begin{align}
			
 
				+\argmin \limits_{\w_2} & (\Q_2 \w_2 + \overline{\w}_1)^\transpose \A (\Q_2 \w_2 + \overline{\w}_1)  +
			
 
				+(\Q_2 \w_2 + \overline{\w}_1)^\transpose \b + \textrm{ constant}\\
			
 
				+&\text{\emph{or equivalently}} \notag\\
			
 
				+\argmin \limits_{\w_2} & 
			
 
				+\w_2^\transpose \Q_2^\transpose \A \Q_2 + \w_2^\transpose \left(2\Q_2^\transpose \A
			
 
				+\Q_2 \overline{\w}_1 + \Q_2^\transpose \b\right) + \textrm{ constant}
			
 
				+\end{align}
			
 
				+which is solved with a linear system solve
			
 
				+\begin{equation}
			
 
				+\left(\Q_2^\transpose \A \Q_2\right) \w_2 = 
			
 
				+  -2\Q_2^\transpose \A \Q_2 \overline{\w}_1 + \Q_2^\transpose \b
			
 
				+\end{equation}
			
 
				+\todo{I've almost surely lost a sign or factor of 2 here.}
			
 
				+If $\A$ is semi-positive definite then $\Q_2^\transpose\A\Q_2$ will also be
			
 
				+semi-positive definite. Thus Cholesky factorization can be used. If $\A$ is
			
 
				+sparse and $\Aeq$ is sparse and a good column pivoting sparse QR library is
			
 
				+used then $\Q_2$ will be sparse and so will $\Q_2^\transpose\A\Q_2$.
			
 
				+
			
 
				+
			
 
				+After $\w_2$ is known we can compute the solution $\z$ as 
			
 
				+\begin{equation}
			
 
				+\z = \Q_2 \w_2 + \overline{\w}_1.
			
 
				+\end{equation}
			
 
				+
			
 
				+Recovering the Lagrange multipliers $\lameq \in \R^{m}$ corresponding to $\Aeq
			
 
				+\z = \b$ is a bit trickier. The equation is:
			
 
				+\begin{align}
			
 
				+\left(\A\quad \Aeq^\transpose\right)
			
 
				+\left(\begin{array}{c}
			
 
				+\z\\
			
 
				+\lameq
			
 
				+\end{array}\right) &= -\onehalf\b \\
			
 
				+\Aeq^\transpose \lameq &= - \A \z - \onehalf\b \\
			
 
				+\Q_1^\transpose \Aeq^\transpose \lameq &= - \Q_1^\transpose \A \z - \onehalf\Q_1^\transpose \b \\
			
 
				+% AT P = Q_1 R
			
 
				+% AT = Q_1 R PT
			
 
				+%
			
 
				+% Q1T AT
			
 
				+% Q1T Q_1 R PT
			
 
				+% R PT
			
 
				+& \textrm{\emph{Recall that $\Aeq^\transpose\P = \Q_1\RR$ so then $
			
 
				+\Q_1^\transpose \Aeq^\transpose = \RR \P^\transpose$}}\notag\\
			
 
				+\RR \P^\transpose \lameq &= - \Q_1^\transpose \A \z - \onehalf\Q_1^\transpose \b \\
			
 
				+\lameq &= \P
			
 
				+\RR^{-1}
			
 
				+\left(-\Q_1^\transpose \A \z - \onehalf\Q_1^\transpose \b\right).
			
 
				+\end{align}
			
 
				+
			
 
				+\alec{If $r<m$ then I think it is enough to define the permutation matrix as
			
 
				+rectangular $\P \in \R^{m \times r}$.}
			
 
				 
			
 
				-% Aeq z = Beq
			
 
				-% Aeq (Q2 w2 + Q1 w1) = Beq
			
 
				-% Aeq Q1 w1 = Beq
			
 
				+We assumes that there were now known or fixed values. If there are, special
			
 
				+care only needs to be taken to adjust $\Aeq \z = \beq$ to account for
			
 
				+substituting $\z_\text{known}$ with $\y$. 
			
 
				 
			
 
				+\hr
			
 
				+\clearpage
			
 
				 
			
 
				 The following table describes the translation between the entities described in
			
 
				 this document and those used in \texttt{igl/active\_set} and
			
 
				 \texttt{igl/min\_quad\_with\_fixed}.
			
 
				 \begin{lstlisting}[keywordstyle=,mathescape]
			
 
				 $\A$: A
			
 
				-$\B$: B
			
 
				+$\b$: B
			
 
				 $\z$: Z
			
 
				 $\Aeq$: Aeq
			
 
				 $\Aieq$: Aieq
			
 
				-$\Beq$: Beq
			
 
				-$\Bieq$: Bieq
			
 
				+$\beq$: Beq
			
 
				+$\bieq$: Bieq
			
 
				 $\lx$: lx
			
 
				 $\ux$: ux
			
 
				 $\lamieq$: ~Lambda_Aieq_i
			
 
				 $\lamlu$: ~Lambda_known_i
			
 
				 $\P$: AeqTE
			
 
				 $\Q$: AeqTQ
			
 
				-$\RR$: AeqTR
			
 
				-$\RR^\transpose$: AeqTRT
			
 
				+$\RR$: AeqTR1
			
 
				+$\RR^\transpose$: AeqTR1T
			
 
				 $\Q_1$: AeqTQ1
			
 
				 $\Q_1^\transpose$: AeqTQ1T
			
 
				 $\Q_2$: AeqTQ2
			
 
				 $\Q_2^\transpose$: AeqTQ2T
			
 
				-$w_2$: lambda
			
 
				+$\w_2$: lambda
			
 
				+$\overline{\w}_1$: lambda_0
			
 
				 \end{lstlisting}