ThreeB 1.1
forward_algorithm.h
Go to the documentation of this file.
00001 /*
00002     This file is part of B-cubed.
00003 
00004     Copyright (C) 2009, 2010, 2011, Edward Rosten and Susan Cox
00005 
00006     B-cubed is free software; you can redistribute it and/or
00007     modify it under the terms of the GNU Lesser General Public
00008     License as published by the Free Software Foundation; either
00009     version 3.0 of the License, or (at your option) any later version.
00010 
00011     This library is distributed in the hope that it will be useful,
00012     but WITHOUT ANY WARRANTY; without even the implied warranty of
00013     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014     Lesser General Public License for more details.
00015 
00016     You should have received a copy of the GNU General Public License     
00017     along with this program.  If not, see <http://www.gnu.org/licenses/>
00018 */
00019 
00020 #ifndef FORWARD_ALGORITHM_H
00021 #define FORWARD_ALGORITHM_H
00022 #include <tr1/tuple>
00023 #include <tr1/array>
00024 #include <TooN/TooN.h>
00025 #include <vector>
00026 #include <cmath>
00027 
00028 /** Computes the natural logarithm, but returns -1e100 instead of inf
00029 for an input of 0. This prevents trapping of FPU exceptions.
00030 @param x \e x
00031 @return ln \e x
00032 @ingroup gUtility
00033 */
00034 inline double ln(double x)
00035 {
00036     if(x == 0)
00037         return -1e100;
00038     else
00039         return std::log(x);
00040 }
00041 
00042 
00043 /**
00044 The forward algorithm is defined as:
00045 \f{align}
00046     \alpha_1(i) &= \pi_i b_i(O_1) \\
00047     \alpha_t(j) &= b_j(O(t)) \sum_i \alpha_{t-1}(i) a_{ij}
00048 \f}
00049 And the probability of observing the data is just:
00050 \f{equation}
00051     P(O_1 \cdots O_T|\lambda) = P(O|\lambda) \sum_i \alpha_T(i),
00052 \f}
00053 where the state, \f$\lambda = \{ A, \pi, B \}\f$. All multipliers are much less
00054 than 1, to \f$\alpha\f$ rapidly ends up as zero. Instead, store the logarithm:
00055 \f{align}
00056     \delta_t(i) &= \ln \alpha_t(i) \\
00057     \delta_1(i) &= \ln \pi_i + \ln b_j(O_t)
00058 \f}
00059 and the recursion is:
00060 \f{align}
00061     \delta_t(j) &= \ln b_j(O_t) + \ln \sum_i \alpha_{t-1}(i) a_{ij} \\
00062                 &= \ln b_j(O_t) + \ln \sum_i e^{\delta_{t-1}(i) + \ln a_{ij}} \\
00063 \f}
00064 including an arbitrary constant, \f$Z_t(j)\f$ gives:
00065 \f{align}
00066     \delta_t(j) &= \ln b_j(O_t) + \ln \sum_i e^{Z_t(j)} e^{\delta_{t-1}(i) + \ln a_{ij} - Z_t(j)} \\
00067                 &= \ln b_j(O_t) + Z_t(j) + \ln \sum_i e^{\delta_{t-1}(i) + \ln a_{ij} - Z_t(j)}.
00068 \f}
00069 In order to prevent a loss of scale on the addition:
00070 \f{equation}
00071     Z_t(j) \overset{\text{def}}{=} \operatorname*{max}_i \delta_{t-1}(i) + \ln a_{ij},
00072 \f}
00073 so the largest exponent will be exactly 0. The final log probability is, similarly:
00074 \f{equation}
00075     \ln P(O|\lambda) = Z + \ln \sum_i e^{\delta_T(i) - Z}, 
00076 \f}
00077 \e Z can take any value, but to keep the numbers within a convenient range:
00078 \f{equation}
00079     Z  \overset{\text{def}}{=}  \operatorname*{max}_i \delta_T(i).
00080 \f}
00081 
00082 For computing derivatives, two useful results are:
00083 \f{align}
00084     \PD{}{x} \ln f(x) &= \frac{f'(x)}{f(x)}\\
00085     \PD{}{x} e^{f(x)} &= f'(x) e^{f(x)}
00086 \f}
00087 There are \e M parameters of \e B, denoted \f$\phi_1 \ldots \phi_M\f$. The derivatives of \e P are:
00088 \f{align}   
00089     \DD P(O|\lambda) &= \SSum \DD e^{\delta_T(i)}  \\
00090                      &= \SSum e^{\delta_T(i)} \DD \delta_T(i)\\
00091 \f}
00092 Taking derivatives of \f$ \ln P\f$ and rearranging to get numerically more convenient 
00093 results gives:
00094 \f{align}   
00095     \DD \ln P(O|\lambda) & = \frac{\DD P(O|\lambda)}{P(O|\lambda)} \\
00096                          & = \frac{\SSum e^{\delta_T(i)} \DD \delta_T(i)}{P(O|\lambda)}\\
00097                          & = \SSum e^{\delta_T(i) - \ln P(O|\lambda)} \DD \delta_T(i)
00098 \f}
00099 The derivarives of \f$\delta\f$ are:
00100 \f{align}
00101     \gdef\dtj{\delta_T(j)}
00102     \PD{\dtj}{\en} &= \DD \ln \left[ b_j(O_t) \SSum e^{\delta_{t-1}(i) + \ln a_{ij}} \right] \\
00103                    &= \DD \left[ \ln b_j(O_t) \right]  + \frac{\SSum \DD e^{\delta_{t-1}(i) + \ln a_{ij}}}{\SSum e^{\delta_{t-1}(i) + \ln a_{ij}}}\\
00104     \underset{\text{\tt diff\_delta[t][j]}}{\underbrace{\PD{\dtj}{\en}}} &= \underset{\text{\tt B.diff\_log(j, O[t])}}{\underbrace{\DD \left[ \ln b_j(O_t) \right]}}  + 
00105                    
00106                    \frac{\overset{\text{\tt sum\_top}}{\overbrace{\SSum e^{\delta_{t-1}(i) + \ln a_{ij} - Z_t(j)} \DD \delta_{t-1}(i)}}
00107                          }{\underset{\text{\tt sum}}{\underbrace{\SSum e^{\delta_{t-1}(i) + \ln a_{ij} -Z_t(j)}}}},
00108 \f}
00109 with \f$Z_t(j)\f$ as defined in ::forward_algorithm. 
00110 
00111 For computing second derivatives, with \f$\Grad\f$ yielding column vectors, two useful results are:
00112 \f{align}
00113     \Hess \ln f(\Vec{x})  & = \frac{\Hess f(\Vec{x})}{f(\Vec{x})} - \Grad f(\Vec{x}) \Grad f(\Vec{x})\Trn \\
00114     \Hess e^f(\Vec{x})    & = e^{f(\Vec{x})}(\Grad f(\Vec{x}) \Grad f(\Vec{x})\Trn +  \Hess f(\Vec{x})),
00115 \f}
00116 therefore:
00117 \f{equation}
00118     \Hess \ln P(O|\lambda) = \frac{\Hess f(\Vec{x})}{P(O|\lambda)} - \Grad P(O|\lambda) \Grad P(O|\lambda)\Trn,
00119 \f}
00120 and:
00121 \f{equation}
00122     \Hess P(O|\lambda) = \sum_i e^{\delta_t(i) - \ln P(O|\lambda)}\left[ \Grad\delta_t \Grad\delta_t\Trn  + \Hess \delta_t\right].
00123 \f}
00124 Define \f$s_t(j)\f$ as:
00125 \f{equation}
00126     s_t(j) = \sum_i e^{\delta_{t-1}(j) + \ln a_{ij}}
00127 \f}
00128 so that:
00129 \f{equation}
00130     \delta_t(j) = \ln b_j(O_t) + \ln s_t(j).
00131 \f}
00132 The derivatives and Hessian recursion are therefore:
00133 \f{align}
00134     \Grad \delta_t(j) &= \Grad\ln b_j(O_t) + \frac{\Grad s_t(j)}{s_t(j)} \\
00135     \Hess \delta_t(j) &= \Hess\ln b_j(O_t) + \frac{\Hess s_t(j)}{s_t(j)} - \frac{\Grad s_t(j)}{s_t(j)}\frac{\Grad s_t(j)}{s_t(j)}\Trn.\\ 
00136                       &= \underset{\text{\tt B.hess\_log(j, O[t])}}{\underbrace{\Hess\ln b_j(O_t)}} +
00137                       \frac{
00138                         \overset{\text{\tt sum\_top2}}{
00139                           \overbrace{ 
00140                             \sum_i e^{\delta_{t-1}(j) + \ln a_{ij} - Z_t(j)}\left[\Hess\delta_{t-1}(i) + \Grad\delta_{t-1}(i)\Grad\delta_{t-1}(i)\Trn\right]}}
00141                         }{\text{\tt sum}} - \frac{\text{\tt sum\_top sum\_top}\Trn}{\text{\tt sum}^2}
00142 \f}
00143 
00144 @ingroup gHMM
00145 @param A \e A: State transition probabilities.
00146 @param pi \e \f$\pi\f$: initial state probabilities.
00147 @param O \e O or \e I: the observed data (ie the images).
00148 @param B \f$B\f$: A function object giving the (log) probability of an observation given a state, and derivatives with respect to the parameters.
00149 @param compute_deriv Whether to compute the derivative, or return zero.
00150 @param compute_hessian Whether to compute the Hessian, or return zero. This implies \c compute_deriv.
00151 @returns the log probability of observing all the data, and the derivatives of the log probability with respect to the parameters, and the Hessian.
00152 */
00153 template<int States, class Btype, class Otype> std::tr1::tuple<double, TooN::Vector<Btype::NumParameters>, TooN::Matrix<Btype::NumParameters> > forward_algorithm_hessian(TooN::Matrix<States> A, TooN::Vector<States> pi, const Btype& B, const std::vector<Otype>& O, bool compute_deriv=1, bool compute_hessian=1)
00154 {
00155     using namespace TooN;
00156     using namespace std;
00157     using namespace std::tr1;
00158 
00159     if(compute_hessian == 1)
00160         compute_deriv=1;
00161 
00162     static const int M = Btype::NumParameters;
00163     int states = pi.size();
00164     
00165     //delta[j][i] = delta_t(i)
00166     vector<array<double, States> > delta(O.size());
00167 
00168     //diff_delta[t][j][n] = d/de_n delta_t(j)
00169     vector<array<Vector<M>,States > > diff_delta(O.size());
00170     
00171     
00172     //hess_delta[t][j][m][n] = d2/de_n de_m delta_t(j)
00173     vector<array<Matrix<M>,States > > hess_delta(O.size());
00174     
00175     //Initialization: Eqn 19, P 262
00176     //Set initial partial log probabilities:
00177     for(int i=0; i < states; i++)
00178     {
00179         delta[0][i] = ln(pi[i]) + B.log(i, O[0]);
00180         
00181         if(compute_deriv)
00182             diff_delta[0][i] = B.diff_log(i, O[0]);
00183 
00184         if(compute_hessian)
00185             hess_delta[0][i] = B.hess_log(i, O[0]);
00186     }
00187 
00188     //Perform the recursion: Eqn 20, P262
00189     //Note, use T and T-1. Rather than  T+1 and T.
00190     for(unsigned int t=1; t < O.size(); t++)
00191     {   
00192         for(int j=0; j < states; j++)
00193         {
00194             double Ztj = -HUGE_VAL; //This is Z_t(j)
00195             for(int i=0; i < states; i++)
00196                 Ztj = max(Ztj, delta[t-1][i] + ln(A[i][j]));
00197 
00198             double sum=0;
00199             for(int i=0; i < states; i++)
00200                 sum += exp(delta[t-1][i] + ln(A[i][j]) - Ztj);
00201 
00202             delta[t][j] = B.log(j, O[t]) + Ztj + ln(sum);
00203 
00204             if(compute_deriv)
00205             {
00206                 Vector<M> sum_top = Zeros;
00207                 for(int i=0; i < states; i++)
00208                     sum_top += diff_delta[t-1][i] * exp(delta[t-1][i] + ln(A[i][j]) - Ztj);
00209 
00210                 diff_delta[t][j] = B.diff_log(j, O[t]) +  (sum_top) / sum;
00211                 
00212                 if(compute_hessian)
00213                 {
00214                     Matrix<M> sum_top2 = Zeros;
00215                     for(int i=0; i < states; i++)
00216                         sum_top2 += exp(delta[t-1][i] + ln(A[i][j]) - Ztj) * ( hess_delta[t-1][i] + diff_delta[t-1][i].as_col() * diff_delta[t-1][i].as_row());
00217                     
00218                     hess_delta[t][j] = B.hess_log(j, O[t]) + sum_top2 / sum - sum_top.as_col() * sum_top.as_row() / (sum*sum);
00219                 }
00220             }
00221         }
00222     }
00223 
00224     //Compute the log prob using normalization
00225     double Z = -HUGE_VAL;
00226     for(int i=0; i < states; i++)
00227         Z = max(Z, delta.back()[i]);
00228 
00229     double sum =0;
00230     for(int i=0; i < states; i++)
00231         sum += exp(delta.back()[i] - Z);
00232 
00233     double log_prob = Z  + ln(sum);
00234 
00235     //Compute the differential of the log
00236     Vector<M> diff_log = Zeros;
00237     //Compute the differential of the log using normalization
00238     //The convenient normalizer is ln P(O|lambda) which makes the bottom 1.
00239     for(int i=0; compute_deriv && i < states; i++)
00240         diff_log += exp(delta.back()[i] - log_prob)*diff_delta.back()[i];
00241 
00242     Matrix<M> hess_log = Zeros;
00243     //Compute the hessian of the log using normalization
00244     //The convenient normalizer is ln P(O|lambda) which makes the bottom 1.
00245     for(int i=0; compute_hessian && i < states; i++)
00246         hess_log += exp(delta.back()[i] - log_prob) * (hess_delta.back()[i] + diff_delta.back()[i].as_col() * diff_delta.back()[i].as_row());
00247 
00248     hess_log -= diff_log.as_col() * diff_log.as_row();
00249 
00250     //Compute the differential of the Hessian
00251     return  make_tuple(log_prob, diff_log, hess_log);
00252 }
00253 
00254 /**
00255 Run the forward algorithm and return the log probability.
00256 @ingroup gHMM
00257 @param A \e A: State transition probabilities.
00258 @param pi \e \f$\pi\f$: initial state probabilities.
00259 @param O \e O or \e I: the observed data (ie the images).
00260 @param B \f$B\f$: A function object giving the (log) probability of an observation given a state.
00261 @returns the log probability of observing all the data.
00262 */
00263 template<int States, class Btype, class Otype> double forward_algorithm(TooN::Matrix<States> A, TooN::Vector<States> pi, const Btype& B, const std::vector<Otype>& O)
00264 {
00265     using namespace TooN;
00266     using namespace std;
00267     using namespace std::tr1;
00268 
00269     int states = pi.size();
00270     
00271     //delta[j][i] = delta_t(i)
00272     vector<array<double, States> > delta(O.size());
00273 
00274     //Initialization: Eqn 19, P 262
00275     //Set initial partial log probabilities:
00276     for(int i=0; i < states; i++)
00277         delta[0][i] = ln(pi[i]) + B.log(i, O[0]);
00278 
00279     //Perform the recursion: Eqn 20, P262
00280     //Note, use T and T-1. Rather than  T+1 and T.
00281     for(unsigned int t=1; t < O.size(); t++)
00282     {   
00283         for(int j=0; j < states; j++)
00284         {
00285             double Ztj = -HUGE_VAL; //This is Z_t(j)
00286             for(int i=0; i < states; i++)
00287                 Ztj = max(Ztj, delta[t-1][i] + ln(A[i][j]));
00288 
00289             double sum=0;
00290             for(int i=0; i < states; i++)
00291                 sum += exp(delta[t-1][i] + ln(A[i][j]) - Ztj);
00292 
00293             delta[t][j] = B.log(j, O[t]) + Ztj + ln(sum);
00294         }
00295     }
00296 
00297     //Compute the log prob using normalization
00298     double Z = -HUGE_VAL;
00299     for(int i=0; i < states; i++)
00300         Z = max(Z, delta.back()[i]);
00301 
00302     double sum =0;
00303     for(int i=0; i < states; i++)
00304         sum += exp(delta.back()[i] - Z);
00305 
00306     double log_prob = Z  + ln(sum);
00307 
00308     return  log_prob;
00309 }
00310 
00311 
00312 /**
00313 Run the forward algorithm and return the log probability and its derivatives.
00314 @ingroup gHMM
00315 @param A \e A: State transition probabilities.
00316 @param pi \e \f$\pi\f$: initial state probabilities.
00317 @param O \e O or \e I: the observed data (ie the images).
00318 @param B \f$B\f$: A function object giving the (log) probability of an observation given a state, and derivatives with respect to the parameters.
00319 @returns the log probability of observing all the data.
00320 */
00321 template<int States, class Btype, class Otype> std::pair<double, TooN::Vector<Btype::NumParameters> > forward_algorithm_deriv(TooN::Matrix<States> A, TooN::Vector<States> pi, const Btype& B, const std::vector<Otype>& O)
00322 {
00323     using namespace std::tr1;
00324     double p;
00325     TooN::Vector<Btype::NumParameters> v;
00326     tie(p,v, ignore) = forward_algorithm_hessian(A, pi, B, O, 1, 0);
00327     return make_pair(p,v);
00328 }
00329 
00330 
00331 /**
00332 Run the forward algorithm and return the log partials (delta)
00333 @ingroup gHMM
00334 @param A \e A: State transition probabilities.
00335 @param pi \e \f$\pi\f$: initial state probabilities.
00336 @param O \e O or \e I: the observed data (ie the images).
00337 @param B \f$B\f$: A function object giving the (log) probability of an observation given a state, and derivatives with respect to the parameters.
00338 @returns the log probability of observing all the data.
00339 */
00340 template<int States, class Btype, class Otype> 
00341 std::vector<std::tr1::array<double, States> >
00342 forward_algorithm_delta(TooN::Matrix<States> A, TooN::Vector<States> pi, const Btype& B, const std::vector<Otype>& O)
00343 {
00344     using namespace TooN;
00345     using namespace std;
00346     using namespace std::tr1;
00347 
00348     int states = pi.size();
00349     
00350     //delta[j][i] = delta_t(i)
00351     vector<array<double, States> > delta(O.size());
00352 
00353     //Initialization: Eqn 19, P 262
00354     //Set initial partial log probabilities:
00355     for(int i=0; i < states; i++)
00356         delta[0][i] = ln(pi[i]) + B.log(i, O[0]);
00357 
00358     //Forward pass...
00359     //Perform the recursion: Eqn 20, P262
00360     //Note, use T and T-1. Rather than  T+1 and T.
00361     for(unsigned int t=1; t < O.size(); t++)
00362     {   
00363         for(int j=0; j < states; j++)
00364         {
00365             double Ztj = -HUGE_VAL; //This is Z_t(j)
00366             for(int i=0; i < states; i++)
00367                 Ztj = max(Ztj, delta[t-1][i] + ln(A[i][j]));
00368 
00369             double sum=0;
00370             for(int i=0; i < states; i++)
00371                 sum += exp(delta[t-1][i] + ln(A[i][j]) - Ztj);
00372 
00373             delta[t][j] = B.log(j, O[t]) + Ztj + ln(sum);
00374         }
00375     }
00376 
00377     return delta;
00378 }
00379 
00380 /**
00381 Run the forward algorithm and return the log partials (delta)
00382 @ingroup gHMM
00383 @param A \e A: State transition probabilities.
00384 @param pi \e \f$\pi\f$: initial state probabilities.
00385 @param O \e O or \e I: the observed data (ie the images).
00386 @param B \f$B\f$: A function object giving the (log) probability of an observation given a state, and derivatives with respect to the parameters.
00387 @param delta the \f$\delta\f$ values
00388 */
00389 template<int States, class Btype, class Otype> 
00390 void forward_algorithm_delta2(TooN::Matrix<States> A, TooN::Vector<States> pi, const Btype& B, const std::vector<Otype>& O, std::vector<std::tr1::array<double, States> >& delta)
00391 {
00392     using namespace TooN;
00393     using namespace std;
00394     using namespace std::tr1;
00395 
00396     int states = pi.size();
00397     
00398     //delta[j][i] = delta_t(i)
00399     delta.resize(O.size());
00400 
00401     //Initialization: Eqn 19, P 262
00402     //Set initial partial log probabilities:
00403     for(int i=0; i < states; i++)
00404         delta[0][i] = ln(pi[i]) + B.log(i, O[0]);
00405 
00406     Matrix<States> lA;
00407     for(int r=0; r < States; r++)
00408         for(int c=0; c < States; c++)
00409             lA[r][c] = ln(A[r][c]);
00410 
00411     //Forward pass...
00412     //Perform the recursion: Eqn 20, P262
00413     //Note, use T and T-1. Rather than  T+1 and T.
00414     for(unsigned int t=1; t < O.size(); t++)
00415     {   
00416         for(int j=0; j < states; j++)
00417         {
00418             double Ztj = -HUGE_VAL; //This is Z_t(j)
00419             for(int i=0; i < states; i++)
00420                 Ztj = max(Ztj, delta[t-1][i] + lA[i][j]);
00421 
00422             double sum=0;
00423             for(int i=0; i < states; i++)
00424                 sum += exp(delta[t-1][i] + lA[i][j] - Ztj);
00425 
00426             delta[t][j] = B.log(j, O[t]) + Ztj + ln(sum);
00427         }
00428     }
00429 }
00430 
00431 
00432 /**
00433 Run the forward-backwards algorithm and return the log partials (delta and epsilon).
00434 @ingroup gHMM
00435 @param A \e A: State transition probabilities.
00436 @param pi \e \f$\pi\f$: initial state probabilities.
00437 @param O \e O or \e I: the observed data (ie the images).
00438 @param B \f$B\f$: A function object giving the (log) probability of an observation given a state, and derivatives with respect to the parameters.
00439 @returns the log probability of observing all the data.
00440 */
00441 template<int States, class Btype, class Otype> 
00442 std::pair<std::vector<std::tr1::array<double, States> >,  std::vector<std::tr1::array<double, States> > >
00443 forward_backward_algorithm(TooN::Matrix<States> A, TooN::Vector<States> pi, const Btype& B, const std::vector<Otype>& O)
00444 {
00445     using namespace TooN;
00446     using namespace std;
00447     using namespace std::tr1;
00448 
00449     int states = pi.size();
00450     
00451     //delta[j][i] = delta_t(i)
00452     vector<array<double, States> > delta = forward_algorithm_delta(A, pi, B, O);
00453 
00454     ///Backward pass
00455     ///Epsilon is log beta
00456     vector<array<double, States> > epsilon(O.size());
00457 
00458     //Initialize beta to 1, ie epsilon to 0
00459     for(int i=0; i < states; i++)
00460         epsilon[O.size()-1][i] = 0;
00461 
00462     //Perform the backwards recursion
00463     for(int t=O.size()-2; t >= 0; t--)
00464     {
00465         for(int i=0; i < states; i++)
00466         {
00467             //Find a normalizing constant
00468             double Z = -HUGE_VAL;
00469             
00470             for(int j=0; j < states; j++)
00471                 Z = max(Z, ln(A[i][j]) + B.log(j, O[t+1]) + epsilon[t+1][j]);
00472             
00473             double sum=0;
00474             for(int j= 0; j < states; j++)
00475                 sum += exp(ln(A[i][j]) + B.log(j, O[t+1]) + epsilon[t+1][j] - Z);
00476 
00477             epsilon[t][i] = ln(sum) + Z;
00478         }
00479     }
00480     
00481     return make_pair(delta, epsilon);
00482 }
00483 
00484 /*struct RngDrand48
00485 {
00486     double operator()()
00487     {
00488         return drand48();
00489     }
00490 };*/
00491 
00492 ///Select an element from the container v, assuming that v is a 
00493 ///probability distribution over elements up to some scale.
00494 ///@param v Uscaled probability distribution
00495 ///@param scale Scale of v
00496 ///@param rng Random number generator to use
00497 ///@ingroup gHMM
00498 template<class A, class Rng> int select_random_element(const A& v, const double scale, Rng& rng)
00499 {
00500     double total=0, choice = rng()*scale;
00501 
00502     for(int i=0; i < (int)v.size(); i++)
00503     {
00504         total += v[i];  
00505         if(choice <= total)
00506             return i;
00507     }
00508     return v.size()-1;
00509 }
00510 
00511 ///Select an element from the a, assuming that a stores unscaled
00512 ///log probabilities of the elements
00513 ///@param a Uscaled probability distribution, stored as logarithms.
00514 ///@param rng Random number generator to use
00515 ///@ingroup gHMM
00516 template<int N, class Rng> int sample_unscaled_log(std::tr1::array<double, N> a, Rng& rng)
00517 {
00518     double hi = *max_element(a.begin(), a.end());
00519     double sum=0;
00520 
00521     for(unsigned int i=0; i < a.size(); i++)
00522     {
00523         a[i] = exp(a[i] - hi);
00524         sum += a[i];
00525     }
00526 
00527     return select_random_element(a, sum, rng);
00528 }
00529 
00530 ///An implementation of the backwards sampling part of the forwards filtering/backwards sampling algorithm.
00531 /// See `Monte Carlo smoothing for non-linear time series',   Godsill and Doucet, JASA 2004
00532 ///@param A HMM transition matrix.
00533 ///@param delta Forward partial probabilities stored as logarithms.
00534 ///@param rng Random number generator to use
00535 ///@returns state at each time step.
00536 ///@ingroup gHMM
00537 template<int States, class StateType, class Rng>
00538 std::vector<StateType> backward_sampling(TooN::Matrix<States> A, const std::vector<std::tr1::array<double, States> >& delta, Rng& rng)
00539 {
00540     //Compute the elementwise log of A
00541     for(int r=0; r < A.num_rows(); r++)
00542         for(int c=0; c < A.num_cols(); c++)
00543             A[r][c] = ln(A[r][c]);
00544 
00545     std::vector<StateType> samples(delta.size());
00546     
00547     samples.back() = sample_unscaled_log<States, Rng>(delta.back(), rng);
00548     
00549     //A is A[t][t+1]
00550 
00551     for(int i=delta.size()-2; i >= 0; i--)
00552     {
00553         std::tr1::array<double, States> reverse_probabilities = delta[i];
00554 
00555         for(int j=0; j < States; j++)
00556             reverse_probabilities[j] += A[j][samples[i+1]];
00557 
00558         samples[i] = sample_unscaled_log<States, Rng>(reverse_probabilities, rng);
00559     }
00560 
00561     return samples;
00562 }
00563 /*
00564 template<int States, class StateType>
00565 std::vector<StateType> backward_sampling(const TooN::Matrix<States> &A, const std::vector<std::tr1::array<double, States> >& delta)
00566 {
00567     RngDrand48 d;
00568     return backward_sampling<States, StateType, RngDrand48>(A, delta, d);
00569 }
00570 
00571 ///@overload
00572 template<int States>
00573 std::vector<int> backward_sampling(const TooN::Matrix<States>& A, const std::vector<std::tr1::array<double, States> >& delta)
00574 {
00575     RngDrand48 d;
00576     return backward_sampling<States, int, RngDrand48>(A, delta, d);
00577 }
00578 */
00579 
00580 #endif